Merge remote-tracking branch 'matt/master'
This commit is contained in:
2
Makefile
2
Makefile
@@ -72,7 +72,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||
opt.h stmt.h sym.h type.h util.h
|
||||
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
||||
generic-16
|
||||
generic-16 generic-1
|
||||
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||
builtins/dispatch.ll
|
||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||
|
||||
123
ast.cpp
123
ast.cpp
@@ -98,6 +98,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
StmtList *sl;
|
||||
PrintStmt *ps;
|
||||
AssertStmt *as;
|
||||
DeleteStmt *dels;
|
||||
|
||||
if ((es = dynamic_cast<ExprStmt *>(node)) != NULL)
|
||||
es->expr = (Expr *)WalkAST(es->expr, preFunc, postFunc, data);
|
||||
@@ -160,6 +161,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
ps->values = (Expr *)WalkAST(ps->values, preFunc, postFunc, data);
|
||||
else if ((as = dynamic_cast<AssertStmt *>(node)) != NULL)
|
||||
as->expr = (Expr *)WalkAST(as->expr, preFunc, postFunc, data);
|
||||
else if ((dels = dynamic_cast<DeleteStmt *>(node)) != NULL)
|
||||
dels->expr = (Expr *)WalkAST(dels->expr, preFunc, postFunc, data);
|
||||
else
|
||||
FATAL("Unhandled statement type in WalkAST()");
|
||||
}
|
||||
@@ -180,6 +183,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
DereferenceExpr *dre;
|
||||
SizeOfExpr *soe;
|
||||
AddressOfExpr *aoe;
|
||||
NewExpr *newe;
|
||||
|
||||
if ((ue = dynamic_cast<UnaryExpr *>(node)) != NULL)
|
||||
ue->expr = (Expr *)WalkAST(ue->expr, preFunc, postFunc, data);
|
||||
@@ -223,6 +227,12 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
soe->expr = (Expr *)WalkAST(soe->expr, preFunc, postFunc, data);
|
||||
else if ((aoe = dynamic_cast<AddressOfExpr *>(node)) != NULL)
|
||||
aoe->expr = (Expr *)WalkAST(aoe->expr, preFunc, postFunc, data);
|
||||
else if ((newe = dynamic_cast<NewExpr *>(node)) != NULL) {
|
||||
newe->countExpr = (Expr *)WalkAST(newe->countExpr, preFunc,
|
||||
postFunc, data);
|
||||
newe->initExpr = (Expr *)WalkAST(newe->initExpr, preFunc,
|
||||
postFunc, data);
|
||||
}
|
||||
else if (dynamic_cast<SymbolExpr *>(node) != NULL ||
|
||||
dynamic_cast<ConstExpr *>(node) != NULL ||
|
||||
dynamic_cast<FunctionSymbolExpr *>(node) != NULL ||
|
||||
@@ -305,3 +315,116 @@ EstimateCost(ASTNode *root) {
|
||||
return cost;
|
||||
}
|
||||
|
||||
|
||||
/** Given an AST node, check to see if it's safe if we happen to run the
|
||||
code for that node with the execution mask all off.
|
||||
*/
|
||||
static bool
|
||||
lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||
bool *okPtr = (bool *)data;
|
||||
|
||||
if (dynamic_cast<FunctionCallExpr *>(node) != NULL) {
|
||||
// FIXME: If we could somehow determine that the function being
|
||||
// called was safe (and all of the args Exprs were safe, then it'd
|
||||
// be nice to be able to return true here. (Consider a call to
|
||||
// e.g. floatbits() in the stdlib.) Unfortunately for now we just
|
||||
// have to be conservative.
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<AssertStmt *>(node) != NULL) {
|
||||
// While it's fine to run the assert for varying tests, it's not
|
||||
// desirable to check an assert on a uniform variable if all of the
|
||||
// lanes are off.
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<NewExpr *>(node) != NULL ||
|
||||
dynamic_cast<DeleteStmt *>(node) != NULL) {
|
||||
// We definitely don't want to run the uniform variants of these if
|
||||
// the mask is all off. It's also worth skipping the overhead of
|
||||
// executing the varying versions of them in the all-off mask case.
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (g->target.allOffMaskIsSafe == true)
|
||||
// Don't worry about memory accesses if we have a target that can
|
||||
// safely run them with the mask all off
|
||||
return true;
|
||||
|
||||
IndexExpr *ie;
|
||||
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
|
||||
const Type *type = ie->baseExpr->GetType();
|
||||
if (type == NULL)
|
||||
return true;
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL)
|
||||
type = type->GetReferenceTarget();
|
||||
|
||||
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
|
||||
if (ce == NULL) {
|
||||
// indexing with a variable... -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
const PointerType *pointerType =
|
||||
dynamic_cast<const PointerType *>(type);
|
||||
if (pointerType != NULL) {
|
||||
// pointer[index] -> can't be sure -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
const SequentialType *seqType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
Assert(seqType != NULL);
|
||||
int nElements = seqType->GetElementCount();
|
||||
if (nElements == 0) {
|
||||
// Unsized array, so we can't be sure -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t indices[ISPC_MAX_NVEC];
|
||||
int count = ce->AsInt32(indices);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (indices[i] < 0 || indices[i] >= nElements) {
|
||||
// Index is out of bounds -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// All indices are in-bounds
|
||||
return true;
|
||||
}
|
||||
|
||||
MemberExpr *me;
|
||||
if ((me = dynamic_cast<MemberExpr *>(node)) != NULL &&
|
||||
me->dereferenceExpr) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
DereferenceExpr *de;
|
||||
if ((de = dynamic_cast<DereferenceExpr *>(node)) != NULL) {
|
||||
const Type *exprType = de->expr->GetType();
|
||||
if (dynamic_cast<const PointerType *>(exprType) != NULL) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
SafeToRunWithMaskAllOff(ASTNode *root) {
|
||||
bool safe = true;
|
||||
WalkAST(root, lCheckAllOffSafety, NULL, &safe);
|
||||
return safe;
|
||||
}
|
||||
|
||||
4
ast.h
4
ast.h
@@ -144,4 +144,8 @@ extern Stmt *TypeCheck(Stmt *);
|
||||
the given root. */
|
||||
extern int EstimateCost(ASTNode *root);
|
||||
|
||||
/** Returns true if it would be safe to run the given code with an "all
|
||||
off" mask. */
|
||||
extern bool SafeToRunWithMaskAllOff(ASTNode *root);
|
||||
|
||||
#endif // ISPC_AST_H
|
||||
|
||||
14
builtins.cpp
14
builtins.cpp
@@ -391,6 +391,8 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__count_trailing_zeros_i64",
|
||||
"__count_leading_zeros_i32",
|
||||
"__count_leading_zeros_i64",
|
||||
"__delete_uniform",
|
||||
"__delete_varying",
|
||||
"__do_assert_uniform",
|
||||
"__do_assert_varying",
|
||||
"__do_print",
|
||||
@@ -449,6 +451,9 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__min_varying_uint32",
|
||||
"__min_varying_uint64",
|
||||
"__movmsk",
|
||||
"__new_uniform",
|
||||
"__new_varying32",
|
||||
"__new_varying64",
|
||||
"__num_cores",
|
||||
"__packed_load_active",
|
||||
"__packed_store_active",
|
||||
@@ -794,6 +799,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
builtins_bitcode_generic_16_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 1:
|
||||
extern unsigned char builtins_bitcode_generic_1[];
|
||||
extern int builtins_bitcode_generic_1_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_1,
|
||||
builtins_bitcode_generic_1_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
@@ -829,7 +841,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
// If the user wants the standard library to be included, parse the
|
||||
// serialized version of the stdlib.ispc file to get its
|
||||
// definitions added.
|
||||
if (g->target.isa == Target::GENERIC) {
|
||||
if (g->target.isa == Target::GENERIC&&g->target.vectorWidth!=1) { // 1 wide uses x86 stdlib
|
||||
extern char stdlib_generic_code[];
|
||||
yy_scan_string(stdlib_generic_code);
|
||||
yyparse();
|
||||
|
||||
935
builtins/target-generic-1.ll
Executable file
935
builtins/target-generic-1.ll
Executable file
@@ -0,0 +1,935 @@
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Define the standard library builtins for the NOVEC target
|
||||
define(`MASK',`i32')
|
||||
define(`WIDTH',`1')
|
||||
include(`util.m4')
|
||||
; Define some basics for a 1-wide target
|
||||
stdlib_core()
|
||||
packed_load_and_store()
|
||||
scans()
|
||||
int64minmax()
|
||||
aossoa()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
gen_masked_store(1, i8, 8)
|
||||
gen_masked_store(1, i16, 16)
|
||||
gen_masked_store(1, i32, 32)
|
||||
gen_masked_store(1, i64, 64)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
load_and_broadcast(1, i8, 8)
|
||||
load_and_broadcast(1, i16, 16)
|
||||
load_and_broadcast(1, i32, 32)
|
||||
load_and_broadcast(1, i64, 64)
|
||||
|
||||
masked_load(1, i8, 8, 1)
|
||||
masked_load(1, i16, 16, 2)
|
||||
masked_load(1, i32, 32, 4)
|
||||
masked_load(1, i64, 64, 8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
; define these with the macros from stdlib.m4
|
||||
|
||||
gen_gather(1, i8)
|
||||
gen_gather(1, i16)
|
||||
gen_gather(1, i32)
|
||||
gen_gather(1, i64)
|
||||
|
||||
gen_scatter(1, i8)
|
||||
gen_scatter(1, i16)
|
||||
gen_scatter(1, i32)
|
||||
gen_scatter(1, i64)
|
||||
|
||||
|
||||
;; Per-lane select for 1-wide i8 vectors: returns the first operand when
;; the mask lane is zero and the second operand otherwise.
;; The choice is made on extracted scalars rather than with a vector
;; select or and/or blending because of problems with the LLVM scalarizer.
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8>,
                              <1 x i32> %mask) nounwind readnone alwaysinline {
  %lane = extractelement <1 x i32> %mask, i32 0
  %is_off = icmp eq i32 %lane, 0
  %if_off = extractelement <1 x i8> %0, i32 0
  %if_on = extractelement <1 x i8> %1, i32 0
  %picked = select i1 %is_off, i8 %if_off, i8 %if_on
  %result = insertelement <1 x i8> undef, i8 %picked, i32 0
  ret <1 x i8> %result
}
|
||||
|
||||
;; Per-lane select for 1-wide i16 vectors: returns the first operand when
;; the mask lane is zero and the second operand otherwise.  The decision
;; is made on extracted scalars and the result is re-wrapped in a vector.
define <1 x i16> @__vselect_i16(<1 x i16>, <1 x i16>,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  %lane = extractelement <1 x i32> %mask, i32 0
  %is_off = icmp eq i32 %lane, 0
  %if_off = extractelement <1 x i16> %0, i32 0
  %if_on = extractelement <1 x i16> %1, i32 0
  %picked = select i1 %is_off, i16 %if_off, i16 %if_on
  %result = insertelement <1 x i16> undef, i16 %picked, i32 0
  ret <1 x i16> %result
}
|
||||
|
||||
|
||||
;; Per-lane select for 1-wide i32 vectors: returns the first operand when
;; the mask lane is zero and the second operand otherwise.  The decision
;; is made on extracted scalars and the result is re-wrapped in a vector.
define <1 x i32> @__vselect_i32(<1 x i32>, <1 x i32>,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  %lane = extractelement <1 x i32> %mask, i32 0
  %is_off = icmp eq i32 %lane, 0
  %if_off = extractelement <1 x i32> %0, i32 0
  %if_on = extractelement <1 x i32> %1, i32 0
  %picked = select i1 %is_off, i32 %if_off, i32 %if_on
  %result = insertelement <1 x i32> undef, i32 %picked, i32 0
  ret <1 x i32> %result
}
|
||||
;; Per-lane select for 1-wide i64 vectors: returns the first operand when
;; the mask lane is zero and the second operand otherwise.  The decision
;; is made on extracted scalars and the result is re-wrapped in a vector.
define <1 x i64> @__vselect_i64(<1 x i64>, <1 x i64>,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  %lane = extractelement <1 x i32> %mask, i32 0
  %is_off = icmp eq i32 %lane, 0
  %if_off = extractelement <1 x i64> %0, i32 0
  %if_on = extractelement <1 x i64> %1, i32 0
  %picked = select i1 %is_off, i64 %if_off, i64 %if_on
  %result = insertelement <1 x i64> undef, i64 %picked, i32 0
  ret <1 x i64> %result
}
|
||||
|
||||
;; Per-lane select for 1-wide float vectors: returns the first operand
;; when the mask lane is zero and the second operand otherwise.  The
;; decision is made on extracted scalars and the result is re-wrapped in
;; a vector.
define <1 x float> @__vselect_float(<1 x float>, <1 x float>,
                                    <1 x i32> %mask) nounwind readnone alwaysinline {
  %lane = extractelement <1 x i32> %mask, i32 0
  %is_off = icmp eq i32 %lane, 0
  %if_off = extractelement <1 x float> %0, i32 0
  %if_on = extractelement <1 x float> %1, i32 0
  %picked = select i1 %is_off, float %if_off, float %if_on
  %result = insertelement <1 x float> undef, float %picked, i32 0
  ret <1 x float> %result
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
;; Blending masked store for 1-wide i8 vectors: loads the value currently
;; in memory, merges in the new value according to the mask, and stores
;; the merged value back.
;; A 1-wide i8 vector is a single byte, so only byte alignment may be
;; assumed here; claiming a larger alignment than the pointer really has
;; is undefined behavior.
define void @__masked_store_blend_8(<1 x i8>* nocapture, <1 x i8>,
                                    <1 x i32> %mask) nounwind alwaysinline {
  %val = load <1 x i8> * %0, align 1
  %newval = call <1 x i8> @__vselect_i8(<1 x i8> %val, <1 x i8> %1, <1 x i32> %mask)
  store <1 x i8> %newval, <1 x i8> * %0, align 1
  ret void
}
|
||||
;; Blending masked store for 1-wide i16 vectors: loads the value currently
;; in memory, merges in the new value according to the mask, and stores
;; the merged value back.
;; A 1-wide i16 vector is two bytes, so at most two-byte alignment may be
;; assumed here; claiming a larger alignment than the pointer really has
;; is undefined behavior.
;; NOTE(review): assumes i16 data is at least naturally aligned - confirm.
define void @__masked_store_blend_16(<1 x i16>* nocapture, <1 x i16>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %val = load <1 x i16> * %0, align 2
  %newval = call <1 x i16> @__vselect_i16(<1 x i16> %val, <1 x i16> %1, <1 x i32> %mask)
  store <1 x i16> %newval, <1 x i16> * %0, align 2
  ret void
}
|
||||
|
||||
|
||||
;; Blending masked store for 1-wide i32 vectors: loads the value currently
;; in memory, merges in the new value according to the mask, and stores
;; the merged value back.
define void @__masked_store_blend_32(<1 x i32>* nocapture, <1 x i32>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %current = load <1 x i32> * %0, align 4
  %blended = call <1 x i32> @__vselect_i32(<1 x i32> %current, <1 x i32> %1, <1 x i32> %mask)
  store <1 x i32> %blended, <1 x i32> * %0, align 4
  ret void
}
|
||||
|
||||
;; Blending masked store for 1-wide i64 vectors: loads the value currently
;; in memory, merges in the new value according to the mask, and stores
;; the merged value back.  Only four-byte alignment is assumed.
define void @__masked_store_blend_64(<1 x i64>* nocapture, <1 x i64>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %current = load <1 x i64> * %0, align 4
  %blended = call <1 x i64> @__vselect_i64(<1 x i64> %current, <1 x i64> %1, <1 x i32> %mask)
  store <1 x i64> %blended, <1 x i64> * %0, align 4
  ret void
}
|
||||
|
||||
;; Returns the top bit of the single mask lane as a 0/1 integer.
define i32 @__movmsk(<1 x i32>) nounwind readnone alwaysinline {
  %lane = extractelement <1 x i32> %0, i32 0
  %top_bit = lshr i32 %lane, 31
  ret i32 %top_bit
}
|
||||
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding
|
||||
;;
|
||||
;; There are not any rounding instructions in SSE2, so we have to emulate
|
||||
;; the functionality with multiple instructions...
|
||||
|
||||
; The code for __round_* is the result of compiling the following source
|
||||
; code.
|
||||
;
|
||||
; export float Round(float x) {
|
||||
; unsigned int sign = signbits(x);
|
||||
; unsigned int ix = intbits(x);
|
||||
; ix ^= sign;
|
||||
; x = floatbits(ix);
|
||||
; x += 0x1.0p23f;
|
||||
; x -= 0x1.0p23f;
|
||||
; ix = intbits(x);
|
||||
; ix ^= sign;
|
||||
; x = floatbits(ix);
|
||||
; return x;
|
||||
;}
|
||||
|
||||
;; Rounds a 1-wide float vector: clears the sign bit, adds and then
;; subtracts 2^23 (8388608.0), and finally restores the original sign bit.
define <1 x float> @__round_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %bits = bitcast <1 x float> %0 to <1 x i32>
  %sign = and <1 x i32> %bits, <i32 -2147483648>
  %abs_bits = xor <1 x i32> %bits, %sign
  %abs_val = bitcast <1 x i32> %abs_bits to <1 x float>
  %bumped = fadd <1 x float> %abs_val, <float 8.388608e+06>
  %rounded = fadd <1 x float> %bumped, <float -8.388608e+06>
  %rounded_bits = bitcast <1 x float> %rounded to <1 x i32>
  %signed_bits = xor <1 x i32> %rounded_bits, %sign
  %result = bitcast <1 x i32> %signed_bits to <1 x float>
  ret <1 x float> %result
}
|
||||
|
||||
;; Similarly, for implementations of the __floor* functions below, we have the
|
||||
;; bitcode from compiling the following source code...
|
||||
|
||||
;export float Floor(float x) {
|
||||
; float y = Round(x);
|
||||
; unsigned int cmp = y > x ? 0xffffffff : 0;
|
||||
; float delta = -1.f;
|
||||
; unsigned int idelta = intbits(delta);
|
||||
; idelta &= cmp;
|
||||
; delta = floatbits(idelta);
|
||||
; return y + delta;
|
||||
;}
|
||||
|
||||
;; Floor of a 1-wide float vector: rounds first, then adds -1.0 (bit
;; pattern 0xBF800000) when the rounded value is greater than the input
;; and adds 0.0 otherwise.
define <1 x float> @__floor_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <1 x float> @__round_varying_float(<1 x float> %0) nounwind
  %too_big = fcmp ogt <1 x float> %rounded, %0
  %lane_mask = sext <1 x i1> %too_big to <1 x i32>
  %delta_bits = and <1 x i32> %lane_mask, <i32 -1082130432>
  %delta = bitcast <1 x i32> %delta_bits to <1 x float>
  %result = fadd <1 x float> %rounded, %delta
  ret <1 x float> %result
}
|
||||
|
||||
;; And here is the code we compiled to get the __ceil* functions below
|
||||
;
|
||||
;export uniform float Ceil(uniform float x) {
|
||||
; uniform float y = Round(x);
|
||||
; uniform int yltx = y < x ? 0xffffffff : 0;
|
||||
; uniform float delta = 1.f;
|
||||
; uniform int idelta = intbits(delta);
|
||||
; idelta &= yltx;
|
||||
; delta = floatbits(idelta);
|
||||
; return y + delta;
|
||||
;}
|
||||
|
||||
;; Ceiling of a 1-wide float vector: rounds first, then adds 1.0 (bit
;; pattern 0x3F800000) when the rounded value is less than the input and
;; adds 0.0 otherwise.
define <1 x float> @__ceil_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <1 x float> @__round_varying_float(<1 x float> %0) nounwind
  %too_small = fcmp olt <1 x float> %rounded, %0
  %lane_mask = sext <1 x i1> %too_small to <1 x i32>
  %delta_bits = and <1 x i32> %lane_mask, <i32 1065353216>
  %delta = bitcast <1 x i32> %delta_bits to <1 x float>
  %result = fadd <1 x float> %rounded, %delta
  ret <1 x float> %result
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding doubles
|
||||
|
||||
; expecting math lib to provide this
|
||||
declare double @ceil (double) nounwind readnone
|
||||
declare double @floor (double) nounwind readnone
|
||||
declare double @round (double) nounwind readnone
|
||||
;declare float @llvm.sqrt.f32(float %Val)
|
||||
declare double @llvm.sqrt.f64(double %Val)
|
||||
declare float @llvm.sin.f32(float %Val)
|
||||
declare float @llvm.cos.f32(float %Val)
|
||||
declare float @llvm.sqrt.f32(float %Val)
|
||||
declare float @llvm.exp.f32(float %Val)
|
||||
declare float @llvm.log.f32(float %Val)
|
||||
declare float @llvm.pow.f32(float %f, float %e)
|
||||
|
||||
|
||||
|
||||
|
||||
;; stuff that could be in builtins ...
|
||||
|
||||
;; Helper macro that expands to the body of a function taking a single
;; unnamed 1-wide vector argument: it extracts the lane, calls the given
;; scalar function on it, and returns the result wrapped back into a
;; 1-wide vector.  First argument: element type.  Second argument: the
;; scalar function to call.
define(`unary1to1', `
  %lane_in = extractelement <1 x $1> %0, i32 0
  %lane_out = call $1 $2($1 %lane_in)
  %wrapped = insertelement <1 x $1> undef, $1 %lane_out, i32 0
  ret <1 x $1> %wrapped
')
|
||||
|
||||
|
||||
|
||||
;; dummy 1 wide vector ops
|
||||
;; Dummy 1-wide version: each of the four input vectors is stored
;; unchanged through the matching output pointer.
define void
@__aos_to_soa4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
                      <1 x float> %v3,
                      <1 x float> * noalias %out0,
                      <1 x float> * noalias %out1,
                      <1 x float> * noalias %out2,
                      <1 x float> * noalias %out3) nounwind alwaysinline {
  store <1 x float> %v0, <1 x float> * %out0
  store <1 x float> %v1, <1 x float> * %out1
  store <1 x float> %v2, <1 x float> * %out2
  store <1 x float> %v3, <1 x float> * %out3
  ret void
}

;; Dummy 1-wide version: forwards all arguments to the function above.
define void
@__soa_to_aos4_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
                      <1 x float> %v3,
                      <1 x float> * noalias %out0,
                      <1 x float> * noalias %out1,
                      <1 x float> * noalias %out2,
                      <1 x float> * noalias %out3) nounwind alwaysinline {
  call void @__aos_to_soa4_float1(<1 x float> %v0, <1 x float> %v1,
                                  <1 x float> %v2, <1 x float> %v3,
                                  <1 x float> * %out0, <1 x float> * %out1,
                                  <1 x float> * %out2, <1 x float> * %out3)
  ret void
}
|
||||
|
||||
;; Dummy 1-wide version: each of the three input vectors is stored
;; unchanged through the matching output pointer.
define void
@__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
                      <1 x float> * %out0,
                      <1 x float> * %out1,
                      <1 x float> * %out2) {
  store <1 x float> %v0, <1 x float> * %out0
  store <1 x float> %v1, <1 x float> * %out1
  store <1 x float> %v2, <1 x float> * %out2
  ret void
}

;; Dummy 1-wide version: forwards all arguments to the function above.
define void
@__soa_to_aos3_float1(<1 x float> %v0, <1 x float> %v1, <1 x float> %v2,
                      <1 x float> * %out0,
                      <1 x float> * %out1,
                      <1 x float> * %out2) {
  call void @__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1,
                                  <1 x float> %v2,
                                  <1 x float> * %out0, <1 x float> * %out1,
                                  <1 x float> * %out2)
  ret void
}
|
||||
|
||||
|
||||
;; end builtins
|
||||
|
||||
|
||||
;; Double-precision rounding for the 1-wide target.  Each function applies
;; the scalar math library routine declared earlier in this file to the
;; single lane and returns the result as a 1-wide vector.

define <1 x double> @__round_varying_double(<1 x double>) nounwind readonly alwaysinline {
  unary1to1(double, @round)
}

define <1 x double> @__floor_varying_double(<1 x double>) nounwind readonly alwaysinline {
  unary1to1(double, @floor)
}

define <1 x double> @__ceil_varying_double(<1 x double>) nounwind readonly alwaysinline {
  unary1to1(double, @ceil)
}
|
||||
|
||||
; To do vector integer min and max, we do the vector compare and then sign
|
||||
; extend the i1 vector result to an i32 mask. The __vselect does the
|
||||
; rest...
|
||||
|
||||
;; Signed 32-bit min and max.  The varying versions compare the vectors,
;; sign extend the comparison result into a lane mask, and let the i32
;; vector select pick the first argument where the comparison held.  The
;; uniform versions are a plain scalar compare and select.

define <1 x i32> @__min_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_less = icmp slt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_less to <1 x i32>
  %smaller = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %smaller
}

define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_less = icmp slt i32 %0, %1
  %smaller = select i1 %first_less, i32 %0, i32 %1
  ret i32 %smaller
}

define <1 x i32> @__max_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_greater = icmp sgt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_greater to <1 x i32>
  %larger = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %larger
}

define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_greater = icmp sgt i32 %0, %1
  %larger = select i1 %first_greater, i32 %0, i32 %1
  ret i32 %larger
}
|
||||
|
||||
; The functions for unsigned ints are similar, just with unsigned
|
||||
; comparison functions...
|
||||
|
||||
;; Unsigned 32-bit min and max.  Same structure as the signed versions,
;; but using unsigned comparisons.

define <1 x i32> @__min_varying_uint32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_less = icmp ult <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_less to <1 x i32>
  %smaller = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %smaller
}

define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_less = icmp ult i32 %0, %1
  %smaller = select i1 %first_less, i32 %0, i32 %1
  ret i32 %smaller
}

define <1 x i32> @__max_varying_uint32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_greater = icmp ugt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_greater to <1 x i32>
  %larger = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %larger
}

define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_greater = icmp ugt i32 %0, %1
  %larger = select i1 %first_greater, i32 %0, i32 %1
  ret i32 %larger
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
;; Population count for 32-bit and 64-bit integers, implemented with the
;; LLVM ctpop intrinsics.
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
declare i64 @llvm.ctpop.i64(i64) nounwind readnone

define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
  %bits_set = call i32 @llvm.ctpop.i32(i32 %0)
  ret i32 %bits_set
}

define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
  %bits_set = call i64 @llvm.ctpop.i64(i64 %0)
  ret i64 %bits_set
}
|
||||
|
||||
|
||||
;; Float reductions.  With a single lane, the sum, minimum and maximum
;; are all just the value of that lane.

define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline {
  %only_lane = extractelement <1 x float> %v, i32 0
  ret float %only_lane
}

define float @__reduce_min_float(<1 x float>) nounwind readnone {
  %only_lane = extractelement <1 x float> %0, i32 0
  ret float %only_lane
}

define float @__reduce_max_float(<1 x float>) nounwind readnone {
  %only_lane = extractelement <1 x float> %0, i32 0
  ret float %only_lane
}
|
||||
|
||||
;; Signed 32-bit reductions.  With a single lane, the sum, minimum and
;; maximum are all just the value of that lane.

define i32 @__reduce_add_int32(<1 x i32> %v) nounwind readnone {
  %only_lane = extractelement <1 x i32> %v, i32 0
  ret i32 %only_lane
}

define i32 @__reduce_min_int32(<1 x i32>) nounwind readnone {
  %only_lane = extractelement <1 x i32> %0, i32 0
  ret i32 %only_lane
}

define i32 @__reduce_max_int32(<1 x i32>) nounwind readnone {
  %only_lane = extractelement <1 x i32> %0, i32 0
  ret i32 %only_lane
}
|
||||
|
||||
;; Unsigned 32-bit reductions.  The add forwards to the signed version;
;; the minimum and maximum return the value of the single lane.

define i32 @__reduce_add_uint32(<1 x i32> %v) nounwind readnone {
  %total = call i32 @__reduce_add_int32(<1 x i32> %v)
  ret i32 %total
}

define i32 @__reduce_min_uint32(<1 x i32>) nounwind readnone {
  %only_lane = extractelement <1 x i32> %0, i32 0
  ret i32 %only_lane
}

define i32 @__reduce_max_uint32(<1 x i32>) nounwind readnone {
  %only_lane = extractelement <1 x i32> %0, i32 0
  ret i32 %only_lane
}
|
||||
|
||||
|
||||
;; Double reductions.  With a single lane, the sum, minimum and maximum
;; are all just the value of that lane.

define double @__reduce_add_double(<1 x double>) nounwind readnone {
  %only_lane = extractelement <1 x double> %0, i32 0
  ret double %only_lane
}

define double @__reduce_min_double(<1 x double>) nounwind readnone {
  %only_lane = extractelement <1 x double> %0, i32 0
  ret double %only_lane
}

define double @__reduce_max_double(<1 x double>) nounwind readnone {
  %only_lane = extractelement <1 x double> %0, i32 0
  ret double %only_lane
}
|
||||
|
||||
;; Signed 64-bit reductions.  With a single lane, the sum, minimum and
;; maximum are all just the value of that lane.

define i64 @__reduce_add_int64(<1 x i64>) nounwind readnone {
  %only_lane = extractelement <1 x i64> %0, i32 0
  ret i64 %only_lane
}

define i64 @__reduce_min_int64(<1 x i64>) nounwind readnone {
  %only_lane = extractelement <1 x i64> %0, i32 0
  ret i64 %only_lane
}

define i64 @__reduce_max_int64(<1 x i64>) nounwind readnone {
  %only_lane = extractelement <1 x i64> %0, i32 0
  ret i64 %only_lane
}
|
||||
|
||||
;; Unsigned 64-bit minimum and maximum reductions.  With a single lane,
;; both are just the value of that lane.

define i64 @__reduce_min_uint64(<1 x i64>) nounwind readnone {
  %only_lane = extractelement <1 x i64> %0, i32 0
  ret i64 %only_lane
}

define i64 @__reduce_max_uint64(<1 x i64>) nounwind readnone {
  %only_lane = extractelement <1 x i64> %0, i32 0
  ret i64 %only_lane
}
|
||||
|
||||
;; Equality reductions.  Each function writes the value of the single
;; lane through the samevalue pointer and returns true.  The mask
;; argument is accepted but not consulted.

define i1 @__reduce_equal_int32(<1 x i32> %vv, i32 * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %only_lane = extractelement <1 x i32> %vv, i32 0
  store i32 %only_lane, i32 * %samevalue
  ret i1 true
}

define i1 @__reduce_equal_float(<1 x float> %vv, float * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %only_lane = extractelement <1 x float> %vv, i32 0
  store float %only_lane, float * %samevalue
  ret i1 true
}

define i1 @__reduce_equal_int64(<1 x i64> %vv, i64 * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %only_lane = extractelement <1 x i64> %vv, i32 0
  store i64 %only_lane, i64 * %samevalue
  ret i1 true
}

define i1 @__reduce_equal_double(<1 x double> %vv, double * %samevalue,
                                 <1 x i32> %mask) nounwind alwaysinline {
  %only_lane = extractelement <1 x double> %vv, i32 0
  store double %only_lane, double * %samevalue
  ret i1 true
}
|
||||
|
||||
; extracting/reinserting elements because I want to be able to remove vectors later on
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
;; Reciprocal of a 1-wide float vector, computed with a scalar divide of
;; 1.0 by the single lane (no approximation instruction and no
;; refinement step are used here).
define <1 x float> @__rcp_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %denom = extractelement <1 x float> %0, i32 0
  %recip = fdiv float 1., %denom
  %result = insertelement <1 x float> undef, float %recip, i32 0
  ret <1 x float> %result
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; sqrt
|
||||
|
||||
;; Square root of a 1-wide float vector, computed by calling the scalar
;; LLVM sqrt intrinsic on the single lane.
define <1 x float> @__sqrt_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %operand = extractelement <1 x float> %0, i32 0
  %root = call float @llvm.sqrt.f32(float %operand)
  %result = insertelement <1 x float> undef, float %root, i32 0
  ret <1 x float> %result
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; rsqrt
|
||||
|
||||
;; Reciprocal square root of a 1-wide float vector, computed as the
;; reciprocal of the square root using the two functions defined in this
;; file (no approximation instruction and no refinement step).
define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alwaysinline {
  %root = call <1 x float> @__sqrt_varying_float(<1 x float> %v)
  %recip = call <1 x float> @__rcp_varying_float(<1 x float> %root)
  ret <1 x float> %recip
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
; Sine of a 1-wide float vector.
; The body is generated by the unary1to1 m4 macro with llvm.sin.f32; the macro
; is defined outside this chunk -- presumably it emits an extract / scalar
; call / insert / ret sequence like the commented-out lines (TODO confirm).
define <1 x float> @__svml_sin(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm.sin.f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
unary1to1(float,@llvm.sin.f32)
|
||||
|
||||
}
|
||||
|
||||
; Cosine of a 1-wide float vector.
; The body is generated by the unary1to1 m4 macro with llvm.cos.f32; the macro
; is defined outside this chunk -- presumably it emits an extract / scalar
; call / insert / ret sequence like the commented-out lines (TODO confirm).
define <1 x float> @__svml_cos(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm.cos.f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
unary1to1(float, @llvm.cos.f32)
|
||||
|
||||
}
|
||||
|
||||
; Computes sine and cosine of %0 and writes them through the two pointer
; arguments: the sine is stored to %1 and the cosine to %2.
; The function writes memory through its pointer arguments, so it must not be
; marked readnone; with readnone the optimizer is entitled to treat a call to
; this void function as dead and drop the stores.
define void @__svml_sincos(<1 x float>, <1 x float> *, <1 x float> *) nounwind alwaysinline {
|
||||
; %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
|
||||
; store <1 x float> %s, <1 x float> * %1
|
||||
; ret void
|
||||
%sin = call <1 x float> @__svml_sin (<1 x float> %0)
|
||||
%cos = call <1 x float> @__svml_cos (<1 x float> %0)
|
||||
store <1 x float> %sin, <1 x float> * %1
|
||||
store <1 x float> %cos, <1 x float> * %2
|
||||
ret void
|
||||
}
|
||||
|
||||
; Placeholder for tangent on this target.
; NOTE: no tangent is computed -- the function returns its argument unchanged
; (see the "UNSUPPORTED!" marker below).  All other lines in the body are
; commented-out earlier attempts.
define <1 x float> @__svml_tan(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm_tan_f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
;unasry1to1(float, @llvm.tan.f32)
|
||||
; UNSUPPORTED!
|
||||
ret <1 x float > %0
|
||||
}
|
||||
|
||||
; Placeholder for arctangent on this target.
; NOTE: no arctangent is computed -- the function returns its argument
; unchanged (see the "UNSUPPORTED!" marker below).  All other lines in the body
; are commented-out earlier attempts.
define <1 x float> @__svml_atan(<1 x float>) nounwind readnone alwaysinline {
|
||||
; %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
|
||||
; ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm_atan_f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
;unsary1to1(float,@llvm.atan.f32)
|
||||
;UNSUPPORTED!
|
||||
ret <1 x float > %0
|
||||
|
||||
}
|
||||
|
||||
; Placeholder for two-argument arctangent on this target.
; NOTE: no atan2 is computed -- the function ignores its second argument and
; returns the first argument unchanged (see the "UNSUPPORTED!" marker below).
; All other lines in the body are commented-out earlier attempts.
define <1 x float> @__svml_atan2(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
|
||||
;ret <1 x float> %ret
|
||||
;%y = extractelement <1 x float> %0, i32 0
|
||||
;%x = extractelement <1 x float> %1, i32 0
|
||||
;%q = fdiv float %y, %x
|
||||
;%a = call float @llvm.atan.f32 (float %q)
|
||||
;%rv = insertelement <1 x float> undef, float %a, i32 0
|
||||
;ret <1 x float> %rv
|
||||
; UNSUPPORTED!
|
||||
ret <1 x float > %0
|
||||
}
|
||||
|
||||
; Exponential of a 1-wide float vector.
; The body is generated by the unary1to1 m4 macro with llvm.exp.f32 (macro
; defined outside this chunk; presumably extract / scalar call / insert / ret
; -- TODO confirm).
define <1 x float> @__svml_exp(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
unary1to1(float, @llvm.exp.f32)
|
||||
}
|
||||
|
||||
; Logarithm of a 1-wide float vector.
; The body is generated by the unary1to1 m4 macro with llvm.log.f32 (macro
; defined outside this chunk; presumably extract / scalar call / insert / ret
; -- TODO confirm).
define <1 x float> @__svml_log(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
unary1to1(float, @llvm.log.f32)
|
||||
}
|
||||
|
||||
; Power function for 1-wide float vectors: first argument raised to the second.
; Extracts lane 0 of both arguments, calls the scalar llvm.pow.f32 intrinsic
; and re-packs the result into a <1 x float>.
define <1 x float> @__svml_pow(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_powf4(<1 x float> %0, <1 x float> %1)
|
||||
;ret <1 x float> %ret
|
||||
%r = extractelement <1 x float> %0, i32 0
|
||||
%e = extractelement <1 x float> %1, i32 0
|
||||
%s = call float @llvm.pow.f32(float %r,float %e)
|
||||
%rv = insertelement <1 x float> undef, float %s, i32 0
|
||||
ret <1 x float> %rv
|
||||
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
; Maximum of two 1-wide float vectors.
; Selects the first argument when it compares ordered-greater-than the second,
; otherwise the second.  Because "ogt" is an ordered compare, the second
; argument is returned whenever either input is NaN.
define <1 x float> @__max_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
|
||||
; %call = call <1 x float> @llvm.x86.sse.max.ps(<1 x float> %0, <1 x float> %1)
|
||||
; ret <1 x float> %call
|
||||
%a = extractelement <1 x float> %0, i32 0
|
||||
%b = extractelement <1 x float> %1, i32 0
|
||||
%d = fcmp ogt float %a, %b
|
||||
%r = select i1 %d, float %a, float %b
|
||||
%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
ret <1 x float> %rv
|
||||
}
|
||||
|
||||
; Minimum of two 1-wide float vectors.
; Selects the first argument when it compares ordered-less-than the second,
; otherwise the second.  Because "olt" is an ordered compare, the second
; argument is returned whenever either input is NaN.
define <1 x float> @__min_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
|
||||
; %call = call <1 x float> @llvm.x86.sse.min.ps(<1 x float> %0, <1 x float> %1)
|
||||
; ret <1 x float> %call
|
||||
%a = extractelement <1 x float> %0, i32 0
|
||||
%b = extractelement <1 x float> %1, i32 0
|
||||
%d = fcmp olt float %a, %b
|
||||
%r = select i1 %d, float %a, float %b
|
||||
%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
ret <1 x float> %rv
|
||||
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision sqrt
|
||||
|
||||
;declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||
|
||||
; Square root of a 1-wide double vector.
; The body is generated by the unary1to1 m4 macro with llvm.sqrt.f64 (macro
; defined outside this chunk; presumably extract / scalar call / insert / ret
; -- TODO confirm).
define <1 x double> @__sqrt_varying_double(<1 x double>) nounwind alwaysinline {
|
||||
;unarya2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
||||
;ret <1 x double> %ret
|
||||
unary1to1(double, @llvm.sqrt.f64)
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision min/max
|
||||
|
||||
;declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
;declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
; Minimum of two 1-wide double vectors.
; Selects the first argument when it compares ordered-less-than the second,
; otherwise the second; with a NaN input the ordered compare is false and the
; second argument is returned.
define <1 x double> @__min_varying_double(<1 x double>, <1 x double>) nounwind readnone {
|
||||
;binarsy2to4(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
||||
;ret <1 x double> %ret
|
||||
%a = extractelement <1 x double> %0, i32 0
|
||||
%b = extractelement <1 x double> %1, i32 0
|
||||
%d = fcmp olt double %a, %b
|
||||
%r = select i1 %d, double %a, double %b
|
||||
%rv = insertelement <1 x double> undef, double %r, i32 0
|
||||
ret <1 x double> %rv
|
||||
|
||||
}
|
||||
|
||||
; Maximum of two 1-wide double vectors.
; Selects the first argument when it compares ordered-greater-than the second,
; otherwise the second; with a NaN input the ordered compare is false and the
; second argument is returned.
define <1 x double> @__max_varying_double(<1 x double>, <1 x double>) nounwind readnone {
|
||||
;binary2sto4(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
||||
;ret <1 x double> %ret
|
||||
%a = extractelement <1 x double> %0, i32 0
|
||||
%b = extractelement <1 x double> %1, i32 0
|
||||
%d = fcmp ogt double %a, %b
|
||||
%r = select i1 %d, double %a, double %b
|
||||
%rv = insertelement <1 x double> undef, double %r, i32 0
|
||||
ret <1 x double> %rv
|
||||
|
||||
}
|
||||
|
||||
|
||||
; Reciprocal of a uniform (scalar) float, computed with a plain 1.0 / x fdiv.
; The pseudo-code in the comments below (approximate reciprocal plus one
; refinement step) does not describe this implementation.
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
|
||||
; uniform float iv = extract(__rcp_u(v), 0);
|
||||
; return iv * (2. - v * iv);
|
||||
%r = fdiv float 1.,%0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding floats
|
||||
|
||||
; Rounds a uniform (scalar) float by delegating to the varying version:
; wraps the scalar in a <1 x float>, calls __round_varying_float (defined
; outside this chunk) and extracts lane 0 of the result.
; NOTE: the long roundss discussion inside the body does not describe the
; code that follows it; no roundss intrinsic is called here.
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
|
||||
; roundss, round mode nearest 0b00 | don't signal precision exceptions 0b1000 = 8
|
||||
; the roundss intrinsic is a total mess--docs say:
|
||||
;
|
||||
; __m128 _mm_round_ss (__m128 a, __m128 b, const int c)
|
||||
;
|
||||
; b is a 128-bit parameter. The lowest 32 bits are the result of the rounding function
|
||||
; on b0. The higher order 96 bits are copied directly from input parameter a. The
|
||||
; return value is described by the following equations:
|
||||
;
|
||||
; r0 = RND(b0)
|
||||
; r1 = a1
|
||||
; r2 = a2
|
||||
; r3 = a3
|
||||
;
|
||||
; It doesn't matter what we pass as a, since we only need the r0 value
|
||||
; here. So we pass the same register for both.
|
||||
%v = insertelement<1 x float> undef, float %0, i32 0
|
||||
%rv = call <1 x float> @__round_varying_float(<1 x float> %v)
|
||||
%r=extractelement <1 x float> %rv, i32 0
|
||||
ret float %r
|
||||
|
||||
}
|
||||
|
||||
; Floor of a uniform (scalar) float, delegating to the varying version:
; wraps the scalar in a <1 x float>, calls __floor_varying_float (defined
; outside this chunk) and extracts lane 0 of the result.
define float @__floor_uniform_float(float) nounwind readonly alwaysinline {
|
||||
%v = insertelement<1 x float> undef, float %0, i32 0
|
||||
%rv = call <1 x float> @__floor_varying_float(<1 x float> %v)
|
||||
%r=extractelement <1 x float> %rv, i32 0
|
||||
ret float %r
|
||||
|
||||
}
|
||||
|
||||
; Ceiling of a uniform (scalar) float, delegating to the varying version:
; wraps the scalar in a <1 x float>, calls __ceil_varying_float (defined
; outside this chunk) and extracts lane 0 of the result.
define float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
|
||||
%v = insertelement<1 x float> undef, float %0, i32 0
|
||||
%rv = call <1 x float> @__ceil_varying_float(<1 x float> %v)
|
||||
%r=extractelement <1 x float> %rv, i32 0
|
||||
ret float %r
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding doubles
|
||||
|
||||
|
||||
; Rounds a uniform (scalar) double by calling @round, which is declared outside
; this chunk (presumably the C library round() -- TODO confirm).
define double @__round_uniform_double(double) nounwind readonly alwaysinline {
|
||||
%rs=call double @round(double %0)
|
||||
ret double %rs
|
||||
}
|
||||
|
||||
; Floor of a uniform (scalar) double via @floor, which is declared outside this
; chunk (presumably the C library floor() -- TODO confirm).
define double @__floor_uniform_double(double) nounwind readonly alwaysinline {
|
||||
%rs = call double @floor(double %0)
|
||||
ret double %rs
|
||||
}
|
||||
|
||||
; Ceiling of a uniform (scalar) double via @ceil, which is declared outside
; this chunk (presumably the C library ceil() -- TODO confirm).
define double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
|
||||
%rs = call double @ceil(double %0)
|
||||
ret double %rs
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; sqrt
|
||||
|
||||
|
||||
; Square root of a uniform (scalar) float via the llvm.sqrt.f32 intrinsic.
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||
%ret = call float @llvm.sqrt.f32(float %0)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
; Square root of a uniform (scalar) double via the llvm.sqrt.f64 intrinsic.
define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline {
|
||||
%ret = call double @llvm.sqrt.f64(double %0)
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rsqrt
|
||||
|
||||
|
||||
; Reciprocal square root of a uniform (scalar) float, computed as
; __rcp_uniform_float(__sqrt_uniform_float(x)).
define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||
%s = call float @__sqrt_uniform_float(float %0)
|
||||
%r = call float @__rcp_uniform_float(float %s)
|
||||
ret float %r
|
||||
}
|
||||
|
||||
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; fastmath
|
||||
|
||||
|
||||
; Intentionally empty on this target: takes no arguments, changes no state and
; returns immediately.
define void @__fastmath() nounwind alwaysinline {
|
||||
; no-op
|
||||
ret void
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
|
||||
; Maximum of two uniform (scalar) floats.
; Returns the first argument when it compares ordered-greater-than the second,
; otherwise the second (so the second is returned if either input is NaN).
define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||
%d = fcmp ogt float %0, %1
|
||||
%r = select i1 %d, float %0, float %1
|
||||
ret float %r
|
||||
|
||||
}
|
||||
|
||||
; Minimum of two uniform (scalar) floats.
; Returns the first argument when it compares ordered-less-than the second,
; otherwise the second (so the second is returned if either input is NaN).
define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||
%d = fcmp olt float %0, %1
|
||||
%r = select i1 %d, float %0, float %1
|
||||
ret float %r
|
||||
|
||||
}
|
||||
; Maximum of two uniform (scalar) doubles.
; Returns the first argument when it compares ordered-greater-than the second,
; otherwise the second (so the second is returned if either input is NaN).
define double @__max_uniform_double(double, double) nounwind readonly alwaysinline {
|
||||
%d = fcmp ogt double %0, %1
|
||||
%r = select i1 %d, double %0, double %1
|
||||
ret double %r
|
||||
|
||||
}
|
||||
|
||||
; Minimum of two uniform (scalar) doubles.
; Returns the first argument when it compares ordered-less-than the second,
; otherwise the second (so the second is returned if either input is NaN).
define double @__min_uniform_double(double, double) nounwind readonly alwaysinline {
|
||||
%d = fcmp olt double %0, %1
|
||||
%r = select i1 %d, double %0, double %1
|
||||
ret double %r
|
||||
|
||||
}
|
||||
|
||||
define_shuffles()
|
||||
|
||||
ctlztz()
|
||||
|
||||
define_prefetches()
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
@@ -98,6 +98,14 @@ declare void @__aos_to_soa4_float(float * noalias %p, <WIDTH x float> * noalias
|
||||
<WIDTH x float> * noalias %out2,
|
||||
<WIDTH x float> * noalias %out3) nounwind
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; half conversion routines
|
||||
|
||||
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
|
||||
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
|
||||
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
|
||||
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; math
|
||||
|
||||
|
||||
105
builtins/util.m4
105
builtins/util.m4
@@ -1805,10 +1805,69 @@ ok:
|
||||
ret void
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; new/delete
|
||||
|
||||
declare i8 * @malloc(i64)
|
||||
declare void @free(i8 *)
|
||||
|
||||
; Uniform allocation: forwards %size to malloc and returns the resulting
; pointer as-is.  The malloc result is not checked here.
define i8 * @__new_uniform(i64 %size) {
|
||||
%a = call i8 * @malloc(i64 %size)
|
||||
ret i8 * %a
|
||||
}
|
||||
|
||||
; Varying allocation with 32-bit per-lane sizes.
; Returns a vector of pointers encoded as i64.  The result vector is first
; zero-initialised on the stack; the per_lane body then, for each lane it is
; expanded for, zero-extends that lane's size to i64, calls malloc and stores
; the pointer (as an integer) into the lane's slot.  Lanes the body does not
; run for therefore keep the value 0.
; per_lane is an m4 macro defined elsewhere in util.m4 -- presumably it runs
; the body only for lanes enabled in %mask, substituting the lane index for
; LANE (TODO confirm).
define <WIDTH x i64> @__new_varying32(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
|
||||
%ret = alloca <WIDTH x i64>
|
||||
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
|
||||
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
|
||||
|
||||
per_lane(WIDTH, <WIDTH x MASK> %mask, `
|
||||
%sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
|
||||
%sz64_LANE_ID = zext i32 %sz_LANE_ID to i64
|
||||
%ptr_LANE_ID = call i8 * @malloc(i64 %sz64_LANE_ID)
|
||||
%ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
|
||||
%store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
|
||||
store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
|
||||
|
||||
%r = load <WIDTH x i64> * %ret
|
||||
ret <WIDTH x i64> %r
|
||||
}
|
||||
|
||||
; Varying allocation with 64-bit per-lane sizes.
; Returns a vector of pointers encoded as i64.  The result vector is first
; zero-initialised on the stack; the per_lane body then, for each lane it is
; expanded for, calls malloc with that lane's size and stores the pointer (as
; an integer) into the lane's slot.  Lanes the body does not run for therefore
; keep the value 0.
; per_lane is an m4 macro defined elsewhere in util.m4 -- presumably it runs
; the body only for lanes enabled in %mask, substituting the lane index for
; LANE (TODO confirm).
define <WIDTH x i64> @__new_varying64(<WIDTH x i64> %size, <WIDTH x MASK> %mask) {
|
||||
%ret = alloca <WIDTH x i64>
|
||||
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
|
||||
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
|
||||
|
||||
per_lane(WIDTH, <WIDTH x MASK> %mask, `
|
||||
%sz_LANE_ID = extractelement <WIDTH x i64> %size, i32 LANE
|
||||
%ptr_LANE_ID = call i8 * @malloc(i64 %sz_LANE_ID)
|
||||
%ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
|
||||
%store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
|
||||
store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
|
||||
|
||||
%r = load <WIDTH x i64> * %ret
|
||||
ret <WIDTH x i64> %r
|
||||
}
|
||||
|
||||
; Uniform deallocation: passes %ptr straight to free.
define void @__delete_uniform(i8 * %ptr) {
|
||||
call void @free(i8 * %ptr)
|
||||
ret void
|
||||
}
|
||||
|
||||
; Varying deallocation: %ptr holds one pointer per lane, encoded as i64.
; For each lane the per_lane body is expanded for, the lane's value is
; converted back to an i8 * and passed to free.
; per_lane is an m4 macro defined elsewhere in util.m4 -- presumably it runs
; the body only for lanes enabled in %mask (TODO confirm).
define void @__delete_varying(<WIDTH x i64> %ptr, <WIDTH x MASK> %mask) {
|
||||
per_lane(WIDTH, <WIDTH x MASK> %mask, `
|
||||
%iptr_LANE_ID = extractelement <WIDTH x i64> %ptr, i32 LANE
|
||||
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 *
|
||||
call void @free(i8 * %ptr_LANE_ID)
|
||||
')
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; read hw clock
|
||||
|
||||
define i64 @__clock() nounwind uwtable ssp {
|
||||
define i64 @__clock() nounwind {
|
||||
entry:
|
||||
tail call void asm sideeffect "xorl %eax,%eax \0A cpuid", "~{rax},~{rbx},~{rcx},~{rdx},~{dirflag},~{fpsr},~{flags}"() nounwind
|
||||
%0 = tail call { i32, i32 } asm sideeffect "rdtsc", "={ax},={dx},~{dirflag},~{fpsr},~{flags}"() nounwind
|
||||
@@ -2187,9 +2246,9 @@ return:
|
||||
define(`gen_masked_store', `
|
||||
define void @__masked_store_$3(<$1 x $2>* nocapture, <$1 x $2>, <$1 x i32>) nounwind alwaysinline {
|
||||
per_lane($1, <$1 x i32> %2, `
|
||||
%ptr_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE
|
||||
%storeval_ID = extractelement <$1 x $2> %1, i32 LANE
|
||||
store $2 %storeval_ID, $2 * %ptr_ID')
|
||||
%ptr_LANE_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE
|
||||
%storeval_LANE_ID = extractelement <$1 x $2> %1, i32 LANE
|
||||
store $2 %storeval_LANE_ID, $2 * %ptr_LANE_ID')
|
||||
ret void
|
||||
}
|
||||
')
|
||||
@@ -2644,7 +2703,7 @@ pl_known_mask:
|
||||
pl_all_on:
|
||||
;; the mask is all on--just expand the code for each lane sequentially
|
||||
forloop(i, 0, eval($1-1),
|
||||
`patsubst(`$3', `ID\|LANE', i)')
|
||||
`patsubst(`$3', `LANE', i)')
|
||||
br label %pl_done
|
||||
|
||||
pl_unknown_mask:
|
||||
@@ -2806,11 +2865,11 @@ define <$1 x $2> @__gather32_$2(<$1 x i32> %ptrs,
|
||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
||||
%ret_ptr = alloca <$1 x $2>
|
||||
per_lane($1, <$1 x i32> %vecmask, `
|
||||
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
|
||||
%ptr_ID = inttoptr i32 %iptr_ID to $2 *
|
||||
%val_ID = load $2 * %ptr_ID
|
||||
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
|
||||
store $2 %val_ID, $2 * %store_ptr_ID
|
||||
%iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
|
||||
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
|
||||
%val_LANE_ID = load $2 * %ptr_LANE_ID
|
||||
%store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
|
||||
store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
|
||||
')
|
||||
|
||||
%ret = load <$1 x $2> * %ret_ptr
|
||||
@@ -2822,11 +2881,11 @@ define <$1 x $2> @__gather64_$2(<$1 x i64> %ptrs,
|
||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
||||
%ret_ptr = alloca <$1 x $2>
|
||||
per_lane($1, <$1 x i32> %vecmask, `
|
||||
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
|
||||
%ptr_ID = inttoptr i64 %iptr_ID to $2 *
|
||||
%val_ID = load $2 * %ptr_ID
|
||||
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
|
||||
store $2 %val_ID, $2 * %store_ptr_ID
|
||||
%iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
|
||||
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
|
||||
%val_LANE_ID = load $2 * %ptr_LANE_ID
|
||||
%store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
|
||||
store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
|
||||
')
|
||||
|
||||
%ret = load <$1 x $2> * %ret_ptr
|
||||
@@ -2910,10 +2969,10 @@ define void @__scatter_base_offsets64_$2(i8* %base, <$1 x i64> %offsets, i32 %of
|
||||
define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
|
||||
<$1 x i32> %mask) nounwind alwaysinline {
|
||||
per_lane($1, <$1 x i32> %mask, `
|
||||
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE
|
||||
%ptr_ID = inttoptr i32 %iptr_ID to $2 *
|
||||
%val_ID = extractelement <$1 x $2> %values, i32 LANE
|
||||
store $2 %val_ID, $2 * %ptr_ID
|
||||
%iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
|
||||
%ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
|
||||
%val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
|
||||
store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
|
||||
')
|
||||
ret void
|
||||
}
|
||||
@@ -2922,10 +2981,10 @@ define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
|
||||
define void @__scatter64_$2(<$1 x i64> %ptrs, <$1 x $2> %values,
|
||||
<$1 x i32> %mask) nounwind alwaysinline {
|
||||
per_lane($1, <$1 x i32> %mask, `
|
||||
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE
|
||||
%ptr_ID = inttoptr i64 %iptr_ID to $2 *
|
||||
%val_ID = extractelement <$1 x $2> %values, i32 LANE
|
||||
store $2 %val_ID, $2 * %ptr_ID
|
||||
%iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
|
||||
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
|
||||
%val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
|
||||
store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
|
||||
')
|
||||
ret void
|
||||
}
|
||||
|
||||
@@ -2114,7 +2114,8 @@ bool CWriter::doInitialization(Module &M) {
|
||||
I->getName() == "memset" || I->getName() == "memset_pattern16" ||
|
||||
I->getName() == "puts" ||
|
||||
I->getName() == "printf" || I->getName() == "putchar" ||
|
||||
I->getName() == "fflush")
|
||||
I->getName() == "fflush" || I->getName() == "malloc" ||
|
||||
I->getName() == "free")
|
||||
continue;
|
||||
|
||||
// Don't redeclare ispc's own intrinsics
|
||||
@@ -3437,6 +3438,9 @@ void CWriter::visitCallInst(CallInst &I) {
|
||||
Callee = RF;
|
||||
}
|
||||
|
||||
if (Callee->getName() == "malloc")
|
||||
Out << "(uint8_t *)";
|
||||
|
||||
if (NeedsCast) {
|
||||
// Ok, just cast the pointer type.
|
||||
Out << "((";
|
||||
|
||||
13
ctx.cpp
13
ctx.cpp
@@ -642,12 +642,12 @@ FunctionEmitContext::inSwitchStatement() const {
|
||||
|
||||
void
|
||||
FunctionEmitContext::Break(bool doCoherenceCheck) {
|
||||
Assert(controlFlowInfo.size() > 0);
|
||||
if (breakTarget == NULL) {
|
||||
Error(currentPos, "\"break\" statement is illegal outside of "
|
||||
"for/while/do loops and \"switch\" statements.");
|
||||
return;
|
||||
}
|
||||
Assert(controlFlowInfo.size() > 0);
|
||||
|
||||
if (bblock == NULL)
|
||||
return;
|
||||
@@ -721,6 +721,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
|
||||
"for/while/do/foreach loops.");
|
||||
return;
|
||||
}
|
||||
Assert(controlFlowInfo.size() > 0);
|
||||
|
||||
if (ifsInCFAllUniform(CFInfo::Loop) || GetInternalMask() == LLVMMaskAllOn) {
|
||||
// Similarly to 'break' statements, we can immediately jump to the
|
||||
@@ -1279,7 +1280,11 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetStringPtr(const std::string &str) {
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
|
||||
#else
|
||||
llvm::Constant *lstr = llvm::ConstantArray::get(*g->ctx, str);
|
||||
#endif
|
||||
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
|
||||
llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
|
||||
true /*isConst*/,
|
||||
@@ -1329,7 +1334,11 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
|
||||
|
||||
static llvm::Value *
|
||||
lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s);
|
||||
#else
|
||||
llvm::Constant *sConstant = llvm::ConstantArray::get(*g->ctx, s);
|
||||
#endif
|
||||
llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
|
||||
true /* const */,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
@@ -2923,7 +2932,7 @@ FunctionEmitContext::SyncInst() {
|
||||
|
||||
|
||||
/** When we gathering from or scattering to a varying atomic type, we need
|
||||
to add an appropraite offset to the final address for each lane right
|
||||
to add an appropriate offset to the final address for each lane right
|
||||
before we use it. Given a varying pointer we're about to use and its
|
||||
type, this function determines whether these offsets are needed and
|
||||
returns an updated pointer that incorporates these offsets if needed.
|
||||
|
||||
81
decl.cpp
81
decl.cpp
@@ -113,6 +113,12 @@ DeclSpecs::DeclSpecs(const Type *t, StorageClass sc, int tq) {
|
||||
const Type *
|
||||
DeclSpecs::GetBaseType(SourcePos pos) const {
|
||||
const Type *bt = baseType;
|
||||
|
||||
if (bt == NULL) {
|
||||
Warning(pos, "No type specified in declaration. Assuming int32.");
|
||||
bt = AtomicType::UnboundInt32;
|
||||
}
|
||||
|
||||
if (vectorSize > 0) {
|
||||
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(bt);
|
||||
if (atomicType == NULL) {
|
||||
@@ -171,6 +177,11 @@ Declarator::Declarator(DeclaratorKind dk, SourcePos p)
|
||||
void
|
||||
Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
|
||||
const Type *t = GetType(ds);
|
||||
if (t == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
Symbol *sym = GetSymbol();
|
||||
if (sym != NULL) {
|
||||
sym->type = t;
|
||||
@@ -248,8 +259,10 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
// already have been added to the symbol table by AddGlobal() by the
|
||||
// time we get here.)
|
||||
Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
|
||||
if (funSym != NULL)
|
||||
if (funSym == NULL)
|
||||
// May be NULL due to error earlier in compilation
|
||||
Assert(m->errorCount > 0);
|
||||
else
|
||||
funSym->pos = pos;
|
||||
|
||||
// Walk down to the declarator for the function. (We have to get past
|
||||
@@ -262,11 +275,18 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
|
||||
for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
|
||||
Symbol *sym = d->GetSymbolForFunctionParameter(i);
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
if (sym->type == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
|
||||
funArgs->push_back(sym);
|
||||
}
|
||||
|
||||
funSym->type = funSym->type->ResolveUnboundVariability(Type::Varying);
|
||||
if (funSym != NULL)
|
||||
funSym->type = funSym->type->ResolveUnboundVariability(Type::Varying);
|
||||
|
||||
return funSym;
|
||||
}
|
||||
@@ -331,6 +351,16 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
break;
|
||||
|
||||
case DK_ARRAY:
|
||||
if (type == AtomicType::Void) {
|
||||
Error(pos, "Arrays of \"void\" type are illegal.");
|
||||
return NULL;
|
||||
}
|
||||
if (dynamic_cast<const ReferenceType *>(type)) {
|
||||
Error(pos, "Arrays of references (type \"%s\") are illegal.",
|
||||
type->GetString().c_str());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
type = new ArrayType(type, arraySize);
|
||||
if (child)
|
||||
return child->GetType(type, ds);
|
||||
@@ -357,6 +387,11 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
"function parameter declaration for parameter \"%s\".",
|
||||
lGetStorageClassName(d->declSpecs->storageClass),
|
||||
sym->name.c_str());
|
||||
if (sym->type == AtomicType::Void) {
|
||||
Error(sym->pos, "Parameter with type \"void\" illegal in function "
|
||||
"parameter list.");
|
||||
sym->type = NULL;
|
||||
}
|
||||
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
|
||||
if (at != NULL) {
|
||||
@@ -368,8 +403,12 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
// report this differently than it was originally declared
|
||||
// in the function, but it's not clear that this is a
|
||||
// significant problem.)
|
||||
sym->type = PointerType::GetUniform(at->GetElementType());
|
||||
if (at->GetElementType() == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
sym->type = PointerType::GetUniform(at->GetElementType());
|
||||
// Make sure there are no unsized arrays (other than the
|
||||
// first dimension) in function parameter lists.
|
||||
at = dynamic_cast<const ArrayType *>(at->GetElementType());
|
||||
@@ -413,6 +452,10 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
Error(pos, "No return type provided in function declaration.");
|
||||
return NULL;
|
||||
}
|
||||
if (dynamic_cast<const FunctionType *>(returnType) != NULL) {
|
||||
Error(pos, "Illegal to return function type from function.");
|
||||
return NULL;
|
||||
}
|
||||
|
||||
bool isExported = ds && (ds->storageClass == SC_EXPORT);
|
||||
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
|
||||
@@ -434,6 +477,11 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
return NULL;
|
||||
}
|
||||
|
||||
if (child == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
const Type *functionType =
|
||||
new FunctionType(returnType, args, argNames, argDefaults,
|
||||
argPos, isTask, isExported, isExternC);
|
||||
@@ -536,14 +584,23 @@ Declaration::GetVariableDeclarations() const {
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL)
|
||||
if (decl == NULL) {
|
||||
// Ignore earlier errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
if (sym == NULL || sym->type == NULL) {
|
||||
// Ignore errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
|
||||
if (dynamic_cast<const FunctionType *>(sym->type) == NULL) {
|
||||
if (sym->type == AtomicType::Void)
|
||||
Error(sym->pos, "\"void\" type variable illegal in declaration.");
|
||||
else if (dynamic_cast<const FunctionType *>(sym->type) == NULL) {
|
||||
m->symbolTable->AddVariable(sym);
|
||||
vars.push_back(VariableDeclaration(sym, decl->initExpr));
|
||||
}
|
||||
@@ -558,11 +615,18 @@ Declaration::DeclareFunctions() {
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL)
|
||||
if (decl == NULL) {
|
||||
// Ignore earlier errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
if (sym == NULL || sym->type == NULL) {
|
||||
// Ignore errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
|
||||
if (dynamic_cast<const FunctionType *>(sym->type) == NULL)
|
||||
@@ -610,6 +674,9 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
|
||||
Symbol *sym = d->GetSymbol();
|
||||
|
||||
if (sym->type == AtomicType::Void)
|
||||
Error(d->pos, "\"void\" type illegal for struct member.");
|
||||
|
||||
const ArrayType *arrayType =
|
||||
dynamic_cast<const ArrayType *>(sym->type);
|
||||
if (arrayType != NULL && arrayType->GetElementCount() == 0) {
|
||||
|
||||
@@ -1,3 +1,42 @@
|
||||
=== v1.1.4 === (4 February 2012)
|
||||
|
||||
There are two major bugfixes for Windows in this release. First, a number
|
||||
of failures in AVX code generation on Windows have been fixed; AVX on
|
||||
Windows now has no known issues. Second, a longstanding bug in parsing 64-bit
|
||||
integer constants on Windows has been fixed.
|
||||
|
||||
This release features a new experimental scalar target, contributed by Gabe
|
||||
Weisz <gweisz@cs.cmu.edu>. This target ("--target=generic-1") compiles
|
||||
gangs of single program instances (i.e. programCount == 1); it can be
|
||||
useful for debugging ispc programs.
|
||||
|
||||
The compiler now supports dynamic memory allocation in ispc programs (with
|
||||
"new" and "delete" operators based on C++). See
|
||||
http://ispc.github.com/ispc.html#dynamic-memory-allocation in the
|
||||
documentation for more information.
|
||||
|
||||
ispc now performs "short circuit" evaluation of the || and && logical
|
||||
operators and the ? : selection operator. (This represents the correction
|
||||
of a major incompatibility with C.) Code like "(index < arraySize &&
|
||||
array[index] == 1)" thus now executes as in C, where "array[index]" won't
|
||||
be evaluated unless "index" is less than "arraySize".
|
||||
|
||||
The standard library now provides "local" atomic operations, which are
|
||||
atomic across the gang of program instances (but not across other gangs or
|
||||
other hardware threads). See the updated documentation on atomics for more
|
||||
information:
|
||||
http://ispc.github.com/ispc.html#atomic-operations-and-memory-fences.
|
||||
|
||||
The standard library now offers a clock() function, which returns a uniform
|
||||
int64 value that counts processor cycles; it can be used for
|
||||
fine-resolution timing measurements.
|
||||
|
||||
Finally (of limited interest now): ispc now supports the forthcoming AVX2
|
||||
instruction set, due with Haswell-generation CPUs. All tests and examples
|
||||
compile and execute correctly with AVX2. (Thanks specifically to Craig
|
||||
Topper and Nadav Rotem for work on AVX2 support in LLVM, which made this
|
||||
possible.)
|
||||
|
||||
=== v1.1.3 === (20 January 2012)
|
||||
|
||||
With this release, the language now supports "switch" statements, with the
|
||||
|
||||
318
docs/ispc.rst
318
docs/ispc.rst
@@ -96,6 +96,9 @@ Contents:
|
||||
|
||||
+ `Declarations and Initializers`_
|
||||
+ `Expressions`_
|
||||
|
||||
* `Dynamic Memory Allocation`_
|
||||
|
||||
+ `Control Flow`_
|
||||
|
||||
* `Conditional Statements: "if"`_
|
||||
@@ -1148,6 +1151,7 @@ in C:
|
||||
* Structs and arrays
|
||||
* Support for recursive function calls
|
||||
* Support for separate compilation of source files
|
||||
* "Short-circuit" evaluation of ``||``, ``&&`` and ``? :`` operators
|
||||
* The preprocessor
|
||||
|
||||
``ispc`` adds a number of features from C++ and C99 to this base:
|
||||
@@ -1162,6 +1166,7 @@ in C:
|
||||
* The ``inline`` qualifier to indicate that a function should be inlined
|
||||
* Function overloading by parameter type
|
||||
* Hexadecimal floating-point constants
|
||||
* Dynamic memory allocation with ``new`` and ``delete``.
|
||||
|
||||
``ispc`` also adds a number of new features that aren't in C89, C99, or
|
||||
C++:
|
||||
@@ -1180,7 +1185,6 @@ C++:
|
||||
There are a number of features of C89 that are not supported in ``ispc``
|
||||
but are likely to be supported in future releases:
|
||||
|
||||
* Short circuiting of logical operations
|
||||
* There are no types named ``char``, ``short``, or ``long`` (or ``long
|
||||
double``). However, there are built-in ``int8``, ``int16``, and
|
||||
``int64`` types
|
||||
@@ -1965,19 +1969,137 @@ operator also work as expected.
|
||||
(*fp).a = 0;
|
||||
fp->b = 1;
|
||||
|
||||
As in C and C++, evaluation of the ``||`` and ``&&`` logical operators as
|
||||
well as the selection operator ``? :`` is "short-circuited"; the right hand
|
||||
side won't be evaluated if the value from the left-hand side determines the
|
||||
logical operator's value. For example, in the following code,
|
||||
``array[index]`` won't be evaluated for values of ``index`` that are
|
||||
greater than or equal to ``NUM_ITEMS``.
|
||||
|
||||
::
|
||||
|
||||
if (index < NUM_ITEMS && array[index] > 0) {
|
||||
// ...
|
||||
}
|
||||
|
||||
|
||||
Dynamic Memory Allocation
|
||||
-------------------------
|
||||
|
||||
``ispc`` programs can dynamically allocate (and free) memory, using syntax
|
||||
based on C++'s ``new`` and ``delete`` operators:
|
||||
|
||||
::
|
||||
|
||||
int count = ...;
|
||||
int *ptr = new uniform int[count];
|
||||
// use ptr...
|
||||
delete[] ptr;
|
||||
|
||||
In the above code, each program instance allocates its own ``count``-sized
|
||||
array of ``uniform int`` values, uses that memory, and then deallocates
|
||||
that memory. Uses of ``new`` and ``delete`` in ``ispc`` programs are
|
||||
serviced by corresponding calls to the system C library's ``malloc()`` and
|
||||
``free()`` functions.
|
||||
|
||||
After a pointer has been deleted, it is illegal to access the memory it
|
||||
points to. However, note that deletion happens on a per-program-instance
|
||||
basis. In other words, consider the following code:
|
||||
|
||||
::
|
||||
|
||||
int *ptr = new uniform int[count];
|
||||
// use ptr
|
||||
if (count > 1000)
|
||||
delete[] ptr;
|
||||
// ...
|
||||
|
||||
Here, the program instances where ``count`` is greater than 1000 have
|
||||
deleted the dynamically allocated memory pointed to by ``ptr``, but the
|
||||
other program instances have not. As such, it's illegal for the former set
|
||||
of program instances to access ``*ptr``, but it's perfectly fine for the
|
||||
latter set to continue to use the memory ``ptr`` points to. Note that it
|
||||
is illegal to delete a pointer value returned by ``new`` more than one
|
||||
time.
|
||||
|
||||
Sometimes, it's useful to be able to do a single allocation for the entire
|
||||
gang of program instances. A ``new`` statement can be qualified with
|
||||
``uniform`` to indicate a single memory allocation:
|
||||
|
||||
::
|
||||
|
||||
float * uniform ptr = uniform new float[10];
|
||||
|
||||
While a regular call to ``new`` returns a ``varying`` pointer (i.e. a
|
||||
distinct pointer to separately-allocated memory for each program instance),
|
||||
a ``uniform new`` performs a single allocation and returns a ``uniform``
|
||||
pointer.
|
||||
|
||||
When using ``uniform new``, it's important to be aware of a subtlety; if
|
||||
the returned pointer is stored in a varying pointer variable (as may be
|
||||
appropriate and useful for the particular program being written), then the
|
||||
varying pointer may inadvertently be passed to a subsequent ``delete``
|
||||
statement, which is an error:
|
||||
|
||||
::
|
||||
|
||||
float *ptr = uniform new float[10];
|
||||
// use ptr...
|
||||
delete ptr; // ERROR: varying pointer is deleted
|
||||
|
||||
In this case, ``ptr`` will be deleted multiple times, once for each
|
||||
executing program instance, which is an error (unless it happens that only
|
||||
a single program instance is active in the above code.)
|
||||
|
||||
When using ``new`` statements, it's important to make an appropriate choice
|
||||
of ``uniform`` or ``varying`` (as always, the default), for both the
|
||||
``new`` operator itself as well as the type of data being allocated, based
|
||||
on the program's needs. Consider the following four memory allocations:
|
||||
|
||||
::
|
||||
|
||||
uniform float * uniform p1 = uniform new uniform float[10];
|
||||
float * uniform p2 = uniform new float[10];
|
||||
uniform float * p3 = new uniform float[10];
|
||||
float * p4 = new float[10];
|
||||
|
||||
Assuming that a ``float`` is 4 bytes in memory and if the gang size is 8
|
||||
program instances, then the first allocation represents a single allocation
|
||||
of 40 bytes, the second is a single allocation of 8*4*10 = 320 bytes, the
|
||||
third is 8 allocations of 40 bytes, and the last performs 8 allocations of
|
||||
8*4*10 = 320 bytes each.
|
||||
|
||||
Note in particular that varying allocations of varying data types are rarely
|
||||
desirable in practice. In that case, each program instance is performing a
|
||||
separate allocation of ``varying float`` memory. In this case, it's likely
|
||||
that the program instances will only access a single element of each
|
||||
``varying float``, which is wasteful.
|
||||
|
||||
Although ``ispc`` doesn't support constructors or destructors like C++, it
|
||||
is possible to provide initializer values with ``new`` statements:
|
||||
|
||||
::
|
||||
|
||||
struct Point { float x, y, z; };
|
||||
Point *pptr = new Point(10, 20, 30);
|
||||
|
||||
Here for example, the "x" element of the returned ``Point`` is initialized
|
||||
to have the value 10 and so forth. In general, the rules for how
|
||||
initializer values provided in ``new`` statements are used to initialize
|
||||
complex data types follow the same rules as initializers for variables
|
||||
described in `Declarations and Initializers`_.
|
||||
|
||||
Control Flow
|
||||
------------
|
||||
|
||||
``ispc`` supports most of C's control flow constructs, including ``if``,
|
||||
``for``, ``while``, ``do``. It also supports variants of C's control flow
|
||||
``switch``, ``for``, ``while``, ``do``. It has limited support for
|
||||
``goto``, detailed below. It also supports variants of C's control flow
|
||||
constructs that provide hints about the expected runtime coherence of the
|
||||
control flow at that statement. It also provides parallel looping
|
||||
constructs, ``foreach`` and ``foreach_tiled``, all of which will be
|
||||
detailed in this section.
|
||||
|
||||
``ispc`` does not currently support ``switch`` statements or ``goto``.
|
||||
|
||||
Conditional Statements: "if"
|
||||
----------------------------
|
||||
|
||||
@@ -3267,24 +3389,53 @@ Systems Programming Support
|
||||
Atomic Operations and Memory Fences
|
||||
-----------------------------------
|
||||
|
||||
The usual range of atomic memory operations are provided in ``ispc``,
|
||||
including variants to handle both uniform and varying types. As a first
|
||||
example, consider on variant of the 32-bit integer atomic add routine:
|
||||
The standard range of atomic memory operations are provided by the standard
|
||||
library of ``ispc``, including variants to handle both uniform and varying
|
||||
types as well as "local" and "global" atomics.
|
||||
|
||||
Local atomics provide atomic behavior across the program instances in a
|
||||
gang, but not across multiple gangs or memory operations in different
|
||||
hardware threads. To see why they are needed, consider a histogram
|
||||
calculation where each program instance in the gang computes which bucket a
|
||||
value lies in and then increments a corresponding counter. If the code is
|
||||
written like this:
|
||||
|
||||
::
|
||||
|
||||
int32 atomic_add_global(uniform int32 * uniform ptr, int32 delta)
|
||||
uniform int count[N_BUCKETS] = ...;
|
||||
float value = ...;
|
||||
int bucket = clamp(value / N_BUCKETS, 0, N_BUCKETS);
|
||||
++count[bucket]; // ERROR: undefined behavior if collisions
|
||||
|
||||
The semantics are the expected ones for an atomic add function: the pointer
|
||||
points to a single location in memory (the same one for all program
|
||||
instances), and for each executing program instance, the value stored in
|
||||
the location that ``ptr`` points to has that program instance's value
|
||||
"delta" added to it atomically, and the old value at that location is
|
||||
returned from the function. (Thus, if multiple processors simultaneously
|
||||
issue atomic adds to the same memory location, the adds will be serialized
|
||||
by the hardware so that the correct result is computed in the end.
|
||||
Furthermore, the atomic adds are serialized across the running program
|
||||
instances.)
|
||||
then the program's behavior is undefined: whenever multiple program
|
||||
instances have values that map to the same value of ``bucket``, then the
|
||||
effect of the increment is undefined. (See the discussion in the `Data
|
||||
Races Within a Gang`_ section; in the case here, there isn't a sequence
|
||||
point between one program instance updating ``count[bucket]`` and the other
|
||||
program instance reading its value.)
|
||||
|
||||
The ``atomic_add_local()`` function can be used in this case; as a local
|
||||
atomic it is atomic across the gang of program instances, such that the
|
||||
expected result is computed.
|
||||
|
||||
::
|
||||
|
||||
...
|
||||
int bucket = clamp(value / N_BUCKETS, 0, N_BUCKETS);
|
||||
atomic_add_local(&count[bucket], 1);
|
||||
|
||||
It uses this variant of the 32-bit integer atomic add routine:
|
||||
|
||||
::
|
||||
|
||||
int32 atomic_add_local(uniform int32 * uniform ptr, int32 delta)
|
||||
|
||||
The semantics of this routine are typical for an atomic add function: the
|
||||
pointer here points to a single location in memory (the same one for all
|
||||
program instances), and for each executing program instance, the value
|
||||
stored in the location that ``ptr`` points to has that program instance's
|
||||
value "delta" added to it atomically, and the old value at that location is
|
||||
returned from the function.
|
||||
|
||||
One thing to note is that the type of the value being added to is a
|
||||
``uniform`` integer, while the increment amount and the return value are
|
||||
@@ -3295,45 +3446,76 @@ atomics for the running program instances may be issued in arbitrary order;
|
||||
it's not guaranteed that they will be issued in ``programIndex`` order, for
|
||||
example.
|
||||
|
||||
Here are the declarations of the ``int32`` variants of these functions.
|
||||
There are also ``int64`` equivalents as well as variants that take
|
||||
``unsigned`` ``int32`` and ``int64`` values. (The ``atomic_swap_global()``
|
||||
function can be used with ``float`` and ``double`` types as well.)
|
||||
Global atomics are more powerful than local atomics; they are atomic across
|
||||
both the program instances in the gang as well as atomic across different
|
||||
gangs and different hardware threads. For example, for the global variant
|
||||
of the atomic used above,
|
||||
|
||||
::
|
||||
|
||||
int32 atomic_add_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_subtract_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_min_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_max_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_and_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_or_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_xor_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_swap_global(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_add_global(uniform int32 * uniform ptr, int32 delta)
|
||||
|
||||
There are also variants of these functions that take ``uniform`` values for
|
||||
the operand and return a ``uniform`` result. These correspond to a single
|
||||
if multiple processors simultaneously issue atomic adds to the same memory
|
||||
location, the adds will be serialized by the hardware so that the correct
|
||||
result is computed in the end.
|
||||
|
||||
Here are the declarations of the ``int32`` variants of these functions.
|
||||
There are also ``int64`` equivalents as well as variants that take
|
||||
``unsigned`` ``int32`` and ``int64`` values.
|
||||
|
||||
::
|
||||
|
||||
int32 atomic_add_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_subtract_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_min_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_max_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_and_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_or_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_xor_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
int32 atomic_swap_{local,global}(uniform int32 * uniform ptr, int32 value)
|
||||
|
||||
Support for ``float`` and ``double`` types is also available. For local
|
||||
atomics, all but the logical operations are available. (There are
|
||||
corresponding ``double`` variants of these, not listed here.)
|
||||
|
||||
::
|
||||
|
||||
float atomic_add_local(uniform float * uniform ptr, float value)
|
||||
float atomic_subtract_local(uniform float * uniform ptr, float value)
|
||||
float atomic_min_local(uniform float * uniform ptr, float value)
|
||||
float atomic_max_local(uniform float * uniform ptr, float value)
|
||||
float atomic_swap_local(uniform float * uniform ptr, float value)
|
||||
|
||||
For global atomics, only atomic swap is available for these types:
|
||||
|
||||
::
|
||||
|
||||
float atomic_swap_global(uniform float * uniform ptr, float value)
|
||||
double atomic_swap_global(uniform double * uniform ptr, double value)
|
||||
|
||||
There are also variants of the atomic that take ``uniform`` values for the
|
||||
operand and return a ``uniform`` result. These correspond to a single
|
||||
atomic operation being performed for the entire gang of program instances,
|
||||
rather than one per program instance.
|
||||
|
||||
::
|
||||
|
||||
uniform int32 atomic_add_global(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_subtract_global(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_min_global(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_max_global(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_and_global(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_or_global(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_xor_global(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_swap_global(uniform int32 * uniform ptr,
|
||||
uniform int32 newval)
|
||||
uniform int32 atomic_add_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_subtract_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_min_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_max_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_and_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_or_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_xor_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 value)
|
||||
uniform int32 atomic_swap_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 newval)
|
||||
|
||||
Be careful that you use the atomic function that you mean to; consider the
|
||||
following code:
|
||||
@@ -3357,8 +3539,7 @@ will cause the desired atomic add function to be called.
|
||||
::
|
||||
|
||||
extern uniform int32 counter;
|
||||
int32 one = 1;
|
||||
int32 myCounter = atomic_add_global(&counter, one);
|
||||
int32 myCounter = atomic_add_global(&counter, (varying int32)1);
|
||||
|
||||
There is a third variant of each of these atomic functions that takes a
|
||||
``varying`` pointer; this allows each program instance to issue an atomic
|
||||
@@ -3368,30 +3549,27 @@ the same location in memory!)
|
||||
|
||||
::
|
||||
|
||||
int32 atomic_add_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_subtract_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_min_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_max_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_and_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_or_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_xor_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_swap_global(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_add_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_subtract_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_min_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_max_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_and_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_or_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_xor_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_swap_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
|
||||
There are also atomic swap and "compare and exchange" functions.
|
||||
Compare and exchange atomically compares the value in "val" to
|
||||
"compare"--if they match, it assigns "newval" to "val". In either case,
|
||||
the old value of "val" is returned. (As with the other atomic operations,
|
||||
there are also ``unsigned`` and 64-bit variants of this function.
|
||||
Furthermore, there are ``float`` and ``double`` variants as well.)
|
||||
There are also atomic "compare and exchange" functions. Compare and
|
||||
exchange atomically compares the value in "val" to "compare"--if they
|
||||
match, it assigns "newval" to "val". In either case, the old value of
|
||||
"val" is returned. (As with the other atomic operations, there are also
|
||||
``unsigned`` and 64-bit variants of this function. Furthermore, there are
|
||||
``float`` and ``double`` variants as well.)
|
||||
|
||||
::
|
||||
|
||||
int32 atomic_swap_global(uniform int32 * uniform ptr, int32 newvalue)
|
||||
uniform int32 atomic_swap_global(uniform int32 * uniform ptr,
|
||||
uniform int32 newvalue)
|
||||
int32 atomic_compare_exchange_global(uniform int32 * uniform ptr,
|
||||
int32 compare, int32 newval)
|
||||
uniform int32 atomic_compare_exchange_global(uniform int32 * uniform ptr,
|
||||
int32 atomic_compare_exchange_{local,global}(uniform int32 * uniform ptr,
|
||||
int32 compare, int32 newval)
|
||||
uniform int32 atomic_compare_exchange_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 compare, uniform int32 newval)
|
||||
|
||||
``ispc`` also has a standard library routine that inserts a memory barrier
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.1.3
|
||||
PROJECT_NUMBER = 1.1.4
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -212,104 +212,44 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||
RNGState rngstate;
|
||||
|
||||
seed_rng(&rngstate, y0);
|
||||
float invSamples = 1.f / nsubsamples;
|
||||
|
||||
// Compute the mapping between the 'programCount'-wide program
|
||||
// instances running in parallel and samples in the image.
|
||||
//
|
||||
// For now, we'll always take four samples per pixel, so start by
|
||||
// initializing du and dv with offsets into subpixel samples. We'll
|
||||
// take care of further updating du and dv for the case where we're
|
||||
// doing more than 4 program instances in parallel shortly.
|
||||
uniform float uSteps[4] = { 0, 1, 0, 1 };
|
||||
uniform float vSteps[4] = { 0, 0, 1, 1 };
|
||||
float du = uSteps[programIndex % 4] / nsubsamples;
|
||||
float dv = vSteps[programIndex % 4] / nsubsamples;
|
||||
foreach_tiled(y = y0 ... y1, x = 0 ... w,
|
||||
u = 0 ... nsubsamples, v = 0 ... nsubsamples) {
|
||||
float du = (float)u * invSamples, dv = (float)v * invSamples;
|
||||
|
||||
// Now handle the case where we are able to do more than one pixel's
|
||||
// worth of work at once. nx records the number of pixels in the x
|
||||
// direction we do per iteration and ny the number in y.
|
||||
uniform int nx = 1, ny = 1;
|
||||
// Figure out x,y pixel in NDC
|
||||
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
||||
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
||||
float ret = 0.f;
|
||||
Ray ray;
|
||||
Isect isect;
|
||||
|
||||
// FIXME: We actually need ny to be 1 regardless of the decomposition,
|
||||
// since the task decomposition is one scanline high.
|
||||
ray.org = 0.f;
|
||||
|
||||
if (programCount == 8) {
|
||||
// Do two pixels at once in the x direction
|
||||
nx = 2;
|
||||
if (programIndex >= 4)
|
||||
// And shift the offsets for the second pixel's worth of work
|
||||
++du;
|
||||
}
|
||||
else if (programCount == 16) {
|
||||
nx = 4;
|
||||
ny = 1;
|
||||
if (programIndex >= 4 && programIndex < 8)
|
||||
++du;
|
||||
if (programIndex >= 8 && programIndex < 12)
|
||||
du += 2;
|
||||
if (programIndex >= 12)
|
||||
du += 3;
|
||||
}
|
||||
// Poor man's perspective projection
|
||||
ray.dir.x = px;
|
||||
ray.dir.y = py;
|
||||
ray.dir.z = -1.0;
|
||||
vnormalize(ray.dir);
|
||||
|
||||
// Now loop over all of the pixels, stepping in x and y as calculated
|
||||
// above. (Assumes that ny divides y and nx divides x...)
|
||||
for (uniform int y = y0; y < y1; y += ny) {
|
||||
for (uniform int x = 0; x < w; x += nx) {
|
||||
// Figure out x,y pixel in NDC
|
||||
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
|
||||
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
|
||||
float ret = 0.f;
|
||||
Ray ray;
|
||||
Isect isect;
|
||||
isect.t = 1.0e+17;
|
||||
isect.hit = 0;
|
||||
|
||||
ray.org = 0.f;
|
||||
for (uniform int snum = 0; snum < 3; ++snum)
|
||||
ray_sphere_intersect(isect, ray, spheres[snum]);
|
||||
ray_plane_intersect(isect, ray, plane);
|
||||
|
||||
// Poor man's perspective projection
|
||||
ray.dir.x = px;
|
||||
ray.dir.y = py;
|
||||
ray.dir.z = -1.0;
|
||||
vnormalize(ray.dir);
|
||||
// Note use of 'coherent' if statement; the set of rays we
|
||||
// trace will often all hit or all miss the scene
|
||||
cif (isect.hit) {
|
||||
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
||||
ret *= invSamples * invSamples;
|
||||
|
||||
isect.t = 1.0e+17;
|
||||
isect.hit = 0;
|
||||
|
||||
for (uniform int snum = 0; snum < 3; ++snum)
|
||||
ray_sphere_intersect(isect, ray, spheres[snum]);
|
||||
ray_plane_intersect(isect, ray, plane);
|
||||
|
||||
// Note use of 'coherent' if statement; the set of rays we
|
||||
// trace will often all hit or all miss the scene
|
||||
cif (isect.hit)
|
||||
ret = ambient_occlusion(isect, plane, spheres, rngstate);
|
||||
|
||||
// This is a little grungy; we have results for
|
||||
// programCount-worth of values. Because we're doing 2x2
|
||||
// subsamples, we need to peel them off in groups of four,
|
||||
// average the four values for each pixel, and update the
|
||||
// output image.
|
||||
//
|
||||
// Store the varying value to a uniform array of the same size.
|
||||
// See the discussion about communication among program
|
||||
// instances in the ispc user's manual for more discussion on
|
||||
// this idiom.
|
||||
uniform float retArray[programCount];
|
||||
retArray[programIndex] = ret;
|
||||
|
||||
// offset to the first pixel in the image
|
||||
uniform int offset = 3 * (y * w + x);
|
||||
for (uniform int p = 0; p < programCount; p += 4, offset += 3) {
|
||||
// Get the four sample values for this pixel
|
||||
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
|
||||
retArray[p+3];
|
||||
|
||||
// Normalize by number of samples taken
|
||||
sumret /= nsubsamples * nsubsamples;
|
||||
|
||||
// Store result in the image
|
||||
image[offset+0] = sumret;
|
||||
image[offset+1] = sumret;
|
||||
image[offset+2] = sumret;
|
||||
}
|
||||
int offset = 3 * (y * w + x);
|
||||
atomic_add_local(&image[offset], ret);
|
||||
atomic_add_local(&image[offset+1], ret);
|
||||
atomic_add_local(&image[offset+2], ret);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -14,7 +14,7 @@ CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o) $(TASK_OBJ))
|
||||
|
||||
default: $(EXAMPLE)
|
||||
|
||||
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16
|
||||
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
@@ -57,3 +57,9 @@ objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
|
||||
|
||||
$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC)
|
||||
$(ISPC) $< -o $@ --target=generic-1
|
||||
|
||||
$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
|
||||
|
||||
@@ -158,38 +158,22 @@ IntersectLightsWithTileMinMax(
|
||||
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
||||
uniform float gBufferScale_y = 0.5f * (float)gBufferHeight;
|
||||
|
||||
// Parallize across frustum planes.
|
||||
// We really only have four side planes here, but write the code to
|
||||
// handle programCount > 4 robustly
|
||||
uniform float frustumPlanes_xy[programCount];
|
||||
uniform float frustumPlanes_z[programCount];
|
||||
uniform float frustumPlanes_xy[4] = {
|
||||
-(cameraProj_11 * gBufferScale_x),
|
||||
(cameraProj_11 * gBufferScale_x),
|
||||
(cameraProj_22 * gBufferScale_y),
|
||||
-(cameraProj_22 * gBufferScale_y) };
|
||||
uniform float frustumPlanes_z[4] = {
|
||||
tileEndX - gBufferScale_x,
|
||||
-tileStartX + gBufferScale_x,
|
||||
tileEndY - gBufferScale_y,
|
||||
-tileStartY + gBufferScale_y };
|
||||
|
||||
// TODO: If programIndex < 4 here? Don't care about masking off the
|
||||
// rest but if interleaving ("x2" modes) the other lanes should ideally
|
||||
// not be emitted...
|
||||
{
|
||||
// This one is totally constant over the whole screen... worth pulling it up at all?
|
||||
float frustumPlanes_xy_v;
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 0, -(cameraProj_11 * gBufferScale_x));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 1, (cameraProj_11 * gBufferScale_x));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 2, (cameraProj_22 * gBufferScale_y));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 3, -(cameraProj_22 * gBufferScale_y));
|
||||
|
||||
float frustumPlanes_z_v;
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 0, tileEndX - gBufferScale_x);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 1, -tileStartX + gBufferScale_x);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 2, tileEndY - gBufferScale_y);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 3, -tileStartY + gBufferScale_y);
|
||||
|
||||
// Normalize
|
||||
float norm = rsqrt(frustumPlanes_xy_v * frustumPlanes_xy_v +
|
||||
frustumPlanes_z_v * frustumPlanes_z_v);
|
||||
frustumPlanes_xy_v *= norm;
|
||||
frustumPlanes_z_v *= norm;
|
||||
|
||||
// Save out for uniform use later
|
||||
frustumPlanes_xy[programIndex] = frustumPlanes_xy_v;
|
||||
frustumPlanes_z[programIndex] = frustumPlanes_z_v;
|
||||
for (uniform int i = 0; i < 4; ++i) {
|
||||
uniform float norm = rsqrt(frustumPlanes_xy[i] * frustumPlanes_xy[i] +
|
||||
frustumPlanes_z[i] * frustumPlanes_z[i]);
|
||||
frustumPlanes_xy[i] *= norm;
|
||||
frustumPlanes_z[i] *= norm;
|
||||
}
|
||||
|
||||
uniform int32 tileNumLights = 0;
|
||||
@@ -601,30 +585,20 @@ SplitTileMinMax(
|
||||
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
|
||||
uniform float gBufferScale_y = 0.5f * (float)gBufferHeight;
|
||||
|
||||
// Parallize across frustum planes
|
||||
// Only have 2 frustum split planes here so may not be worth it, but
|
||||
// we'll do it for now for consistency
|
||||
uniform float frustumPlanes_xy[programCount];
|
||||
uniform float frustumPlanes_z[programCount];
|
||||
|
||||
// This one is totally constant over the whole screen... worth pulling it up at all?
|
||||
float frustumPlanes_xy_v;
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 0, -(cameraProj_11 * gBufferScale_x));
|
||||
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 1, (cameraProj_22 * gBufferScale_y));
|
||||
|
||||
float frustumPlanes_z_v;
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 0, tileMidX - gBufferScale_x);
|
||||
frustumPlanes_z_v = insert(frustumPlanes_z_v, 1, tileMidY - gBufferScale_y);
|
||||
uniform float frustumPlanes_xy[2] = { -(cameraProj_11 * gBufferScale_x),
|
||||
(cameraProj_22 * gBufferScale_y) };
|
||||
uniform float frustumPlanes_z[2] = { tileMidX - gBufferScale_x,
|
||||
tileMidY - gBufferScale_y };
|
||||
|
||||
// Normalize
|
||||
float norm = rsqrt(frustumPlanes_xy_v * frustumPlanes_xy_v +
|
||||
frustumPlanes_z_v * frustumPlanes_z_v);
|
||||
frustumPlanes_xy_v *= norm;
|
||||
frustumPlanes_z_v *= norm;
|
||||
|
||||
// Save out for uniform use later
|
||||
frustumPlanes_xy[programIndex] = frustumPlanes_xy_v;
|
||||
frustumPlanes_z[programIndex] = frustumPlanes_z_v;
|
||||
uniform float norm[2] = { rsqrt(frustumPlanes_xy[0] * frustumPlanes_xy[0] +
|
||||
frustumPlanes_z[0] * frustumPlanes_z[0]),
|
||||
rsqrt(frustumPlanes_xy[1] * frustumPlanes_xy[1] +
|
||||
frustumPlanes_z[1] * frustumPlanes_z[1]) };
|
||||
frustumPlanes_xy[0] *= norm[0];
|
||||
frustumPlanes_xy[1] *= norm[1];
|
||||
frustumPlanes_z[0] *= norm[0];
|
||||
frustumPlanes_z[1] *= norm[1];
|
||||
|
||||
// Initialize
|
||||
uniform int32 subtileLightOffset[4];
|
||||
|
||||
@@ -1106,7 +1106,7 @@ GATHER_GENERAL(__vec16_i64, int64_t, __vec16_i64, __gather64_i64)
|
||||
|
||||
// scatter
|
||||
|
||||
#define SCATTER_BASE_VARYINGOFFSET(VTYPE, STYPE, OTYPE, FUNC) \
|
||||
#define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
|
||||
static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
|
||||
uint32_t scale, OTYPE constOffset, \
|
||||
VTYPE val, __vec16_i1 mask) { \
|
||||
|
||||
@@ -941,10 +941,8 @@ static FORCEINLINE __vec4_i1 __not_equal(__vec4_i32 a, __vec4_i32 b) {
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i1 __unsigned_less_equal(__vec4_i32 a, __vec4_i32 b) {
|
||||
a.v = _mm_xor_si128(a.v, _mm_set1_epi32(0x80000000));
|
||||
b.v = _mm_xor_si128(b.v, _mm_set1_epi32(0x80000000));
|
||||
return _mm_or_si128(_mm_cmplt_epi32(a.v, b.v),
|
||||
_mm_cmpeq_epi32(a.v, b.v));
|
||||
// a<=b == (min(a,b) == a)
|
||||
return _mm_cmpeq_epi32(_mm_min_epu32(a.v, b.v), a.v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i1 __signed_less_equal(__vec4_i32 a, __vec4_i32 b) {
|
||||
@@ -953,10 +951,8 @@ static FORCEINLINE __vec4_i1 __signed_less_equal(__vec4_i32 a, __vec4_i32 b) {
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i1 __unsigned_greater_equal(__vec4_i32 a, __vec4_i32 b) {
|
||||
a.v = _mm_xor_si128(a.v, _mm_set1_epi32(0x80000000));
|
||||
b.v = _mm_xor_si128(b.v, _mm_set1_epi32(0x80000000));
|
||||
return _mm_or_si128(_mm_cmpgt_epi32(a.v, b.v),
|
||||
_mm_cmpeq_epi32(a.v, b.v));
|
||||
// a>=b == (max(a,b) == a)
|
||||
return _mm_cmpeq_epi32(_mm_max_epu32(a.v, b.v), a.v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i1 __signed_greater_equal(__vec4_i32 a, __vec4_i32 b) {
|
||||
|
||||
@@ -273,7 +273,7 @@ lAtomicCompareAndSwapPointer(void **v, void *newValue, void *oldValue) {
|
||||
#else
|
||||
void *result;
|
||||
#if (ISPC_POINTER_BYTES == 4)
|
||||
__asm__ __volatile__("lock\ncmpxchgd %2,%1"
|
||||
__asm__ __volatile__("lock\ncmpxchgl %2,%1"
|
||||
: "=a"(result), "=m"(*v)
|
||||
: "q"(newValue), "0"(oldValue)
|
||||
: "memory");
|
||||
|
||||
53
expr.h
53
expr.h
@@ -388,6 +388,10 @@ public:
|
||||
with values given by the "values" parameter. */
|
||||
ConstExpr(ConstExpr *old, double *values);
|
||||
|
||||
/** Create ConstExpr with the same type and values as the given one,
|
||||
but at the given position. */
|
||||
ConstExpr(ConstExpr *old, SourcePos pos);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
@@ -680,11 +684,44 @@ public:
|
||||
const Type *GetType() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
llvm::Constant *GetConstant(const Type *type) const;
|
||||
void Print() const;
|
||||
int EstimateCost() const;
|
||||
};
|
||||
|
||||
|
||||
/** An expression representing a "new" expression, used for dynamically
|
||||
allocating memory.
|
||||
*/
|
||||
class NewExpr : public Expr {
|
||||
public:
|
||||
/** Constructs a "new" expression node.  NOTE(review): the constructor
    body is not visible from this declaration; presumably typeQual (whose
    source position is tqPos) determines isVarying, and type, initializer
    and count populate allocType, initExpr and countExpr respectively --
    confirm against the definition. */
NewExpr(int typeQual, const Type *type, Expr *initializer, Expr *count,
|
||||
SourcePos tqPos, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
void Print() const;
|
||||
int EstimateCost() const;
|
||||
|
||||
/** Type of object to allocate storage for. */
|
||||
const Type *allocType;
|
||||
/** Expression giving the number of elements to allocate, when the
|
||||
"new Foo[expr]" form is used. This may be NULL, in which case a
|
||||
single element of the given type will be allocated. */
|
||||
Expr *countExpr;
|
||||
/** Optional initializer expression used to initialize the allocated
|
||||
memory. */
|
||||
Expr *initExpr;
|
||||
/** Indicates whether this is a "varying new" or "uniform new"
|
||||
(i.e. whether a separate allocation is performed per program
|
||||
instance, or whether a single allocation is performed for the
|
||||
entire gang of program instances.) */
|
||||
bool isVarying;
|
||||
};
|
||||
|
||||
|
||||
/** This function indicates whether it's legal to convert from fromType to
|
||||
toType. If the optional errorMsgBase and source position parameters
|
||||
are provided, then an error message is issued if the type conversion
|
||||
@@ -703,4 +740,20 @@ bool CanConvertTypes(const Type *fromType, const Type *toType,
|
||||
*/
|
||||
Expr *TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase);
|
||||
|
||||
/** Utility routine that emits code to initialize a symbol given an
|
||||
initializer expression.
|
||||
|
||||
@param lvalue Memory location of storage for the symbol's data
|
||||
@param symType Type of variable being initialized
|
||||
@param initExpr Expression for the initializer
|
||||
@param ctx FunctionEmitContext to use for generating instructions
|
||||
@param pos Source file position of the variable being initialized
|
||||
*/
|
||||
void
|
||||
InitSymbol(llvm::Value *lvalue, const Type *symType, Expr *initExpr,
|
||||
FunctionEmitContext *ctx, SourcePos pos);
|
||||
|
||||
bool PossiblyResolveFunctionOverloads(Expr *expr, const Type *type);
|
||||
|
||||
#endif // ISPC_EXPR_H
|
||||
|
||||
15
ispc.cpp
15
ispc.cpp
@@ -185,6 +185,14 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-1")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 1;
|
||||
t->vectorWidth = 1;
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(isa, "avx")) {
|
||||
t->isa = Target::AVX;
|
||||
@@ -270,7 +278,7 @@ Target::SupportedTargetISAs() {
|
||||
#ifdef LLVM_3_1svn
|
||||
", avx2, avx2-x2"
|
||||
#endif // LLVM_3_1svn
|
||||
", generic-4, generic-8, generic-16";
|
||||
", generic-4, generic-8, generic-16, generic-1";
|
||||
}
|
||||
|
||||
|
||||
@@ -502,12 +510,15 @@ Globals::Globals() {
|
||||
debugPrint = false;
|
||||
disableWarnings = false;
|
||||
warningsAsErrors = false;
|
||||
quiet = false;
|
||||
disableLineWrap = false;
|
||||
emitPerfWarnings = true;
|
||||
emitInstrumentation = false;
|
||||
generateDebuggingSymbols = false;
|
||||
enableFuzzTest = false;
|
||||
fuzzTestSeed = -1;
|
||||
mangleFunctionsWithTarget = false;
|
||||
|
||||
|
||||
ctx = new llvm::LLVMContext;
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
|
||||
17
ispc.h
17
ispc.h
@@ -388,6 +388,9 @@ struct Globals {
|
||||
possible performance pitfalls. */
|
||||
bool emitPerfWarnings;
|
||||
|
||||
/** Indicates whether all printed output should be suppressed. */
|
||||
bool quiet;
|
||||
|
||||
/** Indicates whether calls should be emitted in the program to an
|
||||
externally-defined program instrumentation function. (See the
|
||||
"Instrumenting your ispc programs" section in the user's
|
||||
@@ -402,6 +405,14 @@ struct Globals {
|
||||
vector width to them. */
|
||||
bool mangleFunctionsWithTarget;
|
||||
|
||||
/** If enabled, the lexer will randomly replace some tokens returned
|
||||
with other tokens, in order to test error condition handling in the
|
||||
compiler. */
|
||||
bool enableFuzzTest;
|
||||
|
||||
/** Seed for random number generator used for fuzz testing. */
|
||||
int fuzzTestSeed;
|
||||
|
||||
/** Global LLVMContext object */
|
||||
llvm::LLVMContext *ctx;
|
||||
|
||||
@@ -412,12 +423,17 @@ struct Globals {
|
||||
/** Arguments to pass along to the C pre-processor, if it is run on the
|
||||
program before compilation. */
|
||||
std::vector<std::string> cppArgs;
|
||||
|
||||
/** Additional user-provided directories to search when processing
|
||||
#include directives in the preprocessor. */
|
||||
std::vector<std::string> includePath;
|
||||
};
|
||||
|
||||
enum {
|
||||
COST_ASSIGN = 1,
|
||||
COST_COHERENT_BREAK_CONTINE = 4,
|
||||
COST_COMPLEX_ARITH_OP = 4,
|
||||
COST_DELETE = 32,
|
||||
COST_DEREF = 4,
|
||||
COST_FUNCALL = 4,
|
||||
COST_FUNPTR_UNIFORM = 12,
|
||||
@@ -425,6 +441,7 @@ enum {
|
||||
COST_GATHER = 8,
|
||||
COST_GOTO = 4,
|
||||
COST_LOAD = 2,
|
||||
COST_NEW = 32,
|
||||
COST_REGULAR_BREAK_CONTINUE = 2,
|
||||
COST_RETURN = 4,
|
||||
COST_SELECT = 4,
|
||||
|
||||
14
ispc.vcxproj
14
ispc.vcxproj
@@ -25,6 +25,7 @@
|
||||
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
||||
<ClCompile Include="gen-bitcode-c-64.cpp" />
|
||||
<ClCompile Include="gen-bitcode-dispatch.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-1.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-4.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-8.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-16.cpp" />
|
||||
@@ -211,6 +212,19 @@
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx2-x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-1.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll > gen-bitcode-generic-1.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-1.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-1.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-1.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-4.ll">
|
||||
<FileType>Document</FileType>
|
||||
|
||||
514
lex.ll
514
lex.ll
@@ -50,20 +50,275 @@ static void lStringConst(YYSTYPE *, SourcePos *);
|
||||
static double lParseHexFloat(const char *ptr);
|
||||
|
||||
#define YY_USER_ACTION \
|
||||
yylloc->first_line = yylloc->last_line; \
|
||||
yylloc->first_column = yylloc->last_column; \
|
||||
yylloc->last_column += yyleng;
|
||||
yylloc.first_line = yylloc.last_line; \
|
||||
yylloc.first_column = yylloc.last_column; \
|
||||
yylloc.last_column += yyleng;
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
inline int isatty(int) { return 0; }
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
/* Table of token codes that the RT fuzz-testing macro indexes with a random
   number to pick a replacement for the token that was actually lexed.
   NOTE(review): TOKEN_INT is listed twice (between TOKEN_INT16 and
   TOKEN_INT64), so it is twice as likely to be chosen as any other entry.
   The lexer returns TOKEN_INT for both "int" and "int32", which is
   presumably why it appears in the int32 slot -- confirm that the duplicate
   is intended. */
static int allTokens[] = {
|
||||
TOKEN_ASSERT, TOKEN_BOOL, TOKEN_BREAK, TOKEN_CASE, TOKEN_CBREAK,
|
||||
TOKEN_CCONTINUE, TOKEN_CDO, TOKEN_CFOR, TOKEN_CIF, TOKEN_CWHILE,
|
||||
TOKEN_CONST, TOKEN_CONTINUE, TOKEN_CRETURN, TOKEN_DEFAULT, TOKEN_DO,
|
||||
TOKEN_DELETE, TOKEN_DOUBLE, TOKEN_ELSE, TOKEN_ENUM,
|
||||
TOKEN_EXPORT, TOKEN_EXTERN, TOKEN_FALSE, TOKEN_FLOAT, TOKEN_FOR,
|
||||
TOKEN_FOREACH, TOKEN_FOREACH_TILED, TOKEN_GOTO, TOKEN_IF, TOKEN_INLINE,
|
||||
TOKEN_INT, TOKEN_INT8, TOKEN_INT16, TOKEN_INT, TOKEN_INT64, TOKEN_LAUNCH,
|
||||
TOKEN_NEW, TOKEN_NULL, TOKEN_PRINT, TOKEN_RETURN, TOKEN_SOA, TOKEN_SIGNED,
|
||||
TOKEN_SIZEOF, TOKEN_STATIC, TOKEN_STRUCT, TOKEN_SWITCH, TOKEN_SYNC,
|
||||
TOKEN_TASK, TOKEN_TRUE, TOKEN_TYPEDEF, TOKEN_UNIFORM, TOKEN_UNSIGNED,
|
||||
TOKEN_VARYING, TOKEN_VOID, TOKEN_WHILE, TOKEN_STRING_C_LITERAL,
|
||||
TOKEN_DOTDOTDOT,
|
||||
TOKEN_FLOAT_CONSTANT,
|
||||
TOKEN_INT32_CONSTANT, TOKEN_UINT32_CONSTANT,
|
||||
TOKEN_INT64_CONSTANT, TOKEN_UINT64_CONSTANT,
|
||||
TOKEN_INC_OP, TOKEN_DEC_OP, TOKEN_LEFT_OP, TOKEN_RIGHT_OP, TOKEN_LE_OP,
|
||||
TOKEN_GE_OP, TOKEN_EQ_OP, TOKEN_NE_OP, TOKEN_AND_OP, TOKEN_OR_OP,
|
||||
TOKEN_MUL_ASSIGN, TOKEN_DIV_ASSIGN, TOKEN_MOD_ASSIGN, TOKEN_ADD_ASSIGN,
|
||||
TOKEN_SUB_ASSIGN, TOKEN_LEFT_ASSIGN, TOKEN_RIGHT_ASSIGN, TOKEN_AND_ASSIGN,
|
||||
TOKEN_XOR_ASSIGN, TOKEN_OR_ASSIGN, TOKEN_PTR_OP,
|
||||
';', '{', '}', ',', ':', '=', '(', ')', '[', ']', '.', '&', '!', '~', '-',
|
||||
'+', '*', '/', '%', '<', '>', '^', '|', '?',
|
||||
};
|
||||
|
||||
std::map<int, std::string> tokenToName;
|
||||
std::map<std::string, std::string> tokenNameRemap;
|
||||
|
||||
void ParserInit() {
|
||||
tokenToName[TOKEN_ASSERT] = "assert";
|
||||
tokenToName[TOKEN_BOOL] = "bool";
|
||||
tokenToName[TOKEN_BREAK] = "break";
|
||||
tokenToName[TOKEN_CASE] = "case";
|
||||
tokenToName[TOKEN_CBREAK] = "cbreak";
|
||||
tokenToName[TOKEN_CCONTINUE] = "ccontinue";
|
||||
tokenToName[TOKEN_CDO] = "cdo";
|
||||
tokenToName[TOKEN_CFOR] = "cfor";
|
||||
tokenToName[TOKEN_CIF] = "cif";
|
||||
tokenToName[TOKEN_CWHILE] = "cwhile";
|
||||
tokenToName[TOKEN_CONST] = "const";
|
||||
tokenToName[TOKEN_CONTINUE] = "continue";
|
||||
tokenToName[TOKEN_CRETURN] = "creturn";
|
||||
tokenToName[TOKEN_DEFAULT] = "default";
|
||||
tokenToName[TOKEN_DO] = "do";
|
||||
tokenToName[TOKEN_DELETE] = "delete";
|
||||
tokenToName[TOKEN_DOUBLE] = "double";
|
||||
tokenToName[TOKEN_ELSE] = "else";
|
||||
tokenToName[TOKEN_ENUM] = "enum";
|
||||
tokenToName[TOKEN_EXPORT] = "export";
|
||||
tokenToName[TOKEN_EXTERN] = "extern";
|
||||
tokenToName[TOKEN_FALSE] = "false";
|
||||
tokenToName[TOKEN_FLOAT] = "float";
|
||||
tokenToName[TOKEN_FOR] = "for";
|
||||
tokenToName[TOKEN_FOREACH] = "foreach";
|
||||
tokenToName[TOKEN_FOREACH_TILED] = "foreach_tiled";
|
||||
tokenToName[TOKEN_GOTO] = "goto";
|
||||
tokenToName[TOKEN_IF] = "if";
|
||||
tokenToName[TOKEN_INLINE] = "inline";
|
||||
tokenToName[TOKEN_INT] = "int";
|
||||
tokenToName[TOKEN_INT8] = "int8";
|
||||
tokenToName[TOKEN_INT16] = "int16";
|
||||
tokenToName[TOKEN_INT] = "int";
|
||||
tokenToName[TOKEN_INT64] = "int64";
|
||||
tokenToName[TOKEN_LAUNCH] = "launch";
|
||||
tokenToName[TOKEN_NEW] = "new";
|
||||
tokenToName[TOKEN_NULL] = "NULL";
|
||||
tokenToName[TOKEN_PRINT] = "print";
|
||||
tokenToName[TOKEN_RETURN] = "return";
|
||||
tokenToName[TOKEN_SOA] = "soa";
|
||||
tokenToName[TOKEN_SIGNED] = "signed";
|
||||
tokenToName[TOKEN_SIZEOF] = "sizeof";
|
||||
tokenToName[TOKEN_STATIC] = "static";
|
||||
tokenToName[TOKEN_STRUCT] = "struct";
|
||||
tokenToName[TOKEN_SWITCH] = "switch";
|
||||
tokenToName[TOKEN_SYNC] = "sync";
|
||||
tokenToName[TOKEN_TASK] = "task";
|
||||
tokenToName[TOKEN_TRUE] = "true";
|
||||
tokenToName[TOKEN_TYPEDEF] = "typedef";
|
||||
tokenToName[TOKEN_UNIFORM] = "uniform";
|
||||
tokenToName[TOKEN_UNSIGNED] = "unsigned";
|
||||
tokenToName[TOKEN_VARYING] = "varying";
|
||||
tokenToName[TOKEN_VOID] = "void";
|
||||
tokenToName[TOKEN_WHILE] = "while";
|
||||
tokenToName[TOKEN_STRING_C_LITERAL] = "\"C\"";
|
||||
tokenToName[TOKEN_DOTDOTDOT] = "...";
|
||||
tokenToName[TOKEN_FLOAT_CONSTANT] = "TOKEN_FLOAT_CONSTANT";
|
||||
tokenToName[TOKEN_INT32_CONSTANT] = "TOKEN_INT32_CONSTANT";
|
||||
tokenToName[TOKEN_UINT32_CONSTANT] = "TOKEN_UINT32_CONSTANT";
|
||||
tokenToName[TOKEN_INT64_CONSTANT] = "TOKEN_INT64_CONSTANT";
|
||||
tokenToName[TOKEN_UINT64_CONSTANT] = "TOKEN_UINT64_CONSTANT";
|
||||
tokenToName[TOKEN_INC_OP] = "++";
|
||||
tokenToName[TOKEN_DEC_OP] = "--";
|
||||
tokenToName[TOKEN_LEFT_OP] = "<<";
|
||||
tokenToName[TOKEN_RIGHT_OP] = ">>";
|
||||
tokenToName[TOKEN_LE_OP] = "<=";
|
||||
tokenToName[TOKEN_GE_OP] = ">=";
|
||||
tokenToName[TOKEN_EQ_OP] = "==";
|
||||
tokenToName[TOKEN_NE_OP] = "!=";
|
||||
tokenToName[TOKEN_AND_OP] = "&&";
|
||||
tokenToName[TOKEN_OR_OP] = "||";
|
||||
tokenToName[TOKEN_MUL_ASSIGN] = "*=";
|
||||
tokenToName[TOKEN_DIV_ASSIGN] = "/=";
|
||||
tokenToName[TOKEN_MOD_ASSIGN] = "%=";
|
||||
tokenToName[TOKEN_ADD_ASSIGN] = "+=";
|
||||
tokenToName[TOKEN_SUB_ASSIGN] = "-=";
|
||||
tokenToName[TOKEN_LEFT_ASSIGN] = "<<=";
|
||||
tokenToName[TOKEN_RIGHT_ASSIGN] = ">>=";
|
||||
tokenToName[TOKEN_AND_ASSIGN] = "&=";
|
||||
tokenToName[TOKEN_XOR_ASSIGN] = "^=";
|
||||
tokenToName[TOKEN_OR_ASSIGN] = "|=";
|
||||
tokenToName[TOKEN_PTR_OP] = "->";
|
||||
tokenToName[';'] = ";";
|
||||
tokenToName['{'] = "{";
|
||||
tokenToName['}'] = "}";
|
||||
tokenToName[','] = ",";
|
||||
tokenToName[':'] = ":";
|
||||
tokenToName['='] = "=";
|
||||
tokenToName['('] = "(";
|
||||
tokenToName[')'] = ")";
|
||||
tokenToName['['] = "[";
|
||||
tokenToName[']'] = "]";
|
||||
tokenToName['.'] = ".";
|
||||
tokenToName['&'] = "&";
|
||||
tokenToName['!'] = "!";
|
||||
tokenToName['~'] = "~";
|
||||
tokenToName['-'] = "-";
|
||||
tokenToName['+'] = "+";
|
||||
tokenToName['*'] = "*";
|
||||
tokenToName['/'] = "/";
|
||||
tokenToName['%'] = "%";
|
||||
tokenToName['<'] = "<";
|
||||
tokenToName['>'] = ">";
|
||||
tokenToName['^'] = "^";
|
||||
tokenToName['|'] = "|";
|
||||
tokenToName['?'] = "?";
|
||||
tokenToName[';'] = ";";
|
||||
|
||||
tokenNameRemap["TOKEN_ASSERT"] = "\'assert\'";
|
||||
tokenNameRemap["TOKEN_BOOL"] = "\'bool\'";
|
||||
tokenNameRemap["TOKEN_BREAK"] = "\'break\'";
|
||||
tokenNameRemap["TOKEN_CASE"] = "\'case\'";
|
||||
tokenNameRemap["TOKEN_CBREAK"] = "\'cbreak\'";
|
||||
tokenNameRemap["TOKEN_CCONTINUE"] = "\'ccontinue\'";
|
||||
tokenNameRemap["TOKEN_CDO"] = "\'cdo\'";
|
||||
tokenNameRemap["TOKEN_CFOR"] = "\'cfor\'";
|
||||
tokenNameRemap["TOKEN_CIF"] = "\'cif\'";
|
||||
tokenNameRemap["TOKEN_CWHILE"] = "\'cwhile\'";
|
||||
tokenNameRemap["TOKEN_CONST"] = "\'const\'";
|
||||
tokenNameRemap["TOKEN_CONTINUE"] = "\'continue\'";
|
||||
tokenNameRemap["TOKEN_CRETURN"] = "\'creturn\'";
|
||||
tokenNameRemap["TOKEN_DEFAULT"] = "\'default\'";
|
||||
tokenNameRemap["TOKEN_DO"] = "\'do\'";
|
||||
tokenNameRemap["TOKEN_DELETE"] = "\'delete\'";
|
||||
tokenNameRemap["TOKEN_DOUBLE"] = "\'double\'";
|
||||
tokenNameRemap["TOKEN_ELSE"] = "\'else\'";
|
||||
tokenNameRemap["TOKEN_ENUM"] = "\'enum\'";
|
||||
tokenNameRemap["TOKEN_EXPORT"] = "\'export\'";
|
||||
tokenNameRemap["TOKEN_EXTERN"] = "\'extern\'";
|
||||
tokenNameRemap["TOKEN_FALSE"] = "\'false\'";
|
||||
tokenNameRemap["TOKEN_FLOAT"] = "\'float\'";
|
||||
tokenNameRemap["TOKEN_FOR"] = "\'for\'";
|
||||
tokenNameRemap["TOKEN_FOREACH"] = "\'foreach\'";
|
||||
tokenNameRemap["TOKEN_FOREACH_TILED"] = "\'foreach_tiled\'";
|
||||
tokenNameRemap["TOKEN_GOTO"] = "\'goto\'";
|
||||
tokenNameRemap["TOKEN_IDENTIFIER"] = "identifier";
|
||||
tokenNameRemap["TOKEN_IF"] = "\'if\'";
|
||||
tokenNameRemap["TOKEN_INLINE"] = "\'inline\'";
|
||||
tokenNameRemap["TOKEN_INT"] = "\'int\'";
|
||||
tokenNameRemap["TOKEN_INT8"] = "\'int8\'";
|
||||
tokenNameRemap["TOKEN_INT16"] = "\'int16\'";
|
||||
tokenNameRemap["TOKEN_INT"] = "\'int\'";
|
||||
tokenNameRemap["TOKEN_INT64"] = "\'int64\'";
|
||||
tokenNameRemap["TOKEN_LAUNCH"] = "\'launch\'";
|
||||
tokenNameRemap["TOKEN_NEW"] = "\'new\'";
|
||||
tokenNameRemap["TOKEN_NULL"] = "\'NULL\'";
|
||||
tokenNameRemap["TOKEN_PRINT"] = "\'print\'";
|
||||
tokenNameRemap["TOKEN_RETURN"] = "\'return\'";
|
||||
tokenNameRemap["TOKEN_SOA"] = "\'soa\'";
|
||||
tokenNameRemap["TOKEN_SIGNED"] = "\'signed\'";
|
||||
tokenNameRemap["TOKEN_SIZEOF"] = "\'sizeof\'";
|
||||
tokenNameRemap["TOKEN_STATIC"] = "\'static\'";
|
||||
tokenNameRemap["TOKEN_STRUCT"] = "\'struct\'";
|
||||
tokenNameRemap["TOKEN_SWITCH"] = "\'switch\'";
|
||||
tokenNameRemap["TOKEN_SYNC"] = "\'sync\'";
|
||||
tokenNameRemap["TOKEN_TASK"] = "\'task\'";
|
||||
tokenNameRemap["TOKEN_TRUE"] = "\'true\'";
|
||||
tokenNameRemap["TOKEN_TYPEDEF"] = "\'typedef\'";
|
||||
tokenNameRemap["TOKEN_UNIFORM"] = "\'uniform\'";
|
||||
tokenNameRemap["TOKEN_UNSIGNED"] = "\'unsigned\'";
|
||||
tokenNameRemap["TOKEN_VARYING"] = "\'varying\'";
|
||||
tokenNameRemap["TOKEN_VOID"] = "\'void\'";
|
||||
tokenNameRemap["TOKEN_WHILE"] = "\'while\'";
|
||||
tokenNameRemap["TOKEN_STRING_C_LITERAL"] = "\"C\"";
|
||||
tokenNameRemap["TOKEN_DOTDOTDOT"] = "\'...\'";
|
||||
tokenNameRemap["TOKEN_FLOAT_CONSTANT"] = "float constant";
|
||||
tokenNameRemap["TOKEN_INT32_CONSTANT"] = "int32 constant";
|
||||
tokenNameRemap["TOKEN_UINT32_CONSTANT"] = "unsigned int32 constant";
|
||||
tokenNameRemap["TOKEN_INT64_CONSTANT"] = "int64 constant";
|
||||
tokenNameRemap["TOKEN_UINT64_CONSTANT"] = "unsigned int64 constant";
|
||||
tokenNameRemap["TOKEN_INC_OP"] = "\'++\'";
|
||||
tokenNameRemap["TOKEN_DEC_OP"] = "\'--\'";
|
||||
tokenNameRemap["TOKEN_LEFT_OP"] = "\'<<\'";
|
||||
tokenNameRemap["TOKEN_RIGHT_OP"] = "\'>>\'";
|
||||
tokenNameRemap["TOKEN_LE_OP"] = "\'<=\'";
|
||||
tokenNameRemap["TOKEN_GE_OP"] = "\'>=\'";
|
||||
tokenNameRemap["TOKEN_EQ_OP"] = "\'==\'";
|
||||
tokenNameRemap["TOKEN_NE_OP"] = "\'!=\'";
|
||||
tokenNameRemap["TOKEN_AND_OP"] = "\'&&\'";
|
||||
tokenNameRemap["TOKEN_OR_OP"] = "\'||\'";
|
||||
tokenNameRemap["TOKEN_MUL_ASSIGN"] = "\'*=\'";
|
||||
tokenNameRemap["TOKEN_DIV_ASSIGN"] = "\'/=\'";
|
||||
tokenNameRemap["TOKEN_MOD_ASSIGN"] = "\'%=\'";
|
||||
tokenNameRemap["TOKEN_ADD_ASSIGN"] = "\'+=\'";
|
||||
tokenNameRemap["TOKEN_SUB_ASSIGN"] = "\'-=\'";
|
||||
tokenNameRemap["TOKEN_LEFT_ASSIGN"] = "\'<<=\'";
|
||||
tokenNameRemap["TOKEN_RIGHT_ASSIGN"] = "\'>>=\'";
|
||||
tokenNameRemap["TOKEN_AND_ASSIGN"] = "\'&=\'";
|
||||
tokenNameRemap["TOKEN_XOR_ASSIGN"] = "\'^=\'";
|
||||
tokenNameRemap["TOKEN_OR_ASSIGN"] = "\'|=\'";
|
||||
tokenNameRemap["TOKEN_PTR_OP"] = "\'->\'";
|
||||
tokenNameRemap["$end"] = "end of file";
|
||||
}
|
||||
|
||||
|
||||
/* Returns a pseudo-random integer for the lexer's fuzz testing: the result
   of rand() when ISPC_IS_WINDOWS is defined, and of lrand48() otherwise.
   No seeding is done here. */
inline int ispcRand() {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
return rand();
|
||||
#else
|
||||
return lrand48();
|
||||
#endif
|
||||
}
|
||||
|
||||
/* RT: fuzz-testing hook written at the start of lexer rule actions, in the
   form "RT; return TOKEN_X;".  When g->enableFuzzTest is set it draws
   r = ispcRand() % 40 and then:
     r == 0: emits a "Fuzz test dropping token" warning.  NOTE(review): this
             branch does nothing else, so the "return" that follows RT in
             the rule action still runs and the token does not appear to be
             dropped -- confirm the intended behavior.
     r == 1: returns a randomly chosen entry of allTokens in place of the
             lexed token (stringVal is set to the matched text first).
     r == 2: asks m->symbolTable->RandomSymbol() for a symbol and, if one is
             returned, sets stringVal to its name and returns
             TOKEN_IDENTIFIER.
   For any other r, and when fuzz testing is disabled, control falls through
   to the rule's own return.  The dangling "else" at the end absorbs the
   semicolon written after RT. */
#define RT \
|
||||
if (g->enableFuzzTest) { \
|
||||
int r = ispcRand() % 40; \
|
||||
if (r == 0) { \
|
||||
Warning(yylloc, "Fuzz test dropping token"); \
|
||||
} \
|
||||
else if (r == 1) { \
|
||||
Assert (tokenToName.size() > 0); \
|
||||
int nt = sizeof(allTokens) / sizeof(allTokens[0]); \
|
||||
int tn = ispcRand() % nt; \
|
||||
yylval.stringVal = new std::string(yytext); /* just in case */\
|
||||
Warning(yylloc, "Fuzz test replaced token with \"%s\"", tokenToName[allTokens[tn]].c_str()); \
|
||||
return allTokens[tn]; \
|
||||
} \
|
||||
else if (r == 2) { \
|
||||
Symbol *sym = m->symbolTable->RandomSymbol(); \
|
||||
if (sym != NULL) { \
|
||||
yylval.stringVal = new std::string(sym->name); \
|
||||
Warning(yylloc, "Fuzz test replaced with identifier \"%s\".", sym->name.c_str()); \
|
||||
return TOKEN_IDENTIFIER; \
|
||||
} \
|
||||
} \
|
||||
/* TOKEN_TYPE_NAME */ \
|
||||
} else /* swallow semicolon */
|
||||
|
||||
%}
|
||||
|
||||
%option nounput
|
||||
%option noyywrap
|
||||
%option bison-bridge
|
||||
%option bison-locations
|
||||
%option nounistd
|
||||
|
||||
WHITESPACE [ \t\r]+
|
||||
@@ -75,73 +330,77 @@ IDENT [a-zA-Z_][a-zA-Z_0-9]*
|
||||
ZO_SWIZZLE ([01]+[w-z]+)+|([01]+[rgba]+)+|([01]+[uv]+)+
|
||||
|
||||
%%
|
||||
"/*" { lCComment(yylloc); }
|
||||
"//" { lCppComment(yylloc); }
|
||||
"/*" { lCComment(&yylloc); }
|
||||
"//" { lCppComment(&yylloc); }
|
||||
|
||||
__assert { return TOKEN_ASSERT; }
|
||||
bool { return TOKEN_BOOL; }
|
||||
break { return TOKEN_BREAK; }
|
||||
case { return TOKEN_CASE; }
|
||||
cbreak { return TOKEN_CBREAK; }
|
||||
ccontinue { return TOKEN_CCONTINUE; }
|
||||
cdo { return TOKEN_CDO; }
|
||||
cfor { return TOKEN_CFOR; }
|
||||
cif { return TOKEN_CIF; }
|
||||
cwhile { return TOKEN_CWHILE; }
|
||||
const { return TOKEN_CONST; }
|
||||
continue { return TOKEN_CONTINUE; }
|
||||
creturn { return TOKEN_CRETURN; }
|
||||
default { return TOKEN_DEFAULT; }
|
||||
do { return TOKEN_DO; }
|
||||
double { return TOKEN_DOUBLE; }
|
||||
else { return TOKEN_ELSE; }
|
||||
enum { return TOKEN_ENUM; }
|
||||
export { return TOKEN_EXPORT; }
|
||||
extern { return TOKEN_EXTERN; }
|
||||
false { return TOKEN_FALSE; }
|
||||
float { return TOKEN_FLOAT; }
|
||||
for { return TOKEN_FOR; }
|
||||
foreach { return TOKEN_FOREACH; }
|
||||
foreach_tiled { return TOKEN_FOREACH_TILED; }
|
||||
goto { return TOKEN_GOTO; }
|
||||
if { return TOKEN_IF; }
|
||||
inline { return TOKEN_INLINE; }
|
||||
int { return TOKEN_INT; }
|
||||
int8 { return TOKEN_INT8; }
|
||||
int16 { return TOKEN_INT16; }
|
||||
int32 { return TOKEN_INT; }
|
||||
int64 { return TOKEN_INT64; }
|
||||
launch { return TOKEN_LAUNCH; }
|
||||
NULL { return TOKEN_NULL; }
|
||||
print { return TOKEN_PRINT; }
|
||||
reference { Error(*yylloc, "\"reference\" qualifier is no longer supported; "
|
||||
"please use C++-style '&' syntax for references "
|
||||
"instead."); }
|
||||
return { return TOKEN_RETURN; }
|
||||
soa { return TOKEN_SOA; }
|
||||
signed { return TOKEN_SIGNED; }
|
||||
sizeof { return TOKEN_SIZEOF; }
|
||||
static { return TOKEN_STATIC; }
|
||||
struct { return TOKEN_STRUCT; }
|
||||
switch { return TOKEN_SWITCH; }
|
||||
sync { return TOKEN_SYNC; }
|
||||
task { return TOKEN_TASK; }
|
||||
true { return TOKEN_TRUE; }
|
||||
typedef { return TOKEN_TYPEDEF; }
|
||||
uniform { return TOKEN_UNIFORM; }
|
||||
unsigned { return TOKEN_UNSIGNED; }
|
||||
varying { return TOKEN_VARYING; }
|
||||
void { return TOKEN_VOID; }
|
||||
while { return TOKEN_WHILE; }
|
||||
\"C\" { return TOKEN_STRING_C_LITERAL; }
|
||||
\.\.\. { return TOKEN_DOTDOTDOT; }
|
||||
__assert { RT; return TOKEN_ASSERT; }
|
||||
bool { RT; return TOKEN_BOOL; }
|
||||
break { RT; return TOKEN_BREAK; }
|
||||
case { RT; return TOKEN_CASE; }
|
||||
cbreak { RT; return TOKEN_CBREAK; }
|
||||
ccontinue { RT; return TOKEN_CCONTINUE; }
|
||||
cdo { RT; return TOKEN_CDO; }
|
||||
cfor { RT; return TOKEN_CFOR; }
|
||||
cif { RT; return TOKEN_CIF; }
|
||||
cwhile { RT; return TOKEN_CWHILE; }
|
||||
const { RT; return TOKEN_CONST; }
|
||||
continue { RT; return TOKEN_CONTINUE; }
|
||||
creturn { RT; return TOKEN_CRETURN; }
|
||||
default { RT; return TOKEN_DEFAULT; }
|
||||
do { RT; return TOKEN_DO; }
|
||||
delete { RT; return TOKEN_DELETE; }
|
||||
delete\[\] { RT; return TOKEN_DELETE; }
|
||||
double { RT; return TOKEN_DOUBLE; }
|
||||
else { RT; return TOKEN_ELSE; }
|
||||
enum { RT; return TOKEN_ENUM; }
|
||||
export { RT; return TOKEN_EXPORT; }
|
||||
extern { RT; return TOKEN_EXTERN; }
|
||||
false { RT; return TOKEN_FALSE; }
|
||||
float { RT; return TOKEN_FLOAT; }
|
||||
for { RT; return TOKEN_FOR; }
|
||||
foreach { RT; return TOKEN_FOREACH; }
|
||||
foreach_tiled { RT; return TOKEN_FOREACH_TILED; }
|
||||
goto { RT; return TOKEN_GOTO; }
|
||||
if { RT; return TOKEN_IF; }
|
||||
inline { RT; return TOKEN_INLINE; }
|
||||
int { RT; return TOKEN_INT; }
|
||||
int8 { RT; return TOKEN_INT8; }
|
||||
int16 { RT; return TOKEN_INT16; }
|
||||
int32 { RT; return TOKEN_INT; }
|
||||
int64 { RT; return TOKEN_INT64; }
|
||||
launch { RT; return TOKEN_LAUNCH; }
|
||||
new { RT; return TOKEN_NEW; }
|
||||
NULL { RT; return TOKEN_NULL; }
|
||||
print { RT; return TOKEN_PRINT; }
|
||||
reference { Error(yylloc, "\"reference\" qualifier is no longer supported; "
|
||||
"please use C++-style '&' syntax for references "
|
||||
"instead."); }
|
||||
return { RT; return TOKEN_RETURN; }
|
||||
soa { RT; return TOKEN_SOA; }
|
||||
signed { RT; return TOKEN_SIGNED; }
|
||||
sizeof { RT; return TOKEN_SIZEOF; }
|
||||
static { RT; return TOKEN_STATIC; }
|
||||
struct { RT; return TOKEN_STRUCT; }
|
||||
switch { RT; return TOKEN_SWITCH; }
|
||||
sync { RT; return TOKEN_SYNC; }
|
||||
task { RT; return TOKEN_TASK; }
|
||||
true { RT; return TOKEN_TRUE; }
|
||||
typedef { RT; return TOKEN_TYPEDEF; }
|
||||
uniform { RT; return TOKEN_UNIFORM; }
|
||||
unsigned { RT; return TOKEN_UNSIGNED; }
|
||||
varying { RT; return TOKEN_VARYING; }
|
||||
void { RT; return TOKEN_VOID; }
|
||||
while { RT; return TOKEN_WHILE; }
|
||||
\"C\" { RT; return TOKEN_STRING_C_LITERAL; }
|
||||
\.\.\. { RT; return TOKEN_DOTDOTDOT; }
|
||||
|
||||
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; }
|
||||
L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERAL; }
|
||||
|
||||
{IDENT} {
|
||||
RT;
|
||||
/* We have an identifier--is it a type name or an identifier?
|
||||
The symbol table will straighten us out... */
|
||||
yylval->stringVal = new std::string(yytext);
|
||||
yylval.stringVal = new std::string(yytext);
|
||||
if (m->symbolTable->LookupType(yytext) != NULL)
|
||||
return TOKEN_TYPE_NAME;
|
||||
else
|
||||
@@ -149,18 +408,19 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
}
|
||||
|
||||
{INT_NUMBER}+(u|U|l|L)*? {
|
||||
RT;
|
||||
int ls = 0, us = 0;
|
||||
|
||||
char *endPtr = NULL;
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
yylval->intVal = lParseBinary(yytext+2, *yylloc, &endPtr);
|
||||
yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr);
|
||||
else {
|
||||
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
|
||||
yylval->intVal = _strtoi64(yytext, &endPtr, 0);
|
||||
yylval.intVal = _strtoui64(yytext, &endPtr, 0);
|
||||
#else
|
||||
// FIXME: should use strtouq and then issue an error if we can't
|
||||
// fit into 64 bits...
|
||||
yylval->intVal = strtoull(yytext, &endPtr, 0);
|
||||
yylval.intVal = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
@@ -178,11 +438,11 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
us++;
|
||||
}
|
||||
if (kilo)
|
||||
yylval->intVal *= 1024;
|
||||
yylval.intVal *= 1024;
|
||||
if (mega)
|
||||
yylval->intVal *= 1024*1024;
|
||||
yylval.intVal *= 1024*1024;
|
||||
if (giga)
|
||||
yylval->intVal *= 1024*1024*1024;
|
||||
yylval.intVal *= 1024*1024*1024;
|
||||
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
@@ -190,7 +450,7 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((yylval->intVal & 0xffffffff) == yylval->intVal)
|
||||
if ((yylval.intVal & 0xffffffff) == yylval.intVal)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
@@ -198,74 +458,76 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
|
||||
|
||||
|
||||
{FLOAT_NUMBER} {
|
||||
yylval->floatVal = (float)atof(yytext);
|
||||
RT;
|
||||
yylval.floatVal = (float)atof(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
{HEX_FLOAT_NUMBER} {
|
||||
yylval->floatVal = (float)lParseHexFloat(yytext);
|
||||
RT;
|
||||
yylval.floatVal = (float)lParseHexFloat(yytext);
|
||||
return TOKEN_FLOAT_CONSTANT;
|
||||
}
|
||||
|
||||
"++" { return TOKEN_INC_OP; }
|
||||
"--" { return TOKEN_DEC_OP; }
|
||||
"<<" { return TOKEN_LEFT_OP; }
|
||||
">>" { return TOKEN_RIGHT_OP; }
|
||||
"<=" { return TOKEN_LE_OP; }
|
||||
">=" { return TOKEN_GE_OP; }
|
||||
"==" { return TOKEN_EQ_OP; }
|
||||
"!=" { return TOKEN_NE_OP; }
|
||||
"&&" { return TOKEN_AND_OP; }
|
||||
"||" { return TOKEN_OR_OP; }
|
||||
"*=" { return TOKEN_MUL_ASSIGN; }
|
||||
"/=" { return TOKEN_DIV_ASSIGN; }
|
||||
"%=" { return TOKEN_MOD_ASSIGN; }
|
||||
"+=" { return TOKEN_ADD_ASSIGN; }
|
||||
"-=" { return TOKEN_SUB_ASSIGN; }
|
||||
"<<=" { return TOKEN_LEFT_ASSIGN; }
|
||||
">>=" { return TOKEN_RIGHT_ASSIGN; }
|
||||
"&=" { return TOKEN_AND_ASSIGN; }
|
||||
"^=" { return TOKEN_XOR_ASSIGN; }
|
||||
"|=" { return TOKEN_OR_ASSIGN; }
|
||||
"->" { return TOKEN_PTR_OP; }
|
||||
";" { return ';'; }
|
||||
("{"|"<%") { return '{'; }
|
||||
("}"|"%>") { return '}'; }
|
||||
"," { return ','; }
|
||||
":" { return ':'; }
|
||||
"=" { return '='; }
|
||||
"(" { return '('; }
|
||||
")" { return ')'; }
|
||||
("["|"<:") { return '['; }
|
||||
("]"|":>") { return ']'; }
|
||||
"." { return '.'; }
|
||||
"&" { return '&'; }
|
||||
"!" { return '!'; }
|
||||
"~" { return '~'; }
|
||||
"-" { return '-'; }
|
||||
"+" { return '+'; }
|
||||
"*" { return '*'; }
|
||||
"/" { return '/'; }
|
||||
"%" { return '%'; }
|
||||
"<" { return '<'; }
|
||||
">" { return '>'; }
|
||||
"^" { return '^'; }
|
||||
"|" { return '|'; }
|
||||
"?" { return '?'; }
|
||||
"++" { RT; return TOKEN_INC_OP; }
|
||||
"--" { RT; return TOKEN_DEC_OP; }
|
||||
"<<" { RT; return TOKEN_LEFT_OP; }
|
||||
">>" { RT; return TOKEN_RIGHT_OP; }
|
||||
"<=" { RT; return TOKEN_LE_OP; }
|
||||
">=" { RT; return TOKEN_GE_OP; }
|
||||
"==" { RT; return TOKEN_EQ_OP; }
|
||||
"!=" { RT; return TOKEN_NE_OP; }
|
||||
"&&" { RT; return TOKEN_AND_OP; }
|
||||
"||" { RT; return TOKEN_OR_OP; }
|
||||
"*=" { RT; return TOKEN_MUL_ASSIGN; }
|
||||
"/=" { RT; return TOKEN_DIV_ASSIGN; }
|
||||
"%=" { RT; return TOKEN_MOD_ASSIGN; }
|
||||
"+=" { RT; return TOKEN_ADD_ASSIGN; }
|
||||
"-=" { RT; return TOKEN_SUB_ASSIGN; }
|
||||
"<<=" { RT; return TOKEN_LEFT_ASSIGN; }
|
||||
">>=" { RT; return TOKEN_RIGHT_ASSIGN; }
|
||||
"&=" { RT; return TOKEN_AND_ASSIGN; }
|
||||
"^=" { RT; return TOKEN_XOR_ASSIGN; }
|
||||
"|=" { RT; return TOKEN_OR_ASSIGN; }
|
||||
"->" { RT; return TOKEN_PTR_OP; }
|
||||
";" { RT; return ';'; }
|
||||
("{"|"<%") { RT; return '{'; }
|
||||
("}"|"%>") { RT; return '}'; }
|
||||
"," { RT; return ','; }
|
||||
":" { RT; return ':'; }
|
||||
"=" { RT; return '='; }
|
||||
"(" { RT; return '('; }
|
||||
")" { RT; return ')'; }
|
||||
("["|"<:") { RT; return '['; }
|
||||
("]"|":>") { RT; return ']'; }
|
||||
"." { RT; return '.'; }
|
||||
"&" { RT; return '&'; }
|
||||
"!" { RT; return '!'; }
|
||||
"~" { RT; return '~'; }
|
||||
"-" { RT; return '-'; }
|
||||
"+" { RT; return '+'; }
|
||||
"*" { RT; return '*'; }
|
||||
"/" { RT; return '/'; }
|
||||
"%" { RT; return '%'; }
|
||||
"<" { RT; return '<'; }
|
||||
">" { RT; return '>'; }
|
||||
"^" { RT; return '^'; }
|
||||
"|" { RT; return '|'; }
|
||||
"?" { RT; return '?'; }
|
||||
|
||||
{WHITESPACE} { }
|
||||
|
||||
\n {
|
||||
yylloc->last_line++;
|
||||
yylloc->last_column = 1;
|
||||
yylloc.last_line++;
|
||||
yylloc.last_column = 1;
|
||||
}
|
||||
|
||||
#(line)?[ ][0-9]+[ ]\"(\\.|[^\\"])*\"[^\n]* {
|
||||
lHandleCppHash(yylloc);
|
||||
lHandleCppHash(&yylloc);
|
||||
}
|
||||
|
||||
. {
|
||||
Error(*yylloc, "Illegal character: %c (0x%x)", yytext[0], int(yytext[0]));
|
||||
Error(yylloc, "Illegal character: %c (0x%x)", yytext[0], int(yytext[0]));
|
||||
YY_USER_ACTION
|
||||
}
|
||||
|
||||
@@ -306,8 +568,10 @@ lParseBinary(const char *ptr, SourcePos pos, char **endPtr) {
|
||||
static void
|
||||
lCComment(SourcePos *pos) {
|
||||
char c, prev = 0;
|
||||
|
||||
|
||||
while ((c = yyinput()) != 0) {
|
||||
++pos->last_column;
|
||||
|
||||
if (c == '\n') {
|
||||
pos->last_line++;
|
||||
pos->last_column = 1;
|
||||
|
||||
13
llvmutil.cpp
13
llvmutil.cpp
@@ -597,6 +597,9 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
||||
bool
|
||||
LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
std::vector<llvm::PHINode *> &seenPhis) {
|
||||
if (vectorLength == 1)
|
||||
return true;
|
||||
|
||||
if (llvm::isa<llvm::ConstantAggregateZero>(v))
|
||||
return true;
|
||||
|
||||
@@ -604,6 +607,12 @@ LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
if (cv != NULL)
|
||||
return (cv->getSplatValue() != NULL);
|
||||
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||
if (cdv != NULL)
|
||||
return (cdv->getSplatValue() != NULL);
|
||||
#endif
|
||||
|
||||
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
|
||||
if (bop != NULL)
|
||||
return (LLVMVectorValuesAllEqual(bop->getOperand(0), vectorLength,
|
||||
@@ -669,6 +678,10 @@ LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
return true;
|
||||
}
|
||||
|
||||
if (llvm::isa<llvm::UndefValue>(v))
|
||||
// ?
|
||||
return false;
|
||||
|
||||
Assert(!llvm::isa<llvm::Constant>(v));
|
||||
|
||||
if (llvm::isa<llvm::CallInst>(v) || llvm::isa<llvm::LoadInst>(v) ||
|
||||
|
||||
80
main.cpp
80
main.cpp
@@ -41,6 +41,9 @@
|
||||
#include "type.h"
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <time.h>
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
#include <llvm/Support/PrettyStackTrace.h>
|
||||
#include <llvm/Support/Signals.h>
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
@@ -90,7 +93,6 @@ usage(int ret) {
|
||||
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
|
||||
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs());
|
||||
printf(" [-D<foo>]\t\t\t\t#define given value when running preprocessor\n");
|
||||
printf(" [--debug]\t\t\t\tPrint information useful for debugging ispc\n");
|
||||
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
||||
#ifndef LLVM_2_9
|
||||
printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n");
|
||||
@@ -99,7 +101,9 @@ usage(int ret) {
|
||||
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
||||
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
||||
printf(" [--help]\t\t\t\tPrint help\n");
|
||||
printf(" [--help-dev]\t\t\tPrint help for developer options\n");
|
||||
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
|
||||
printf(" [-I <path>]\t\t\t\tAdd <path> to #include file search path\n");
|
||||
printf(" [--instrument]\t\t\tEmit instrumentation to gather performance data\n");
|
||||
printf(" [--math-lib=<option>]\t\tSelect math library\n");
|
||||
printf(" default\t\t\t\tUse ispc's built-in math functions\n");
|
||||
@@ -115,20 +119,10 @@ usage(int ret) {
|
||||
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
|
||||
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
|
||||
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
|
||||
#if 0
|
||||
printf(" disable-all-on-optimizations\n");
|
||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-handle-pseudo-memory-ops\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||
#endif
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
printf(" [--quiet]\t\t\t\tSuppress all output\n");
|
||||
printf(" [--target=<isa>]\t\t\tSelect target ISA. <isa>={%s}\n", Target::SupportedTargetISAs());
|
||||
printf(" [--version]\t\t\t\tPrint ispc version\n");
|
||||
printf(" [--werror]\t\t\t\tTreat warnings as errors\n");
|
||||
@@ -139,11 +133,32 @@ usage(int ret) {
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
devUsage(int ret) {
|
||||
lPrintVersion();
|
||||
printf("\nusage (developer options): ispc\n");
|
||||
printf(" [--debug]\t\t\t\tPrint information useful for debugging ispc\n");
|
||||
printf(" [--fuzz-test]\t\t\tRandomly perturb program input to test error conditions\n");
|
||||
printf(" [--fuzz-seed=<value>]\t\tSeed value for RNG for fuzz testing\n");
|
||||
printf(" [--opt=<option>]\t\t\tSet optimization option\n");
|
||||
printf(" disable-all-on-optimizations\n");
|
||||
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
|
||||
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
|
||||
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
|
||||
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
|
||||
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
|
||||
printf(" disable-handle-pseudo-memory-ops\n");
|
||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||
printf(" [--yydebug]\t\t\tPrint debugging information during parsing\n");
|
||||
exit(ret);
|
||||
}
|
||||
|
||||
|
||||
/** We take arguments from both the command line as well as from the
|
||||
ISPC_ARGS environment variable. This function returns a new set of
|
||||
arguments representing the ones from those two sources merged together.
|
||||
*/
|
||||
*/
|
||||
static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
|
||||
// Copy over the command line arguments (passed in)
|
||||
for (int i = 0; i < Argc; ++i)
|
||||
@@ -227,6 +242,8 @@ int main(int Argc, char *Argv[]) {
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "--help"))
|
||||
usage(0);
|
||||
if (!strcmp(argv[i], "--help-dev"))
|
||||
devUsage(0);
|
||||
else if (!strncmp(argv[i], "-D", 2))
|
||||
g->cppArgs.push_back(argv[i]);
|
||||
else if (!strncmp(argv[i], "--addressing=", 13)) {
|
||||
@@ -271,6 +288,19 @@ int main(int Argc, char *Argv[]) {
|
||||
ot = Module::Bitcode;
|
||||
else if (!strcmp(argv[i], "--emit-obj"))
|
||||
ot = Module::Object;
|
||||
else if (!strcmp(argv[i], "-I")) {
|
||||
if (++i == argc) {
|
||||
fprintf(stderr, "No path specified after -I option.\n");
|
||||
usage(1);
|
||||
}
|
||||
g->includePath.push_back(argv[i]);
|
||||
}
|
||||
else if (!strncmp(argv[i], "-I", 2))
|
||||
g->includePath.push_back(argv[i]+2);
|
||||
else if (!strcmp(argv[i], "--fuzz-test"))
|
||||
g->enableFuzzTest = true;
|
||||
else if (!strncmp(argv[i], "--fuzz-seed=", 12))
|
||||
g->fuzzTestSeed = atoi(argv[i] + 12);
|
||||
else if (!strcmp(argv[i], "--target")) {
|
||||
// FIXME: should remove this way of specifying the target...
|
||||
if (++i == argc) {
|
||||
@@ -383,6 +413,12 @@ int main(int Argc, char *Argv[]) {
|
||||
else if (!strcmp(argv[i], "--pic"))
|
||||
generatePIC = true;
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
else if (!strcmp(argv[i], "--quiet"))
|
||||
g->quiet = true;
|
||||
else if (!strcmp(argv[i], "--yydebug")) {
|
||||
extern int yydebug;
|
||||
yydebug = 1;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
||||
lPrintVersion();
|
||||
return 0;
|
||||
@@ -408,6 +444,24 @@ int main(int Argc, char *Argv[]) {
|
||||
if (debugSet && !optSet)
|
||||
g->opt.level = 0;
|
||||
|
||||
if (g->enableFuzzTest) {
|
||||
if (g->fuzzTestSeed == -1) {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
int seed = (unsigned)time(NULL);
|
||||
#else
|
||||
int seed = getpid();
|
||||
#endif
|
||||
g->fuzzTestSeed = seed;
|
||||
Warning(SourcePos(), "Using seed %d for fuzz testing",
|
||||
g->fuzzTestSeed);
|
||||
}
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
srand(g->fuzzTestSeed);
|
||||
#else
|
||||
srand48(g->fuzzTestSeed);
|
||||
#endif
|
||||
}
|
||||
|
||||
if (outFileName == NULL && headerFileName == NULL)
|
||||
Warning(SourcePos(), "No output file or header file name specified. "
|
||||
"Program will be compiled and warnings/errors will "
|
||||
|
||||
33
module.cpp
33
module.cpp
@@ -161,6 +161,9 @@ Module::CompileFile() {
|
||||
|
||||
bool runPreprocessor = g->runCPP;
|
||||
|
||||
extern void ParserInit();
|
||||
ParserInit();
|
||||
|
||||
if (runPreprocessor) {
|
||||
if (filename != NULL) {
|
||||
// Try to open the file first, since otherwise we crash in the
|
||||
@@ -227,13 +230,19 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
|
||||
}
|
||||
|
||||
if (symbolTable->LookupFunction(sym->name.c_str())) {
|
||||
Error(sym->pos, "Global variable \"%s\" shadows previously-declared function.",
|
||||
sym->name.c_str());
|
||||
Error(sym->pos, "Global variable \"%s\" shadows previously-declared "
|
||||
"function.", sym->name.c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
if (sym->storageClass == SC_EXTERN_C) {
|
||||
Error(sym->pos, "extern \"C\" qualifier can only be used for functions.");
|
||||
Error(sym->pos, "extern \"C\" qualifier can only be used for "
|
||||
"functions.");
|
||||
return;
|
||||
}
|
||||
|
||||
if (sym->type == AtomicType::Void) {
|
||||
Error(sym->pos, "\"void\" type global variable is illegal.");
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1165,6 +1174,24 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
|
||||
inst.createSourceManager(inst.getFileManager());
|
||||
inst.InitializeSourceManager(infilename);
|
||||
|
||||
// Don't remove comments in the preprocessor, so that we can accurately
|
||||
// track the source file position by handling them ourselves.
|
||||
inst.getPreprocessorOutputOpts().ShowComments = 1;
|
||||
|
||||
clang::HeaderSearchOptions &headerOpts = inst.getHeaderSearchOpts();
|
||||
headerOpts.UseBuiltinIncludes = 0;
|
||||
#ifndef LLVM_2_9
|
||||
headerOpts.UseStandardSystemIncludes = 0;
|
||||
#endif // !LLVM_2_9
|
||||
headerOpts.UseStandardCXXIncludes = 0;
|
||||
if (g->debugPrint)
|
||||
headerOpts.Verbose = 1;
|
||||
for (int i = 0; i < (int)g->includePath.size(); ++i)
|
||||
headerOpts.AddPath(g->includePath[i], clang::frontend::Angled,
|
||||
true /* is user supplied */,
|
||||
false /* not a framework */,
|
||||
true /* ignore sys root */);
|
||||
|
||||
clang::PreprocessorOptions &opts = inst.getPreprocessorOpts();
|
||||
|
||||
// Add defs for ISPC and PI
|
||||
|
||||
187
opt.cpp
187
opt.cpp
@@ -184,7 +184,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
|
||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
|
||||
return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
|
||||
#else
|
||||
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[2],
|
||||
return llvm::CallInst::Create(func, &args[0], &args[2],
|
||||
name, insertBefore);
|
||||
#endif
|
||||
}
|
||||
@@ -199,7 +199,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
|
||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
|
||||
return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
|
||||
#else
|
||||
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[3],
|
||||
return llvm::CallInst::Create(func, &args[0], &args[3],
|
||||
name, insertBefore);
|
||||
#endif
|
||||
}
|
||||
@@ -215,7 +215,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
|
||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[4]);
|
||||
return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
|
||||
#else
|
||||
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[4],
|
||||
return llvm::CallInst::Create(func, &args[0], &args[4],
|
||||
name, insertBefore);
|
||||
#endif
|
||||
}
|
||||
@@ -230,7 +230,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
|
||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[5]);
|
||||
return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
|
||||
#else
|
||||
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[5],
|
||||
return llvm::CallInst::Create(func, &args[0], &args[5],
|
||||
name, insertBefore);
|
||||
#endif
|
||||
}
|
||||
@@ -245,7 +245,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
|
||||
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[6]);
|
||||
return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
|
||||
#else
|
||||
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[6],
|
||||
return llvm::CallInst::Create(func, &args[0], &args[6],
|
||||
name, insertBefore);
|
||||
#endif
|
||||
}
|
||||
@@ -368,8 +368,10 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
optPM.add(CreateMaskedStoreOptPass());
|
||||
optPM.add(CreateMaskedLoadOptPass());
|
||||
}
|
||||
optPM.add(CreatePseudoMaskedStorePass());
|
||||
if (!g->opt.disableGatherScatterOptimizations)
|
||||
if (g->opt.disableHandlePseudoMemoryOps == false)
|
||||
optPM.add(CreatePseudoMaskedStorePass());
|
||||
if (g->opt.disableGatherScatterOptimizations == false &&
|
||||
g->opt.disableHandlePseudoMemoryOps == false)
|
||||
optPM.add(CreateGSToLoadStorePass());
|
||||
if (g->opt.disableHandlePseudoMemoryOps == false) {
|
||||
optPM.add(CreatePseudoMaskedStorePass());
|
||||
@@ -628,11 +630,20 @@ lGetMask(llvm::Value *factor) {
|
||||
"known and all bits on". */
|
||||
Assert(g->target.vectorWidth < 32);
|
||||
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
|
||||
#else
|
||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
|
||||
#endif
|
||||
if (cv) {
|
||||
int mask = 0;
|
||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||
#ifdef LLVM_3_1svn
|
||||
for (int i = 0; i < (int)cv->getNumElements(); ++i)
|
||||
elements.push_back(cv->getElementAsConstant(i));
|
||||
#else
|
||||
cv->getVectorElements(elements);
|
||||
#endif
|
||||
|
||||
for (unsigned int i = 0; i < elements.size(); ++i) {
|
||||
llvm::APInt intMaskValue;
|
||||
@@ -1125,7 +1136,17 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
|
||||
// Indexing into global arrays can lead to this form, with
|
||||
// ConstantVectors..
|
||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||
#ifdef LLVM_3_1svn
|
||||
for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
|
||||
llvm::Constant *c =
|
||||
llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
|
||||
if (c == NULL)
|
||||
return NULL;
|
||||
elements.push_back(c);
|
||||
}
|
||||
#else
|
||||
cv->getVectorElements(elements);
|
||||
#endif
|
||||
|
||||
llvm::Constant *delta[ISPC_MAX_NVEC];
|
||||
for (unsigned int i = 0; i < elements.size(); ++i) {
|
||||
@@ -1235,6 +1256,9 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset,
|
||||
llvm::Value **variableOffset,
|
||||
llvm::Instruction *insertBefore) {
|
||||
if (llvm::isa<llvm::ConstantVector>(vec) ||
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::isa<llvm::ConstantDataVector>(vec) ||
|
||||
#endif
|
||||
llvm::isa<llvm::ConstantAggregateZero>(vec)) {
|
||||
*constOffset = vec;
|
||||
*variableOffset = NULL;
|
||||
@@ -1353,7 +1377,12 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset,
|
||||
in *splat, if so). */
|
||||
static bool
|
||||
lIs248Splat(llvm::Value *v, int *splat) {
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::ConstantDataVector *cvec =
|
||||
llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||
#else
|
||||
llvm::ConstantVector *cvec = llvm::dyn_cast<llvm::ConstantVector>(v);
|
||||
#endif
|
||||
if (cvec == NULL)
|
||||
return false;
|
||||
|
||||
@@ -1460,6 +1489,9 @@ lExtractUniforms(llvm::Value **vec, llvm::Instruction *insertBefore) {
|
||||
fprintf(stderr, "\n");
|
||||
|
||||
if (llvm::isa<llvm::ConstantVector>(*vec) ||
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::isa<llvm::ConstantDataVector>(*vec) ||
|
||||
#endif
|
||||
llvm::isa<llvm::ConstantAggregateZero>(*vec))
|
||||
return NULL;
|
||||
|
||||
@@ -1855,6 +1887,7 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
|
||||
return modifiedAny;
|
||||
}
|
||||
|
||||
@@ -2092,6 +2125,7 @@ PseudoMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
return modifiedAny;
|
||||
}
|
||||
|
||||
@@ -2139,11 +2173,22 @@ char GSToLoadStorePass::ID = 0;
|
||||
elements.
|
||||
*/
|
||||
static bool
|
||||
lVectorIsLinearConstantInts(llvm::ConstantVector *cv, int vectorLength,
|
||||
lVectorIsLinearConstantInts(
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::ConstantDataVector *cv,
|
||||
#else
|
||||
llvm::ConstantVector *cv,
|
||||
#endif
|
||||
int vectorLength,
|
||||
int stride) {
|
||||
// Flatten the vector out into the elements array
|
||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||
#ifdef LLVM_3_1svn
|
||||
for (int i = 0; i < (int)cv->getNumElements(); ++i)
|
||||
elements.push_back(cv->getElementAsConstant(i));
|
||||
#else
|
||||
cv->getVectorElements(elements);
|
||||
#endif
|
||||
Assert((int)elements.size() == vectorLength);
|
||||
|
||||
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[0]);
|
||||
@@ -2182,11 +2227,19 @@ lCheckMulForLinear(llvm::Value *op0, llvm::Value *op1, int vectorLength,
|
||||
int stride, std::vector<llvm::PHINode *> &seenPhis) {
|
||||
// Is the first operand a constant integer value splatted across all of
|
||||
// the lanes?
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(op0);
|
||||
#else
|
||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(op0);
|
||||
#endif
|
||||
if (cv == NULL)
|
||||
return false;
|
||||
llvm::ConstantInt *splat =
|
||||
llvm::dyn_cast<llvm::ConstantInt>(cv->getSplatValue());
|
||||
|
||||
llvm::Constant *csplat = cv->getSplatValue();
|
||||
if (csplat == NULL)
|
||||
return false;
|
||||
|
||||
llvm::ConstantInt *splat = llvm::dyn_cast<llvm::ConstantInt>(csplat);
|
||||
if (splat == NULL)
|
||||
return false;
|
||||
|
||||
@@ -2214,7 +2267,11 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
|
||||
std::vector<llvm::PHINode *> &seenPhis) {
|
||||
// First try the easy case: if the values are all just constant
|
||||
// integers and have the expected stride between them, then we're done.
|
||||
#ifdef LLVM_3_1svn
|
||||
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||
#else
|
||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
|
||||
#endif
|
||||
if (cv != NULL)
|
||||
return lVectorIsLinearConstantInts(cv, vectorLength, stride);
|
||||
|
||||
@@ -2471,7 +2528,6 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
constOffsets, "varying+const_offsets",
|
||||
callInst);
|
||||
|
||||
{
|
||||
std::vector<llvm::PHINode *> seenPhis;
|
||||
if (LLVMVectorValuesAllEqual(fullOffsets, g->target.vectorWidth, seenPhis)) {
|
||||
// If all the offsets are equal, then compute the single
|
||||
@@ -2493,66 +2549,61 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
"load_braodcast");
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
else {
|
||||
// A scatter with everyone going to the same location is
|
||||
// undefined. Issue a warning and arbitrarily let the
|
||||
// first guy win.
|
||||
Warning(pos, "Undefined behavior: all program instances are "
|
||||
"writing to the same location!");
|
||||
// undefined (if there's more than one program instance in
|
||||
// the gang). Issue a warning.
|
||||
if (g->target.vectorWidth > 1)
|
||||
Warning(pos, "Undefined behavior: all program instances are "
|
||||
"writing to the same location!");
|
||||
|
||||
llvm::Value *first =
|
||||
llvm::ExtractElementInst::Create(storeValue, LLVMInt32(0), "rvalue_first",
|
||||
callInst);
|
||||
lCopyMetadata(first, callInst);
|
||||
// We could do something similar to the gather case, where
|
||||
// we arbitrarily write one of the values, but we need to
|
||||
// a) check to be sure the mask isn't all off and b) pick
|
||||
// the value from an executing program instance in that
|
||||
// case. We'll just let a bunch of the program instances
|
||||
// do redundant writes, since this isn't important to make
|
||||
// fast anyway...
|
||||
}
|
||||
}
|
||||
else {
|
||||
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
|
||||
|
||||
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0),
|
||||
"ptr2rvalue_type", callInst);
|
||||
std::vector<llvm::PHINode *> seenPhis;
|
||||
if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth,
|
||||
step, seenPhis)) {
|
||||
// We have a linear sequence of memory locations being accessed
|
||||
// starting with the location given by the offset from
|
||||
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
|
||||
// and 64 bit gather/scatters, respectively.)
|
||||
llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst);
|
||||
lCopyMetadata(ptr, callInst);
|
||||
|
||||
llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false,
|
||||
scatterInfo->align);
|
||||
lCopyMetadata(sinst, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, sinst);
|
||||
if (gatherInfo != NULL) {
|
||||
Debug(pos, "Transformed gather to unaligned vector load!");
|
||||
llvm::Instruction *newCall =
|
||||
lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load");
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
}
|
||||
else {
|
||||
Debug(pos, "Transformed scatter to unaligned vector store!");
|
||||
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
|
||||
callInst);
|
||||
llvm::Instruction *newCall =
|
||||
lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue,
|
||||
mask, "");
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
}
|
||||
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
|
||||
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
|
||||
|
||||
std::vector<llvm::PHINode *> seenPhis;
|
||||
if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth,
|
||||
step, seenPhis)) {
|
||||
// We have a linear sequence of memory locations being accessed
|
||||
// starting with the location given by the offset from
|
||||
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
|
||||
// and 64 bit gather/scatters, respectively.)
|
||||
llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst);
|
||||
lCopyMetadata(ptr, callInst);
|
||||
|
||||
if (gatherInfo != NULL) {
|
||||
Debug(pos, "Transformed gather to unaligned vector load!");
|
||||
llvm::Instruction *newCall =
|
||||
lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load");
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
}
|
||||
else {
|
||||
Debug(pos, "Transformed scatter to unaligned vector store!");
|
||||
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
|
||||
callInst);
|
||||
llvm::Instruction *newCall =
|
||||
lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue,
|
||||
mask, "");
|
||||
lCopyMetadata(newCall, callInst);
|
||||
llvm::ReplaceInstWithInst(callInst, newCall);
|
||||
}
|
||||
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -2675,10 +2726,12 @@ PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
Assert(ok);
|
||||
|
||||
callInst->setCalledFunction(info->actualFunc);
|
||||
if (info->isGather)
|
||||
PerformanceWarning(pos, "Gather required to compute value in expression.");
|
||||
else
|
||||
PerformanceWarning(pos, "Scatter required for storing value.");
|
||||
if (g->target.vectorWidth > 1) {
|
||||
if (info->isGather)
|
||||
PerformanceWarning(pos, "Gather required to compute value in expression.");
|
||||
else
|
||||
PerformanceWarning(pos, "Scatter required for storing value.");
|
||||
}
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
|
||||
609
parse.yy
609
parse.yy
@@ -37,10 +37,13 @@
|
||||
/* one for 'if', one for 'cif' */
|
||||
%expect 2
|
||||
|
||||
%pure-parser
|
||||
%error-verbose
|
||||
|
||||
%code requires {
|
||||
|
||||
#define yytnamerr lYYTNameErr
|
||||
|
||||
|
||||
#define YYLTYPE SourcePos
|
||||
|
||||
# define YYLLOC_DEFAULT(Current, Rhs, N) \
|
||||
@@ -87,11 +90,16 @@ struct ForeachDimension;
|
||||
__FILE__, __LINE__);
|
||||
|
||||
union YYSTYPE;
|
||||
extern int yylex(YYSTYPE *, SourcePos *);
|
||||
extern int yylex();
|
||||
|
||||
extern char *yytext;
|
||||
|
||||
void yyerror(const char *s) { fprintf(stderr, "Parse error: %s\n", s); }
|
||||
void yyerror(const char *s);
|
||||
|
||||
static int lYYTNameErr(char *yyres, const char *yystr);
|
||||
|
||||
static void lSuggestBuiltinAlternates();
|
||||
static void lSuggestParamListAlternates();
|
||||
|
||||
static void lAddDeclaration(DeclSpecs *ds, Declarator *decl);
|
||||
static void lAddFunctionParams(Declarator *decl);
|
||||
@@ -106,13 +114,14 @@ static void lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
const EnumType *enumType);
|
||||
|
||||
static const char *lBuiltinTokens[] = {
|
||||
"assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor",
|
||||
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double",
|
||||
"else", "enum", "export", "extern", "false", "float", "for", "foreach",
|
||||
"foreach_tiled", "goto", "if", "inline", "int", "int8", "int16",
|
||||
"int32", "int64", "launch", "NULL", "print", "return", "signed", "sizeof",
|
||||
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform",
|
||||
"unsigned", "varying", "void", "while", NULL
|
||||
"assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo",
|
||||
"cfor", "cif", "cwhile", "const", "continue", "creturn", "default",
|
||||
"do", "delete", "double", "else", "enum", "export", "extern", "false",
|
||||
"float", "for", "foreach", "foreach_tiled", "goto", "if", "inline",
|
||||
"int", "int8", "int16", "int32", "int64", "launch", "new", "NULL",
|
||||
"print", "return", "signed", "sizeof", "static", "struct", "switch",
|
||||
"sync", "task", "true", "typedef", "uniform", "unsigned", "varying",
|
||||
"void", "while", NULL
|
||||
};
|
||||
|
||||
static const char *lParamListTokens[] = {
|
||||
@@ -170,7 +179,7 @@ struct ForeachDimension {
|
||||
%token TOKEN_AND_OP TOKEN_OR_OP TOKEN_MUL_ASSIGN TOKEN_DIV_ASSIGN TOKEN_MOD_ASSIGN
|
||||
%token TOKEN_ADD_ASSIGN TOKEN_SUB_ASSIGN TOKEN_LEFT_ASSIGN TOKEN_RIGHT_ASSIGN
|
||||
%token TOKEN_AND_ASSIGN TOKEN_OR_ASSIGN TOKEN_XOR_ASSIGN
|
||||
%token TOKEN_SIZEOF
|
||||
%token TOKEN_SIZEOF TOKEN_NEW TOKEN_DELETE
|
||||
|
||||
%token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK
|
||||
%token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA
|
||||
@@ -189,7 +198,7 @@ struct ForeachDimension {
|
||||
%type <expr> multiplicative_expression additive_expression shift_expression
|
||||
%type <expr> relational_expression equality_expression and_expression
|
||||
%type <expr> exclusive_or_expression inclusive_or_expression
|
||||
%type <expr> logical_and_expression logical_or_expression
|
||||
%type <expr> logical_and_expression logical_or_expression new_expression
|
||||
%type <expr> conditional_expression assignment_expression expression
|
||||
%type <expr> initializer constant_expression for_test
|
||||
%type <exprList> argument_expression_list initializer_list
|
||||
@@ -197,7 +206,7 @@ struct ForeachDimension {
|
||||
%type <stmt> statement labeled_statement compound_statement for_init_statement
|
||||
%type <stmt> expression_statement selection_statement iteration_statement
|
||||
%type <stmt> jump_statement statement_list declaration_statement print_statement
|
||||
%type <stmt> assert_statement sync_statement
|
||||
%type <stmt> assert_statement sync_statement delete_statement
|
||||
|
||||
%type <declaration> declaration parameter_declaration
|
||||
%type <declarators> init_declarator_list
|
||||
@@ -215,7 +224,7 @@ struct ForeachDimension {
|
||||
%type <enumType> enum_specifier
|
||||
|
||||
%type <type> specifier_qualifier_list struct_or_union_specifier
|
||||
%type <type> type_specifier type_name
|
||||
%type <type> type_specifier type_name rate_qualified_new_type
|
||||
%type <type> short_vec_specifier
|
||||
%type <atomicType> atomic_var_type_specifier
|
||||
|
||||
@@ -225,7 +234,7 @@ struct ForeachDimension {
|
||||
|
||||
%type <stringVal> string_constant
|
||||
%type <constCharPtr> struct_or_union_name enum_identifier goto_identifier
|
||||
%type <intVal> int_constant soa_width_specifier
|
||||
%type <intVal> int_constant soa_width_specifier rate_qualified_new
|
||||
|
||||
%type <foreachDimension> foreach_dimension_specifier
|
||||
%type <foreachDimensionList> foreach_dimension_list
|
||||
@@ -284,6 +293,7 @@ primary_expression
|
||||
/* | TOKEN_STRING_LITERAL
|
||||
{ UNIMPLEMENTED }*/
|
||||
| '(' expression ')' { $$ = $2; }
|
||||
| '(' error ')' { $$ = NULL; }
|
||||
;
|
||||
|
||||
launch_expression
|
||||
@@ -307,10 +317,14 @@ postfix_expression
|
||||
: primary_expression
|
||||
| postfix_expression '[' expression ']'
|
||||
{ $$ = new IndexExpr($1, $3, Union(@1,@4)); }
|
||||
| postfix_expression '[' error ']'
|
||||
{ $$ = NULL; }
|
||||
| postfix_expression '(' ')'
|
||||
{ $$ = new FunctionCallExpr($1, new ExprList(Union(@1,@2)), Union(@1,@3)); }
|
||||
| postfix_expression '(' argument_expression_list ')'
|
||||
{ $$ = new FunctionCallExpr($1, $3, Union(@1,@4)); }
|
||||
| postfix_expression '(' error ')'
|
||||
{ $$ = NULL; }
|
||||
| launch_expression
|
||||
| postfix_expression '.' TOKEN_IDENTIFIER
|
||||
{ $$ = MemberExpr::create($1, yytext, Union(@1,@3), @3, false); }
|
||||
@@ -327,7 +341,10 @@ argument_expression_list
|
||||
| argument_expression_list ',' assignment_expression
|
||||
{
|
||||
ExprList *argList = dynamic_cast<ExprList *>($1);
|
||||
Assert(argList != NULL);
|
||||
if (argList == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
argList = new ExprList(@3);
|
||||
}
|
||||
argList->exprs.push_back($3);
|
||||
argList->pos = Union(argList->pos, @3);
|
||||
$$ = argList;
|
||||
@@ -448,8 +465,36 @@ conditional_expression
|
||||
{ $$ = new SelectExpr($1, $3, $5, Union(@1,@5)); }
|
||||
;
|
||||
|
||||
assignment_expression
|
||||
rate_qualified_new
|
||||
: TOKEN_NEW { $$ = 0; }
|
||||
| TOKEN_UNIFORM TOKEN_NEW { $$ = TYPEQUAL_UNIFORM; }
|
||||
| TOKEN_VARYING TOKEN_NEW { $$ = TYPEQUAL_VARYING; }
|
||||
;
|
||||
|
||||
rate_qualified_new_type
|
||||
: type_specifier { $$ = $1; }
|
||||
| TOKEN_UNIFORM type_specifier { $$ = $2 ? $2->GetAsUniformType() : NULL; }
|
||||
| TOKEN_VARYING type_specifier { $$ = $2 ? $2->GetAsVaryingType() : NULL; }
|
||||
;
|
||||
|
||||
new_expression
|
||||
: conditional_expression
|
||||
| rate_qualified_new rate_qualified_new_type
|
||||
{
|
||||
$$ = new NewExpr($1, $2, NULL, NULL, @1, Union(@1, @2));
|
||||
}
|
||||
| rate_qualified_new rate_qualified_new_type '(' initializer_list ')'
|
||||
{
|
||||
$$ = new NewExpr($1, $2, $4, NULL, @1, Union(@1, @2));
|
||||
}
|
||||
| rate_qualified_new rate_qualified_new_type '[' expression ']'
|
||||
{
|
||||
$$ = new NewExpr($1, $2, NULL, $4, @1, Union(@1, @4));
|
||||
}
|
||||
;
|
||||
|
||||
assignment_expression
|
||||
: new_expression
|
||||
| unary_expression '=' assignment_expression
|
||||
{ $$ = new AssignExpr(AssignExpr::Assign, $1, $3, Union(@1, @3)); }
|
||||
| unary_expression TOKEN_MUL_ASSIGN assignment_expression
|
||||
@@ -487,9 +532,16 @@ constant_expression
|
||||
declaration_statement
|
||||
: declaration
|
||||
{
|
||||
if ($1->declSpecs->storageClass == SC_TYPEDEF) {
|
||||
if ($1 == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
$$ = NULL;
|
||||
}
|
||||
else if ($1->declSpecs->storageClass == SC_TYPEDEF) {
|
||||
for (unsigned int i = 0; i < $1->declarators.size(); ++i) {
|
||||
m->AddTypeDef($1->declarators[i]->GetSymbol());
|
||||
if ($1->declarators[i] == NULL)
|
||||
Assert(m->errorCount > 0);
|
||||
else
|
||||
m->AddTypeDef($1->declarators[i]->GetSymbol());
|
||||
}
|
||||
$$ = NULL;
|
||||
}
|
||||
@@ -590,15 +642,20 @@ init_declarator_list
|
||||
: init_declarator
|
||||
{
|
||||
std::vector<Declarator *> *dl = new std::vector<Declarator *>;
|
||||
dl->push_back($1);
|
||||
if ($1 != NULL)
|
||||
dl->push_back($1);
|
||||
$$ = dl;
|
||||
}
|
||||
| init_declarator_list ',' init_declarator
|
||||
{
|
||||
std::vector<Declarator *> *dl = (std::vector<Declarator *> *)$1;
|
||||
if (dl != NULL && $3 != NULL)
|
||||
if (dl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
dl = new std::vector<Declarator *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
dl->push_back($3);
|
||||
$$ = $1;
|
||||
$$ = dl;
|
||||
}
|
||||
;
|
||||
|
||||
@@ -623,10 +680,10 @@ storage_class_specifier
|
||||
type_specifier
|
||||
: atomic_var_type_specifier { $$ = $1; }
|
||||
| TOKEN_TYPE_NAME
|
||||
{ const Type *t = m->symbolTable->LookupType(yytext);
|
||||
Assert(t != NULL);
|
||||
{
|
||||
const Type *t = m->symbolTable->LookupType(yytext);
|
||||
$$ = t;
|
||||
}
|
||||
}
|
||||
| struct_or_union_specifier { $$ = $1; }
|
||||
| enum_specifier { $$ = $1; }
|
||||
;
|
||||
@@ -644,41 +701,47 @@ atomic_var_type_specifier
|
||||
|
||||
short_vec_specifier
|
||||
: atomic_var_type_specifier '<' int_constant '>'
|
||||
{
|
||||
Type* vt =
|
||||
new VectorType($1, (int32_t)$3);
|
||||
$$ = vt;
|
||||
}
|
||||
{
|
||||
$$ = $1 ? new VectorType($1, (int32_t)$3) : NULL;
|
||||
}
|
||||
;
|
||||
|
||||
struct_or_union_name
|
||||
: TOKEN_IDENTIFIER { $$ = strdup(yytext); }
|
||||
| TOKEN_TYPE_NAME { $$ = strdup(yytext); }
|
||||
| TOKEN_TYPE_NAME { $$ = strdup(yytext); }
|
||||
;
|
||||
|
||||
struct_or_union_specifier
|
||||
: struct_or_union struct_or_union_name '{' struct_declaration_list '}'
|
||||
{
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
std::vector<SourcePos> elementPositions;
|
||||
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
StructType *st = new StructType($2, elementTypes, elementNames,
|
||||
elementPositions, false, Type::Unbound, @2);
|
||||
m->symbolTable->AddType($2, st, @2);
|
||||
$$ = st;
|
||||
{
|
||||
if ($4 != NULL) {
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
std::vector<SourcePos> elementPositions;
|
||||
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
StructType *st = new StructType($2, elementTypes, elementNames,
|
||||
elementPositions, false, Type::Unbound, @2);
|
||||
m->symbolTable->AddType($2, st, @2);
|
||||
$$ = st;
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| struct_or_union '{' struct_declaration_list '}'
|
||||
{
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
std::vector<SourcePos> elementPositions;
|
||||
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
// FIXME: should be unbound
|
||||
$$ = new StructType("", elementTypes, elementNames, elementPositions,
|
||||
false, Type::Unbound, @1);
|
||||
if ($3 != NULL) {
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
std::vector<SourcePos> elementPositions;
|
||||
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
// FIXME: should be unbound
|
||||
$$ = new StructType("", elementTypes, elementNames, elementPositions,
|
||||
false, Type::Unbound, @1);
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| struct_or_union '{' '}'
|
||||
{
|
||||
@@ -689,16 +752,17 @@ struct_or_union_specifier
|
||||
Error(@1, "Empty struct definitions not allowed.");
|
||||
}
|
||||
| struct_or_union struct_or_union_name
|
||||
{ const Type *st = m->symbolTable->LookupType($2);
|
||||
if (!st) {
|
||||
std::vector<std::string> alternates = m->symbolTable->ClosestTypeMatch($2);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str());
|
||||
}
|
||||
else if (dynamic_cast<const StructType *>(st) == NULL)
|
||||
Error(@2, "Type \"%s\" is not a struct type! (%s)", $2,
|
||||
st->GetString().c_str());
|
||||
$$ = st;
|
||||
{
|
||||
const Type *st = m->symbolTable->LookupType($2);
|
||||
if (!st) {
|
||||
std::vector<std::string> alternates = m->symbolTable->ClosestTypeMatch($2);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str());
|
||||
}
|
||||
else if (dynamic_cast<const StructType *>(st) == NULL)
|
||||
Error(@2, "Type \"%s\" is not a struct type! (%s)", $2,
|
||||
st->GetString().c_str());
|
||||
$$ = st;
|
||||
}
|
||||
;
|
||||
|
||||
@@ -710,22 +774,26 @@ struct_declaration_list
|
||||
: struct_declaration
|
||||
{
|
||||
std::vector<StructDeclaration *> *sdl = new std::vector<StructDeclaration *>;
|
||||
if (sdl != NULL && $1 != NULL)
|
||||
if ($1 != NULL)
|
||||
sdl->push_back($1);
|
||||
$$ = sdl;
|
||||
}
|
||||
| struct_declaration_list struct_declaration
|
||||
{
|
||||
std::vector<StructDeclaration *> *sdl = (std::vector<StructDeclaration *> *)$1;
|
||||
if (sdl != NULL && $2 != NULL)
|
||||
if (sdl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
sdl = new std::vector<StructDeclaration *>;
|
||||
}
|
||||
if ($2 != NULL)
|
||||
sdl->push_back($2);
|
||||
$$ = $1;
|
||||
$$ = sdl;
|
||||
}
|
||||
;
|
||||
|
||||
struct_declaration
|
||||
: specifier_qualifier_list struct_declarator_list ';'
|
||||
{ $$ = new StructDeclaration($1, $2); }
|
||||
{ $$ = ($1 != NULL && $2 != NULL) ? new StructDeclaration($1, $2) : NULL; }
|
||||
;
|
||||
|
||||
specifier_qualifier_list
|
||||
@@ -791,9 +859,13 @@ struct_declarator_list
|
||||
| struct_declarator_list ',' struct_declarator
|
||||
{
|
||||
std::vector<Declarator *> *sdl = (std::vector<Declarator *> *)$1;
|
||||
if (sdl != NULL && $3 != NULL)
|
||||
if (sdl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
sdl = new std::vector<Declarator *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
sdl->push_back($3);
|
||||
$$ = $1;
|
||||
$$ = sdl;
|
||||
}
|
||||
;
|
||||
|
||||
@@ -860,9 +932,14 @@ enumerator_list
|
||||
}
|
||||
| enumerator_list ',' enumerator
|
||||
{
|
||||
if ($1 != NULL && $3 != NULL)
|
||||
$1->push_back($3);
|
||||
$$ = $1;
|
||||
std::vector<Symbol *> *symList = $1;
|
||||
if (symList == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
symList = new std::vector<Symbol *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
symList->push_back($3);
|
||||
$$ = symList;
|
||||
}
|
||||
;
|
||||
|
||||
@@ -910,19 +987,27 @@ type_qualifier_list
|
||||
declarator
|
||||
: pointer direct_declarator
|
||||
{
|
||||
Declarator *tail = $1;
|
||||
while (tail->child != NULL)
|
||||
tail = tail->child;
|
||||
tail->child = $2;
|
||||
$$ = $1;
|
||||
if ($1 != NULL) {
|
||||
Declarator *tail = $1;
|
||||
while (tail->child != NULL)
|
||||
tail = tail->child;
|
||||
tail->child = $2;
|
||||
$$ = $1;
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| reference direct_declarator
|
||||
{
|
||||
Declarator *tail = $1;
|
||||
while (tail->child != NULL)
|
||||
tail = tail->child;
|
||||
tail->child = $2;
|
||||
$$ = $1;
|
||||
if ($1 != NULL) {
|
||||
Declarator *tail = $1;
|
||||
while (tail->child != NULL)
|
||||
tail = tail->child;
|
||||
tail->child = $2;
|
||||
$$ = $1;
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| direct_declarator
|
||||
;
|
||||
@@ -971,12 +1056,17 @@ direct_declarator
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| direct_declarator '[' error ']'
|
||||
{
|
||||
$$ = NULL;
|
||||
}
|
||||
| direct_declarator '(' parameter_type_list ')'
|
||||
{
|
||||
if ($1 != NULL) {
|
||||
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4));
|
||||
d->child = $1;
|
||||
if ($3 != NULL) d->functionParams = *$3;
|
||||
if ($3 != NULL)
|
||||
d->functionParams = *$3;
|
||||
$$ = d;
|
||||
}
|
||||
else
|
||||
@@ -992,6 +1082,10 @@ direct_declarator
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| direct_declarator '(' error ')'
|
||||
{
|
||||
$$ = NULL;
|
||||
}
|
||||
;
|
||||
|
||||
|
||||
@@ -1046,27 +1140,14 @@ parameter_list
|
||||
{
|
||||
std::vector<Declaration *> *dl = (std::vector<Declaration *> *)$1;
|
||||
if (dl == NULL)
|
||||
// dl may be NULL due to an earlier parse error...
|
||||
dl = new std::vector<Declaration *>;
|
||||
if ($3 != NULL)
|
||||
dl->push_back($3);
|
||||
$$ = dl;
|
||||
}
|
||||
| error
|
||||
| error ','
|
||||
{
|
||||
std::vector<std::string> builtinTokens;
|
||||
const char **token = lParamListTokens;
|
||||
while (*token) {
|
||||
builtinTokens.push_back(*token);
|
||||
++token;
|
||||
}
|
||||
if (strlen(yytext) == 0)
|
||||
Error(@1, "Syntax error--premature end of file.");
|
||||
else {
|
||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
Error(@1, "Syntax error--token \"%s\" unexpected.%s", yytext, alts.c_str());
|
||||
}
|
||||
lSuggestParamListAlternates();
|
||||
$$ = NULL;
|
||||
}
|
||||
;
|
||||
@@ -1078,18 +1159,26 @@ parameter_declaration
|
||||
}
|
||||
| declaration_specifiers declarator '=' initializer
|
||||
{
|
||||
if ($2 != NULL)
|
||||
if ($1 != NULL && $2 != NULL) {
|
||||
$2->initExpr = $4;
|
||||
$$ = new Declaration($1, $2);
|
||||
|
||||
$$ = new Declaration($1, $2);
|
||||
}
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| declaration_specifiers abstract_declarator
|
||||
{
|
||||
$$ = new Declaration($1, $2);
|
||||
if ($1 != NULL && $2 != NULL)
|
||||
$$ = new Declaration($1, $2);
|
||||
else
|
||||
$$ = NULL;
|
||||
}
|
||||
| declaration_specifiers
|
||||
{
|
||||
$$ = new Declaration($1);
|
||||
if ($1 == NULL)
|
||||
$$ = NULL;
|
||||
else
|
||||
$$ = new Declaration($1);
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1104,7 +1193,10 @@ type_name
|
||||
: specifier_qualifier_list
|
||||
| specifier_qualifier_list abstract_declarator
|
||||
{
|
||||
$$ = $2->GetType($1, NULL);
|
||||
if ($1 == NULL || $2 == NULL)
|
||||
$$ = NULL;
|
||||
else
|
||||
$$ = $2->GetType($1, NULL);
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1116,20 +1208,27 @@ abstract_declarator
|
||||
| direct_abstract_declarator
|
||||
| pointer direct_abstract_declarator
|
||||
{
|
||||
Declarator *d = new Declarator(DK_POINTER, Union(@1, @2));
|
||||
d->child = $2;
|
||||
$$ = d;
|
||||
if ($2 == NULL)
|
||||
$$ = NULL;
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_POINTER, Union(@1, @2));
|
||||
d->child = $2;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
| reference
|
||||
{
|
||||
Declarator *d = new Declarator(DK_REFERENCE, @1);
|
||||
$$ = d;
|
||||
$$ = new Declarator(DK_REFERENCE, @1);
|
||||
}
|
||||
| reference direct_abstract_declarator
|
||||
{
|
||||
Declarator *d = new Declarator(DK_REFERENCE, Union(@1, @2));
|
||||
d->child = $2;
|
||||
$$ = d;
|
||||
if ($2 == NULL)
|
||||
$$ = NULL;
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_REFERENCE, Union(@1, @2));
|
||||
d->child = $2;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1161,15 +1260,19 @@ direct_abstract_declarator
|
||||
}
|
||||
| direct_abstract_declarator '[' ']'
|
||||
{
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3));
|
||||
d->arraySize = 0;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
if ($1 == NULL)
|
||||
$$ = NULL;
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3));
|
||||
d->arraySize = 0;
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
| direct_abstract_declarator '[' constant_expression ']'
|
||||
{
|
||||
int size;
|
||||
if ($3 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
if ($1 != NULL && $3 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) {
|
||||
if (size < 0) {
|
||||
Error(@3, "Array dimension must be non-negative.");
|
||||
$$ = NULL;
|
||||
@@ -1190,19 +1293,28 @@ direct_abstract_declarator
|
||||
{
|
||||
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3));
|
||||
if ($2 != NULL) d->functionParams = *$2;
|
||||
$$ = d;
|
||||
}
|
||||
| direct_abstract_declarator '(' ')'
|
||||
{
|
||||
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3));
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
if ($1 == NULL)
|
||||
$$ = NULL;
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3));
|
||||
d->child = $1;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
| direct_abstract_declarator '(' parameter_type_list ')'
|
||||
{
|
||||
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4));
|
||||
d->child = $1;
|
||||
if ($3 != NULL) d->functionParams = *$3;
|
||||
$$ = d;
|
||||
if ($1 == NULL)
|
||||
$$ = NULL;
|
||||
else {
|
||||
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4));
|
||||
d->child = $1;
|
||||
if ($3 != NULL) d->functionParams = *$3;
|
||||
$$ = d;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1217,15 +1329,14 @@ initializer_list
|
||||
{ $$ = new ExprList($1, @1); }
|
||||
| initializer_list ',' initializer
|
||||
{
|
||||
if ($1 == NULL)
|
||||
$$ = NULL;
|
||||
else {
|
||||
ExprList *exprList = dynamic_cast<ExprList *>($1);
|
||||
Assert(exprList);
|
||||
exprList->exprs.push_back($3);
|
||||
exprList->pos = Union(exprList->pos, @3);
|
||||
$$ = exprList;
|
||||
ExprList *exprList = $1;
|
||||
if (exprList == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
exprList = new ExprList(@3);
|
||||
}
|
||||
exprList->exprs.push_back($3);
|
||||
exprList->pos = Union(exprList->pos, @3);
|
||||
$$ = exprList;
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1240,21 +1351,10 @@ statement
|
||||
| print_statement
|
||||
| assert_statement
|
||||
| sync_statement
|
||||
| error
|
||||
| delete_statement
|
||||
| error ';'
|
||||
{
|
||||
std::vector<std::string> builtinTokens;
|
||||
const char **token = lBuiltinTokens;
|
||||
while (*token) {
|
||||
builtinTokens.push_back(*token);
|
||||
++token;
|
||||
}
|
||||
if (strlen(yytext) == 0)
|
||||
Error(@1, "Syntax error--premature end of file.");
|
||||
else {
|
||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
Error(@1, "Syntax error--token \"%s\" unexpected.%s", yytext, alts.c_str());
|
||||
}
|
||||
lSuggestBuiltinAlternates();
|
||||
$$ = NULL;
|
||||
}
|
||||
;
|
||||
@@ -1300,15 +1400,19 @@ statement_list
|
||||
}
|
||||
| statement_list statement
|
||||
{
|
||||
if ($1 != NULL)
|
||||
((StmtList *)$1)->Add($2);
|
||||
$$ = $1;
|
||||
StmtList *sl = (StmtList *)$1;
|
||||
if (sl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
sl = new StmtList(@2);
|
||||
}
|
||||
sl->Add($2);
|
||||
$$ = sl;
|
||||
}
|
||||
;
|
||||
|
||||
expression_statement
|
||||
: ';' { $$ = NULL; }
|
||||
| expression ';' { $$ = new ExprStmt($1, @1); }
|
||||
| expression ';' { $$ = $1 ? new ExprStmt($1, @1) : NULL; }
|
||||
;
|
||||
|
||||
selection_statement
|
||||
@@ -1374,7 +1478,14 @@ foreach_dimension_list
|
||||
}
|
||||
| foreach_dimension_list ',' foreach_dimension_specifier
|
||||
{
|
||||
$$->push_back($3);
|
||||
std::vector<ForeachDimension *> *dv = $1;
|
||||
if (dv == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
dv = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
dv->push_back($3);
|
||||
$$ = dv;
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1405,38 +1516,57 @@ iteration_statement
|
||||
}
|
||||
| foreach_scope '(' foreach_dimension_list ')'
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i)
|
||||
m->symbolTable->AddVariable(dims[i]->sym);
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
for (unsigned int i = 0; i < dims->size(); ++i)
|
||||
m->symbolTable->AddVariable((*dims)[i]->sym);
|
||||
}
|
||||
statement
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
|
||||
std::vector<Symbol *> syms;
|
||||
std::vector<Expr *> begins, ends;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i) {
|
||||
syms.push_back(dims[i]->sym);
|
||||
begins.push_back(dims[i]->beginExpr);
|
||||
ends.push_back(dims[i]->endExpr);
|
||||
for (unsigned int i = 0; i < dims->size(); ++i) {
|
||||
syms.push_back((*dims)[i]->sym);
|
||||
begins.push_back((*dims)[i]->beginExpr);
|
||||
ends.push_back((*dims)[i]->endExpr);
|
||||
}
|
||||
$$ = new ForeachStmt(syms, begins, ends, $6, false, @1);
|
||||
m->symbolTable->PopScope();
|
||||
}
|
||||
| foreach_tiled_scope '(' foreach_dimension_list ')'
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i)
|
||||
m->symbolTable->AddVariable(dims[i]->sym);
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
|
||||
for (unsigned int i = 0; i < dims->size(); ++i)
|
||||
m->symbolTable->AddVariable((*dims)[i]->sym);
|
||||
}
|
||||
statement
|
||||
{
|
||||
std::vector<ForeachDimension *> &dims = *$3;
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
|
||||
std::vector<Symbol *> syms;
|
||||
std::vector<Expr *> begins, ends;
|
||||
for (unsigned int i = 0; i < dims.size(); ++i) {
|
||||
syms.push_back(dims[i]->sym);
|
||||
begins.push_back(dims[i]->beginExpr);
|
||||
ends.push_back(dims[i]->endExpr);
|
||||
for (unsigned int i = 0; i < dims->size(); ++i) {
|
||||
syms.push_back((*dims)[i]->sym);
|
||||
begins.push_back((*dims)[i]->beginExpr);
|
||||
ends.push_back((*dims)[i]->endExpr);
|
||||
}
|
||||
$$ = new ForeachStmt(syms, begins, ends, $6, true, @1);
|
||||
m->symbolTable->PopScope();
|
||||
@@ -1469,23 +1599,30 @@ jump_statement
|
||||
;
|
||||
|
||||
sync_statement
|
||||
: TOKEN_SYNC
|
||||
: TOKEN_SYNC ';'
|
||||
{ $$ = new ExprStmt(new SyncExpr(@1), @1); }
|
||||
;
|
||||
|
||||
delete_statement
|
||||
: TOKEN_DELETE expression ';'
|
||||
{
|
||||
$$ = new DeleteStmt($2, Union(@1, @2));
|
||||
}
|
||||
;
|
||||
|
||||
print_statement
|
||||
: TOKEN_PRINT '(' string_constant ')'
|
||||
: TOKEN_PRINT '(' string_constant ')' ';'
|
||||
{
|
||||
$$ = new PrintStmt(*$3, NULL, @1);
|
||||
}
|
||||
| TOKEN_PRINT '(' string_constant ',' argument_expression_list ')'
|
||||
| TOKEN_PRINT '(' string_constant ',' argument_expression_list ')' ';'
|
||||
{
|
||||
$$ = new PrintStmt(*$3, $5, @1);
|
||||
}
|
||||
;
|
||||
|
||||
assert_statement
|
||||
: TOKEN_ASSERT '(' string_constant ',' expression ')'
|
||||
: TOKEN_ASSERT '(' string_constant ',' expression ')' ';'
|
||||
{
|
||||
$$ = new AssertStmt(*$3, $5, @1);
|
||||
}
|
||||
@@ -1494,22 +1631,7 @@ assert_statement
|
||||
translation_unit
|
||||
: external_declaration
|
||||
| translation_unit external_declaration
|
||||
| error
|
||||
{
|
||||
std::vector<std::string> builtinTokens;
|
||||
const char **token = lBuiltinTokens;
|
||||
while (*token) {
|
||||
builtinTokens.push_back(*token);
|
||||
++token;
|
||||
}
|
||||
if (strlen(yytext) == 0)
|
||||
Error(@1, "Syntax error--premature end of file.");
|
||||
else {
|
||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
Error(@1, "Syntax error--token \"%s\" unexpected.%s", yytext, alts.c_str());
|
||||
}
|
||||
}
|
||||
| error ';'
|
||||
;
|
||||
|
||||
external_declaration
|
||||
@@ -1535,9 +1657,11 @@ function_definition
|
||||
compound_statement
|
||||
{
|
||||
std::vector<Symbol *> args;
|
||||
Symbol *sym = $2->GetFunctionInfo($1, &args);
|
||||
if (sym != NULL)
|
||||
m->AddFunctionDefinition(sym, args, $4);
|
||||
if ($2 != NULL) {
|
||||
Symbol *sym = $2->GetFunctionInfo($1, &args);
|
||||
if (sym != NULL)
|
||||
m->AddFunctionDefinition(sym, args, $4);
|
||||
}
|
||||
m->symbolTable->PopScope(); // push in lAddFunctionParams();
|
||||
}
|
||||
/* function with no declared return type??
|
||||
@@ -1553,6 +1677,93 @@ func(...)
|
||||
%%
|
||||
|
||||
|
||||
void yyerror(const char *s) {
|
||||
if (strlen(yytext) == 0)
|
||||
Error(yylloc, "Premature end of file: %s.", s);
|
||||
else
|
||||
Error(yylloc, "%s.", s);
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
lYYTNameErr (char *yyres, const char *yystr)
|
||||
{
|
||||
extern std::map<std::string, std::string> tokenNameRemap;
|
||||
Assert(tokenNameRemap.size() > 0);
|
||||
if (tokenNameRemap.find(yystr) != tokenNameRemap.end()) {
|
||||
std::string n = tokenNameRemap[yystr];
|
||||
if (yyres == NULL)
|
||||
return n.size();
|
||||
else
|
||||
return yystpcpy(yyres, n.c_str()) - yyres;
|
||||
}
|
||||
|
||||
if (*yystr == '"')
|
||||
{
|
||||
YYSIZE_T yyn = 0;
|
||||
char const *yyp = yystr;
|
||||
|
||||
for (;;)
|
||||
switch (*++yyp)
|
||||
{
|
||||
case '\'':
|
||||
case ',':
|
||||
goto do_not_strip_quotes;
|
||||
|
||||
case '\\':
|
||||
if (*++yyp != '\\')
|
||||
goto do_not_strip_quotes;
|
||||
/* Fall through. */
|
||||
default:
|
||||
if (yyres)
|
||||
yyres[yyn] = *yyp;
|
||||
yyn++;
|
||||
break;
|
||||
|
||||
case '"':
|
||||
if (yyres)
|
||||
yyres[yyn] = '\0';
|
||||
return yyn;
|
||||
}
|
||||
do_not_strip_quotes: ;
|
||||
}
|
||||
|
||||
if (! yyres)
|
||||
return yystrlen (yystr);
|
||||
|
||||
return yystpcpy (yyres, yystr) - yyres;
|
||||
}
|
||||
|
||||
static void
|
||||
lSuggestBuiltinAlternates() {
|
||||
std::vector<std::string> builtinTokens;
|
||||
const char **token = lBuiltinTokens;
|
||||
while (*token) {
|
||||
builtinTokens.push_back(*token);
|
||||
++token;
|
||||
}
|
||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
if (alts.size() > 0)
|
||||
Error(yylloc, "%s", alts.c_str());
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lSuggestParamListAlternates() {
|
||||
std::vector<std::string> builtinTokens;
|
||||
const char **token = lParamListTokens;
|
||||
while (*token) {
|
||||
builtinTokens.push_back(*token);
|
||||
++token;
|
||||
}
|
||||
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
if (alts.size() > 0)
|
||||
Error(yylloc, "%s", alts.c_str());
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
|
||||
if (ds == NULL || decl == NULL)
|
||||
@@ -1576,7 +1787,10 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
|
||||
m->AddFunctionDeclaration(sym, isInline);
|
||||
}
|
||||
else {
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
if (sym->type == NULL)
|
||||
Assert(m->errorCount > 0);
|
||||
else
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0;
|
||||
m->AddGlobalVariable(sym, decl->initExpr, isConst);
|
||||
}
|
||||
@@ -1591,6 +1805,11 @@ static void
|
||||
lAddFunctionParams(Declarator *decl) {
|
||||
m->symbolTable->PushScope();
|
||||
|
||||
if (decl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// walk down to the declarator for the function itself
|
||||
while (decl->kind != DK_FUNCTION && decl->child != NULL)
|
||||
decl = decl->child;
|
||||
@@ -1605,14 +1824,18 @@ lAddFunctionParams(Declarator *decl) {
|
||||
continue;
|
||||
Assert(pdecl->declarators.size() == 1);
|
||||
Symbol *sym = pdecl->declarators[0]->GetSymbol();
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
#ifndef NDEBUG
|
||||
bool ok = m->symbolTable->AddVariable(sym);
|
||||
if (ok == false)
|
||||
if (sym == NULL || sym->type == NULL)
|
||||
Assert(m->errorCount > 0);
|
||||
else {
|
||||
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
|
||||
#ifndef NDEBUG
|
||||
bool ok = m->symbolTable->AddVariable(sym);
|
||||
if (ok == false)
|
||||
Assert(m->errorCount > 0);
|
||||
#else
|
||||
m->symbolTable->AddVariable(sym);
|
||||
m->symbolTable->AddVariable(sym);
|
||||
#endif
|
||||
}
|
||||
}
|
||||
|
||||
// The corresponding pop scope happens in function_definition rules
|
||||
@@ -1622,7 +1845,7 @@ lAddFunctionParams(Declarator *decl) {
|
||||
|
||||
/** Add a symbol for the built-in mask variable to the symbol table */
|
||||
static void lAddMaskToSymbolTable(SourcePos pos) {
|
||||
const Type *t = g->target.isa == Target::GENERIC ?
|
||||
const Type *t = g->target.maskBitCount == 1 ?
|
||||
AtomicType::VaryingConstBool : AtomicType::VaryingConstUInt32;
|
||||
Symbol *maskSymbol = new Symbol("__mask", pos, t);
|
||||
m->symbolTable->AddVariable(maskSymbol);
|
||||
|
||||
43
run_tests.py
43
run_tests.py
@@ -42,23 +42,19 @@ parser.add_option('-j', '--jobs', dest='num_jobs', help='Maximum number of jobs
|
||||
default="1024", type="int")
|
||||
parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output',
|
||||
default=False, action="store_true")
|
||||
if not is_windows:
|
||||
parser.add_option('--valgrind', dest='valgrind', help='Run tests with valgrind',
|
||||
default=False, action="store_true")
|
||||
parser.add_option('--wrap-exe', dest='wrapexe',
|
||||
help='Executable to wrap test runs with (e.g. "valgrind")',
|
||||
default="")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
if not is_windows and options.valgrind:
|
||||
valgrind_exe = "valgrind "
|
||||
else:
|
||||
valgrind_exe = ""
|
||||
|
||||
if not is_windows:
|
||||
ispc_exe = "./ispc"
|
||||
else:
|
||||
ispc_exe = "../Release/ispc.exe"
|
||||
|
||||
is_generic_target = options.target.find("generic-") != -1
|
||||
is_generic_target = (options.target.find("generic-") != -1 and
|
||||
options.target != "generic-1")
|
||||
if is_generic_target and options.include_file == None:
|
||||
if options.target == "generic-4":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/sse4.h\n")
|
||||
@@ -76,14 +72,31 @@ if options.compiler_exe == None:
|
||||
else:
|
||||
options.compiler_exe = "g++"
|
||||
|
||||
# if no specific test files are specified, run all of the tests in tests/
|
||||
# and failing_tests/
|
||||
def fix_windows_paths(files):
    """Return a new list with every backslash in each path replaced by '/'.

    files: iterable of path strings (e.g. the result of glob.glob()).
    The input is not modified; an empty input yields an empty list.
    """
    # Use the str.replace() method rather than the deprecated
    # string.replace() module function, which no longer exists in Python 3.
    return [fn.replace('\\', '/') for fn in files]
|
||||
|
||||
|
||||
# if no specific test files are specified, run all of the tests in tests/,
|
||||
# failing_tests/, and tests_errors/
|
||||
if len(args) == 0:
|
||||
files = glob.glob("tests/*ispc") + glob.glob("failing_tests/*ispc") + \
|
||||
glob.glob("tests_errors/*ispc")
|
||||
files = fix_windows_paths(files)
|
||||
else:
|
||||
if is_windows:
|
||||
argfiles = [ ]
|
||||
for f in args:
|
||||
# we have to glob ourselves if this is being run under a DOS
|
||||
# shell..
|
||||
argfiles += glob.glob(f)
|
||||
else:
|
||||
argfiles = args
|
||||
|
||||
files = [ ]
|
||||
for f in args:
|
||||
for f in argfiles:
|
||||
if os.path.splitext(string.lower(f))[1] != ".ispc":
|
||||
sys.stdout.write("Ignoring file %s, which doesn't have an .ispc extension.\n" % f)
|
||||
else:
|
||||
@@ -103,6 +116,7 @@ finished_tests_counter_lock = multiprocessing.Lock()
|
||||
# utility routine to print an update on the number of tests that have been
|
||||
# finished. Should be called with the lock held..
|
||||
def update_progress(fn):
|
||||
global total_tests
|
||||
finished_tests_counter.value = finished_tests_counter.value + 1
|
||||
progress_str = " Done %d / %d [%s]" % (finished_tests_counter.value, total_tests, fn)
|
||||
# spaces to clear out detritus from previous printing...
|
||||
@@ -211,7 +225,7 @@ def run_test(filename):
|
||||
"in test %s\n" % filename)
|
||||
return (1, 0)
|
||||
else:
|
||||
is_generic_target = options.target.find("generic-") != -1
|
||||
global is_generic_target
|
||||
if is_generic_target:
|
||||
obj_name = "%s.cpp" % filename
|
||||
|
||||
@@ -248,9 +262,8 @@ def run_test(filename):
|
||||
ispc_cmd += " --emit-c++ --c++-include-file=%s" % options.include_file
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
global valgrind_exe
|
||||
(compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd],
|
||||
valgrind_exe + " " + exe_name, \
|
||||
options.wrapexe + " " + exe_name, \
|
||||
filename, should_fail)
|
||||
|
||||
# clean up after running the test
|
||||
|
||||
649
stdlib.ispc
649
stdlib.ispc
@@ -795,217 +795,6 @@ static inline uniform int64 clock() {
|
||||
return __clock();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Atomics and memory barriers
|
||||
|
||||
static inline void memory_barrier() {
|
||||
__memory_barrier();
|
||||
}
|
||||
|
||||
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
memory_barrier(); \
|
||||
TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
#define DEFINE_ATOMIC_SWAP(TA,TB) \
|
||||
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform int i = 0; \
|
||||
TA ret[programCount]; \
|
||||
TA memVal; \
|
||||
uniform int lastSwap; \
|
||||
uniform int mask = lanemask(); \
|
||||
/* First, have the first running program instance (if any) perform \
|
||||
the swap with memory with its value of "value"; record the \
|
||||
value returned. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \
|
||||
lastSwap = i; \
|
||||
break; \
|
||||
} \
|
||||
/* Now, for all of the remaining running program instances, set the \
|
||||
return value of the last instance that did a swap with this \
|
||||
instance's value of "value"; this gives the same effect as if the \
|
||||
current instance had executed a hardware atomic swap right before \
|
||||
the last one that did a swap. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
ret[lastSwap] = extract(value, i); \
|
||||
lastSwap = i; \
|
||||
} \
|
||||
/* And the last instance that wanted to swap gets the value we \
|
||||
originally got back from memory... */ \
|
||||
ret[lastSwap] = memVal; \
|
||||
memory_barrier(); \
|
||||
return ret[programIndex]; \
|
||||
} \
|
||||
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB) \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
uniform TA oneval = reduce_##OPA(value); \
|
||||
TA ret; \
|
||||
if (lanemask() != 0) { \
|
||||
memory_barrier(); \
|
||||
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
|
||||
memory_barrier(); \
|
||||
} \
|
||||
return ret; \
|
||||
} \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
||||
TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max)
|
||||
DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(int32,int32)
|
||||
|
||||
// For everything but atomic min and max, we can use the same
|
||||
// implementations for unsigned as for signed.
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(unsigned int32,int32)
|
||||
|
||||
DEFINE_ATOMIC_SWAP(float,float)
|
||||
|
||||
DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max)
|
||||
DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(int64,int64)
|
||||
|
||||
// For everything but atomic min and max, we can use the same
|
||||
// implementations for unsigned as for signed.
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(unsigned int64,int64)
|
||||
|
||||
DEFINE_ATOMIC_SWAP(double,double)
|
||||
|
||||
#undef DEFINE_ATOMIC_OP
|
||||
#undef DEFINE_ATOMIC_MINMAX_OP
|
||||
#undef DEFINE_ATOMIC_SWAP
|
||||
|
||||
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
|
||||
static inline TA atomic_compare_exchange_global( \
|
||||
uniform TA * uniform ptr, TA oldval, TA newval) { \
|
||||
memory_barrier(); \
|
||||
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
|
||||
(MASKTYPE)__mask); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline uniform TA atomic_compare_exchange_global( \
|
||||
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = \
|
||||
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
ATOMIC_DECL_CMPXCHG(int32, int32, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, UIntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(float, float, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(int64, int64, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, UIntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
|
||||
|
||||
#undef ATOMIC_DECL_CMPXCHG
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Floating-Point Math
|
||||
|
||||
@@ -1389,6 +1178,419 @@ static inline uniform int64 clamp(uniform int64 v, uniform int64 low,
|
||||
return min(max(v, low), high);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Global atomics and memory barriers
|
||||
|
||||
static inline void memory_barrier() {
|
||||
__memory_barrier();
|
||||
}
|
||||
|
||||
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
memory_barrier(); \
|
||||
TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
#define DEFINE_ATOMIC_SWAP(TA,TB) \
|
||||
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform int i = 0; \
|
||||
TA ret[programCount]; \
|
||||
TA memVal; \
|
||||
uniform int lastSwap; \
|
||||
uniform int mask = lanemask(); \
|
||||
/* First, have the first running program instance (if any) perform \
|
||||
the swap with memory with its value of "value"; record the \
|
||||
value returned. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \
|
||||
lastSwap = i; \
|
||||
break; \
|
||||
} \
|
||||
/* Now, for all of the remaining running program instances, set the \
|
||||
return value of the last instance that did a swap with this \
|
||||
instance's value of "value"; this gives the same effect as if the \
|
||||
current instance had executed a hardware atomic swap right before \
|
||||
the last one that did a swap. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
ret[lastSwap] = extract(value, i); \
|
||||
lastSwap = i; \
|
||||
} \
|
||||
/* And the last instance that wanted to swap gets the value we \
|
||||
originally got back from memory... */ \
|
||||
ret[lastSwap] = memVal; \
|
||||
memory_barrier(); \
|
||||
return ret[programIndex]; \
|
||||
} \
|
||||
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB) \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
uniform TA oneval = reduce_##OPA(value); \
|
||||
TA ret; \
|
||||
if (lanemask() != 0) { \
|
||||
memory_barrier(); \
|
||||
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
|
||||
memory_barrier(); \
|
||||
} \
|
||||
return ret; \
|
||||
} \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
||||
TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
uniform TA v = extract(value, i); \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max)
|
||||
DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(int32,int32)
|
||||
|
||||
// For everything but atomic min and max, we can use the same
|
||||
// implementations for unsigned as for signed.
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(unsigned int32,int32)
|
||||
|
||||
DEFINE_ATOMIC_SWAP(float,float)
|
||||
|
||||
DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min)
|
||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max)
|
||||
DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
|
||||
DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(int64,int64)
|
||||
|
||||
// For everything but atomic min and max, we can use the same
|
||||
// implementations for unsigned as for signed.
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin)
|
||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
|
||||
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
|
||||
DEFINE_ATOMIC_SWAP(unsigned int64,int64)
|
||||
|
||||
DEFINE_ATOMIC_SWAP(double,double)
|
||||
|
||||
#undef DEFINE_ATOMIC_OP
|
||||
#undef DEFINE_ATOMIC_MINMAX_OP
|
||||
#undef DEFINE_ATOMIC_SWAP
|
||||
|
||||
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
|
||||
static inline uniform TA atomic_compare_exchange_global( \
|
||||
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = \
|
||||
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_compare_exchange_global( \
|
||||
uniform TA * uniform ptr, TA oldval, TA newval) { \
|
||||
memory_barrier(); \
|
||||
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
|
||||
(MASKTYPE)__mask); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_compare_exchange_global( \
|
||||
uniform TA * varying ptr, TA oldval, TA newval) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TA r = \
|
||||
__atomic_compare_exchange_uniform_##TB##_global(ptrArray[i], \
|
||||
extract(oldval, i), \
|
||||
extract(newval, i)); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
ATOMIC_DECL_CMPXCHG(int32, int32, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, UIntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(float, float, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(int64, int64, IntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, UIntMaskType)
|
||||
ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
|
||||
|
||||
#undef ATOMIC_DECL_CMPXCHG
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// local atomics
|
||||
|
||||
#define LOCAL_ATOMIC(TYPE,NAME,OPFUNC) \
|
||||
static inline uniform TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, \
|
||||
uniform TYPE value) { \
|
||||
uniform TYPE ret = *ptr; \
|
||||
*ptr = OPFUNC(*ptr, value); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, TYPE value) { \
|
||||
TYPE ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
ret = insert(ret, i, *ptr); \
|
||||
*ptr = OPFUNC(*ptr, extract(value, i)); \
|
||||
} \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TYPE atomic_##NAME##_local(uniform TYPE * p, TYPE value) { \
|
||||
TYPE ret; \
|
||||
uniform TYPE * uniform ptrs[programCount]; \
|
||||
ptrs[programIndex] = p; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
ret = insert(ret, i, *ptrs[i]); \
|
||||
*ptrs[i] = OPFUNC(*ptrs[i], extract(value, i)); \
|
||||
} \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
static inline uniform int32 __add(uniform int32 a, uniform int32 b) { return a+b; }
|
||||
static inline uniform int32 __sub(uniform int32 a, uniform int32 b) { return a-b; }
|
||||
static inline uniform int32 __and(uniform int32 a, uniform int32 b) { return a & b; }
|
||||
static inline uniform int32 __or(uniform int32 a, uniform int32 b) { return a | b; }
|
||||
static inline uniform int32 __xor(uniform int32 a, uniform int32 b) { return a ^ b; }
|
||||
static inline uniform int32 __swap(uniform int32 a, uniform int32 b) { return b; }
|
||||
|
||||
static inline uniform unsigned int32 __add(uniform unsigned int32 a,
|
||||
uniform unsigned int32 b) { return a+b; }
|
||||
static inline uniform unsigned int32 __sub(uniform unsigned int32 a,
|
||||
uniform unsigned int32 b) { return a-b; }
|
||||
static inline uniform unsigned int32 __and(uniform unsigned int32 a,
|
||||
uniform unsigned int32 b) { return a & b; }
|
||||
static inline uniform unsigned int32 __or(uniform unsigned int32 a,
|
||||
uniform unsigned int32 b) { return a | b; }
|
||||
static inline uniform unsigned int32 __xor(uniform unsigned int32 a,
|
||||
uniform unsigned int32 b) { return a ^ b; }
|
||||
static inline uniform unsigned int32 __swap(uniform unsigned int32 a,
|
||||
uniform unsigned int32 b) { return b; }
|
||||
|
||||
|
||||
static inline uniform float __add(uniform float a, uniform float b) { return a+b; }
|
||||
static inline uniform float __sub(uniform float a, uniform float b) { return a-b; }
|
||||
static inline uniform float __swap(uniform float a, uniform float b) { return b; }
|
||||
|
||||
static inline uniform int64 __add(uniform int64 a, uniform int64 b) { return a+b; }
|
||||
static inline uniform int64 __sub(uniform int64 a, uniform int64 b) { return a-b; }
|
||||
static inline uniform int64 __and(uniform int64 a, uniform int64 b) { return a & b; }
|
||||
static inline uniform int64 __or(uniform int64 a, uniform int64 b) { return a | b; }
|
||||
static inline uniform int64 __xor(uniform int64 a, uniform int64 b) { return a ^ b; }
|
||||
static inline uniform int64 __swap(uniform int64 a, uniform int64 b) { return b; }
|
||||
|
||||
static inline uniform unsigned int64 __add(uniform unsigned int64 a,
|
||||
uniform unsigned int64 b) { return a+b; }
|
||||
static inline uniform unsigned int64 __sub(uniform unsigned int64 a,
|
||||
uniform unsigned int64 b) { return a-b; }
|
||||
static inline uniform unsigned int64 __and(uniform unsigned int64 a,
|
||||
uniform unsigned int64 b) { return a & b; }
|
||||
static inline uniform unsigned int64 __or(uniform unsigned int64 a,
|
||||
uniform unsigned int64 b) { return a | b; }
|
||||
static inline uniform unsigned int64 __xor(uniform unsigned int64 a,
|
||||
uniform unsigned int64 b) { return a ^ b; }
|
||||
static inline uniform unsigned int64 __swap(uniform unsigned int64 a,
|
||||
uniform unsigned int64 b) { return b; }
|
||||
|
||||
static inline uniform double __add(uniform double a, uniform double b) { return a+b; }
|
||||
static inline uniform double __sub(uniform double a, uniform double b) { return a-b; }
|
||||
// Combining function used for the double-precision local atomic swap: the
// value written back is simply the new value "b"; the previous contents "a"
// are ignored here (LOCAL_ATOMIC captures and returns them separately).
// Must return "b", like every other __swap() overload.
static inline uniform double __swap(uniform double a, uniform double b) { return b; }
|
||||
|
||||
LOCAL_ATOMIC(int32, add, __add)
|
||||
LOCAL_ATOMIC(int32, subtract, __sub)
|
||||
LOCAL_ATOMIC(int32, and, __and)
|
||||
LOCAL_ATOMIC(int32, or, __or)
|
||||
LOCAL_ATOMIC(int32, xor, __xor)
|
||||
LOCAL_ATOMIC(int32, min, min)
|
||||
LOCAL_ATOMIC(int32, max, max)
|
||||
LOCAL_ATOMIC(int32, swap, __swap)
|
||||
|
||||
LOCAL_ATOMIC(unsigned int32, add, __add)
|
||||
LOCAL_ATOMIC(unsigned int32, subtract, __sub)
|
||||
LOCAL_ATOMIC(unsigned int32, and, __and)
|
||||
LOCAL_ATOMIC(unsigned int32, or, __or)
|
||||
LOCAL_ATOMIC(unsigned int32, xor, __xor)
|
||||
LOCAL_ATOMIC(unsigned int32, min, min)
|
||||
LOCAL_ATOMIC(unsigned int32, max, max)
|
||||
LOCAL_ATOMIC(unsigned int32, swap, __swap)
|
||||
|
||||
LOCAL_ATOMIC(float, add, __add)
|
||||
LOCAL_ATOMIC(float, subtract, __sub)
|
||||
LOCAL_ATOMIC(float, min, min)
|
||||
LOCAL_ATOMIC(float, max, max)
|
||||
LOCAL_ATOMIC(float, swap, __swap)
|
||||
|
||||
LOCAL_ATOMIC(int64, add, __add)
|
||||
LOCAL_ATOMIC(int64, subtract, __sub)
|
||||
LOCAL_ATOMIC(int64, and, __and)
|
||||
LOCAL_ATOMIC(int64, or, __or)
|
||||
LOCAL_ATOMIC(int64, xor, __xor)
|
||||
LOCAL_ATOMIC(int64, min, min)
|
||||
LOCAL_ATOMIC(int64, max, max)
|
||||
LOCAL_ATOMIC(int64, swap, __swap)
|
||||
|
||||
LOCAL_ATOMIC(unsigned int64, add, __add)
|
||||
LOCAL_ATOMIC(unsigned int64, subtract, __sub)
|
||||
LOCAL_ATOMIC(unsigned int64, and, __and)
|
||||
LOCAL_ATOMIC(unsigned int64, or, __or)
|
||||
LOCAL_ATOMIC(unsigned int64, xor, __xor)
|
||||
LOCAL_ATOMIC(unsigned int64, min, min)
|
||||
LOCAL_ATOMIC(unsigned int64, max, max)
|
||||
LOCAL_ATOMIC(unsigned int64, swap, __swap)
|
||||
|
||||
LOCAL_ATOMIC(double, add, __add)
|
||||
LOCAL_ATOMIC(double, subtract, __sub)
|
||||
LOCAL_ATOMIC(double, min, min)
|
||||
LOCAL_ATOMIC(double, max, max)
|
||||
LOCAL_ATOMIC(double, swap, __swap)
|
||||
|
||||
// compare exchange
|
||||
#define LOCAL_CMPXCHG(TYPE) \
|
||||
static inline uniform TYPE atomic_compare_exchange_local(uniform TYPE * uniform ptr, \
|
||||
uniform TYPE cmp, \
|
||||
uniform TYPE update) { \
|
||||
uniform TYPE old = *ptr; \
|
||||
if (old == cmp) \
|
||||
*ptr = update; \
|
||||
return old; \
|
||||
} \
|
||||
static inline TYPE atomic_compare_exchange_local(uniform TYPE * uniform ptr, \
|
||||
TYPE cmp, TYPE update) { \
|
||||
TYPE ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TYPE old = *ptr; \
|
||||
if (old == extract(cmp, i)) \
|
||||
*ptr = extract(update, i); \
|
||||
ret = insert(ret, i, old); \
|
||||
} \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TYPE atomic_compare_exchange_local(uniform TYPE * varying p, \
|
||||
TYPE cmp, TYPE update) { \
|
||||
uniform TYPE * uniform ptrs[programCount]; \
|
||||
ptrs[programIndex] = p; \
|
||||
TYPE ret; \
|
||||
uniform int mask = lanemask(); \
|
||||
for (uniform int i = 0; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
continue; \
|
||||
uniform TYPE old = *ptrs[i]; \
|
||||
if (old == extract(cmp, i)) \
|
||||
*ptrs[i] = extract(update, i); \
|
||||
ret = insert(ret, i, old); \
|
||||
} \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
LOCAL_CMPXCHG(int32)
|
||||
LOCAL_CMPXCHG(unsigned int32)
|
||||
LOCAL_CMPXCHG(float)
|
||||
LOCAL_CMPXCHG(int64)
|
||||
LOCAL_CMPXCHG(unsigned int64)
|
||||
LOCAL_CMPXCHG(double)
|
||||
|
||||
#undef LOCAL_ATOMIC
|
||||
#undef LOCAL_CMPXCHG
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Transcendentals (float precision)
|
||||
|
||||
@@ -3246,14 +3448,23 @@ static inline uniform unsigned int __seed4(RNGState * uniform state,
|
||||
}
|
||||
|
||||
static inline void seed_rng(uniform RNGState * uniform state, uniform unsigned int seed) {
|
||||
seed = __seed4(state, 0, seed);
|
||||
if (programCount == 8)
|
||||
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||
if (programCount == 16) {
|
||||
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16));
|
||||
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24));
|
||||
if (programCount == 1) {
|
||||
state->z1 = seed;
|
||||
state->z2 = seed ^ 0xbeeff00d;
|
||||
state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
|
||||
state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
|
||||
}
|
||||
else {
|
||||
seed = __seed4(state, 0, seed);
|
||||
if (programCount == 8)
|
||||
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||
if (programCount == 16) {
|
||||
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16));
|
||||
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
341
stmt.cpp
341
stmt.cpp
@@ -119,153 +119,6 @@ DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p)
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
lPossiblyResolveFunctionOverloads(Expr *expr, const Type *type) {
|
||||
FunctionSymbolExpr *fse = NULL;
|
||||
const FunctionType *funcType = NULL;
|
||||
if (dynamic_cast<const PointerType *>(type) != NULL &&
|
||||
(funcType = dynamic_cast<const FunctionType *>(type->GetBaseType())) &&
|
||||
(fse = dynamic_cast<FunctionSymbolExpr *>(expr)) != NULL) {
|
||||
// We're initializing a function pointer with a function symbol,
|
||||
// which in turn may represent an overloaded function. So we need
|
||||
// to try to resolve the overload based on the type of the symbol
|
||||
// we're initializing here.
|
||||
std::vector<const Type *> paramTypes;
|
||||
for (int i = 0; i < funcType->GetNumParameters(); ++i)
|
||||
paramTypes.push_back(funcType->GetParameterType(i));
|
||||
|
||||
if (fse->ResolveOverloads(expr->pos, paramTypes) == false)
|
||||
return false;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/** Utility routine that emits code to initialize a symbol given an
|
||||
initializer expression.
|
||||
|
||||
@param lvalue Memory location of storage for the symbol's data
|
||||
@param symName Name of symbol (used in error messages)
|
||||
@param symType Type of variable being initialized
|
||||
@param initExpr Expression for the initializer
|
||||
@param ctx FunctionEmitContext to use for generating instructions
|
||||
@param pos Source file position of the variable being initialized
|
||||
*/
|
||||
static void
|
||||
lInitSymbol(llvm::Value *lvalue, const char *symName, const Type *symType,
|
||||
Expr *initExpr, FunctionEmitContext *ctx, SourcePos pos) {
|
||||
if (initExpr == NULL)
|
||||
// leave it uninitialized
|
||||
return;
|
||||
|
||||
// If the initializer is a straight up expression that isn't an
|
||||
// ExprList, then we'll see if we can type convert it to the type of
|
||||
// the variable.
|
||||
if (dynamic_cast<ExprList *>(initExpr) == NULL) {
|
||||
if (lPossiblyResolveFunctionOverloads(initExpr, symType) == false)
|
||||
return;
|
||||
initExpr = TypeConvertExpr(initExpr, symType, "initializer");
|
||||
|
||||
if (initExpr != NULL) {
|
||||
llvm::Value *initializerValue = initExpr->GetValue(ctx);
|
||||
if (initializerValue != NULL)
|
||||
// Bingo; store the value in the variable's storage
|
||||
ctx->StoreInst(initializerValue, lvalue);
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Atomic types and enums can't be initialized with { ... } initializer
|
||||
// expressions, so print an error and return if that's what we've got
|
||||
// here..
|
||||
if (dynamic_cast<const AtomicType *>(symType) != NULL ||
|
||||
dynamic_cast<const EnumType *>(symType) != NULL ||
|
||||
dynamic_cast<const PointerType *>(symType) != NULL) {
|
||||
ExprList *elist = dynamic_cast<ExprList *>(initExpr);
|
||||
if (elist != NULL) {
|
||||
if (elist->exprs.size() == 1)
|
||||
lInitSymbol(lvalue, symName, symType, elist->exprs[0], ctx,
|
||||
pos);
|
||||
else
|
||||
Error(initExpr->pos, "Expression list initializers can't be used for "
|
||||
"variable \"%s\' with type \"%s\".", symName,
|
||||
symType->GetString().c_str());
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
const ReferenceType *rt = dynamic_cast<const ReferenceType *>(symType);
|
||||
if (rt) {
|
||||
if (!Type::Equal(initExpr->GetType(), rt)) {
|
||||
Error(initExpr->pos, "Initializer for reference type \"%s\" must have same "
|
||||
"reference type itself. \"%s\" is incompatible.",
|
||||
rt->GetString().c_str(), initExpr->GetType()->GetString().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
llvm::Value *initializerValue = initExpr->GetValue(ctx);
|
||||
if (initializerValue)
|
||||
ctx->StoreInst(initializerValue, lvalue);
|
||||
return;
|
||||
}
|
||||
|
||||
// There are two cases for initializing structs, arrays and vectors;
|
||||
// either a single initializer may be provided (float foo[3] = 0;), in
|
||||
// which case all of the elements are initialized to the given value,
|
||||
// or an initializer list may be provided (float foo[3] = { 1,2,3 }),
|
||||
// in which case the elements are initialized with the corresponding
|
||||
// values.
|
||||
const CollectionType *collectionType =
|
||||
dynamic_cast<const CollectionType *>(symType);
|
||||
if (collectionType != NULL) {
|
||||
std::string name;
|
||||
if (dynamic_cast<const StructType *>(symType) != NULL)
|
||||
name = "struct";
|
||||
else if (dynamic_cast<const ArrayType *>(symType) != NULL)
|
||||
name = "array";
|
||||
else if (dynamic_cast<const VectorType *>(symType) != NULL)
|
||||
name = "vector";
|
||||
else
|
||||
FATAL("Unexpected CollectionType in lInitSymbol()");
|
||||
|
||||
ExprList *exprList = dynamic_cast<ExprList *>(initExpr);
|
||||
if (exprList != NULL) {
|
||||
// The { ... } case; make sure we have the same number of
|
||||
// expressions in the ExprList as we have struct members
|
||||
int nInits = exprList->exprs.size();
|
||||
if (nInits != collectionType->GetElementCount()) {
|
||||
Error(initExpr->pos, "Initializer for %s \"%s\" requires "
|
||||
"%d values; %d provided.", name.c_str(), symName,
|
||||
collectionType->GetElementCount(), nInits);
|
||||
return;
|
||||
}
|
||||
|
||||
// Initialize each element with the corresponding value from
|
||||
// the ExprList
|
||||
for (int i = 0; i < nInits; ++i) {
|
||||
llvm::Value *ep;
|
||||
if (dynamic_cast<const StructType *>(symType) != NULL)
|
||||
ep = ctx->AddElementOffset(lvalue, i, NULL, "element");
|
||||
else
|
||||
ep = ctx->GetElementPtrInst(lvalue, LLVMInt32(0), LLVMInt32(i),
|
||||
PointerType::GetUniform(collectionType->GetElementType(i)),
|
||||
"gep");
|
||||
|
||||
lInitSymbol(ep, symName, collectionType->GetElementType(i),
|
||||
exprList->exprs[i], ctx, pos);
|
||||
}
|
||||
}
|
||||
else
|
||||
Error(initExpr->pos, "Can't assign type \"%s\" to \"%s\".",
|
||||
initExpr->GetType()->GetString().c_str(),
|
||||
collectionType->GetString().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
FATAL("Unexpected Type in lInitSymbol()");
|
||||
}
|
||||
|
||||
|
||||
static bool
|
||||
lHasUnsizedArrays(const Type *type) {
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(type);
|
||||
@@ -333,7 +186,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// zero value.
|
||||
llvm::Constant *cinit = NULL;
|
||||
if (initExpr != NULL) {
|
||||
if (lPossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
|
||||
if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
|
||||
continue;
|
||||
// FIXME: we only need this for function pointers; it was
|
||||
// already done for atomic types and enums in
|
||||
@@ -377,8 +230,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
// And then get it initialized...
|
||||
sym->parentFunction = ctx->GetFunction();
|
||||
lInitSymbol(sym->storagePtr, sym->name.c_str(), sym->type,
|
||||
initExpr, ctx, sym->pos);
|
||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -575,7 +427,7 @@ IfStmt::TypeCheck() {
|
||||
int
|
||||
IfStmt::EstimateCost() const {
|
||||
const Type *type;
|
||||
if (test == NULL || (type = test->GetType()) != NULL)
|
||||
if (test == NULL || (type = test->GetType()) == NULL)
|
||||
return 0;
|
||||
|
||||
return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF;
|
||||
@@ -621,103 +473,6 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
}
|
||||
|
||||
|
||||
/** Given an AST node, check to see if it's safe if we happen to run the
|
||||
code for that node with the execution mask all off.
|
||||
*/
|
||||
static bool
|
||||
lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||
bool *okPtr = (bool *)data;
|
||||
|
||||
if (dynamic_cast<FunctionCallExpr *>(node) != NULL) {
|
||||
// FIXME: If we could somehow determine that the function being
|
||||
// called was safe (and all of the args Exprs were safe, then it'd
|
||||
// be nice to be able to return true here. (Consider a call to
|
||||
// e.g. floatbits() in the stdlib.) Unfortunately for now we just
|
||||
// have to be conservative.
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (dynamic_cast<AssertStmt *>(node) != NULL) {
|
||||
// While it's fine to run the assert for varying tests, it's not
|
||||
// desirable to check an assert on a uniform variable if all of the
|
||||
// lanes are off.
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
if (g->target.allOffMaskIsSafe == true)
|
||||
// Don't worry about memory accesses if we have a target that can
|
||||
// safely run them with the mask all off
|
||||
return true;
|
||||
|
||||
IndexExpr *ie;
|
||||
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
|
||||
const Type *type = ie->baseExpr->GetType();
|
||||
if (type == NULL)
|
||||
return true;
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL)
|
||||
type = type->GetReferenceTarget();
|
||||
|
||||
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
|
||||
if (ce == NULL) {
|
||||
// indexing with a variable... -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
const PointerType *pointerType =
|
||||
dynamic_cast<const PointerType *>(type);
|
||||
if (pointerType != NULL) {
|
||||
// pointer[index] -> can't be sure -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
const SequentialType *seqType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
Assert(seqType != NULL);
|
||||
int nElements = seqType->GetElementCount();
|
||||
if (nElements == 0) {
|
||||
// Unsized array, so we can't be sure -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
int32_t indices[ISPC_MAX_NVEC];
|
||||
int count = ce->AsInt32(indices);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
if (indices[i] < 0 || indices[i] >= nElements) {
|
||||
// Index is out of bounds -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
// All indices are in-bounds
|
||||
return true;
|
||||
}
|
||||
|
||||
MemberExpr *me;
|
||||
if ((me = dynamic_cast<MemberExpr *>(node)) != NULL &&
|
||||
me->dereferenceExpr) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
DereferenceExpr *de;
|
||||
if ((de = dynamic_cast<DereferenceExpr *>(node)) != NULL) {
|
||||
const Type *exprType = de->expr->GetType();
|
||||
if (dynamic_cast<const PointerType *>(exprType) != NULL) {
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/** Emit code for an if test that checks the mask and the test values and
|
||||
tries to be smart about jumping over code that doesn't need to be run.
|
||||
*/
|
||||
@@ -771,7 +526,7 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
//
|
||||
// Where the overhead of checking if any of the program instances wants
|
||||
// to run one side or the other is more than the actual computation.
|
||||
// The lSafeToRunWithAllLanesOff() checks to make sure that we don't do this
|
||||
// SafeToRunWithMaskAllOff() checks to make sure that we don't do this
|
||||
// for potentially dangerous code like:
|
||||
//
|
||||
// if (index < count) array[index] = 0;
|
||||
@@ -783,9 +538,8 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
bool costIsAcceptable = (trueFalseCost <
|
||||
PREDICATE_SAFE_IF_STATEMENT_COST);
|
||||
|
||||
bool safeToRunWithAllLanesOff = true;
|
||||
WalkAST(trueStmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
|
||||
WalkAST(falseStmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
|
||||
bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
|
||||
SafeToRunWithMaskAllOff(falseStmts));
|
||||
|
||||
if (safeToRunWithAllLanesOff &&
|
||||
(costIsAcceptable || g->opt.disableCoherentControlFlow)) {
|
||||
@@ -2123,9 +1877,7 @@ lCheckMask(Stmt *stmts) {
|
||||
return false;
|
||||
|
||||
int cost = EstimateCost(stmts);
|
||||
|
||||
bool safeToRunWithAllLanesOff = true;
|
||||
WalkAST(stmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
|
||||
bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts);
|
||||
|
||||
// The mask should be checked if the code following the
|
||||
// 'case'/'default' is relatively complex, or if it would be unsafe to
|
||||
@@ -2880,3 +2632,82 @@ AssertStmt::EstimateCost() const {
|
||||
return COST_ASSERT;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// DeleteStmt
|
||||
|
||||
DeleteStmt::DeleteStmt(Expr *e, SourcePos p)
|
||||
: Stmt(p) {
|
||||
expr = e;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
const Type *exprType;
|
||||
if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
llvm::Value *exprValue = expr->GetValue(ctx);
|
||||
if (exprValue == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Typechecking should catch this
|
||||
Assert(dynamic_cast<const PointerType *>(exprType) != NULL);
|
||||
|
||||
if (exprType->IsUniformType()) {
|
||||
// For deletion of a uniform pointer, we just need to cast the
|
||||
// pointer type to a void pointer type, to match what
|
||||
// __delete_uniform() from the builtins expects.
|
||||
exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType,
|
||||
"ptr_to_void");
|
||||
llvm::Function *func = m->module->getFunction("__delete_uniform");
|
||||
Assert(func != NULL);
|
||||
|
||||
ctx->CallInst(func, NULL, exprValue, "");
|
||||
}
|
||||
else {
|
||||
// Varying pointers are arrays of ints, and __delete_varying()
|
||||
// takes a vector of i64s (even for 32-bit targets). Therefore, we
|
||||
// only need to extend to 64-bit values on 32-bit targets before
|
||||
// calling it.
|
||||
llvm::Function *func = m->module->getFunction("__delete_varying");
|
||||
Assert(func != NULL);
|
||||
if (g->target.is32Bit)
|
||||
exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
|
||||
"ptr_to_64");
|
||||
ctx->CallInst(func, NULL, exprValue, "");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
DeleteStmt::Print(int indent) const {
|
||||
printf("%*cDelete Stmt", indent, ' ');
|
||||
}
|
||||
|
||||
|
||||
Stmt *
|
||||
DeleteStmt::TypeCheck() {
|
||||
const Type *exprType;
|
||||
if (expr == NULL || ((exprType = expr->GetType()) == NULL))
|
||||
return NULL;
|
||||
|
||||
if (dynamic_cast<const PointerType *>(exprType) == NULL) {
|
||||
Error(pos, "Illegal to delete non-pointer type \"%s\".",
|
||||
exprType->GetString().c_str());
|
||||
return NULL;
|
||||
}
|
||||
|
||||
return this;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
DeleteStmt::EstimateCost() const {
|
||||
return COST_DELETE;
|
||||
}
|
||||
|
||||
17
stmt.h
17
stmt.h
@@ -442,4 +442,21 @@ public:
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
/** Representation of a delete statement in the program.
|
||||
*/
|
||||
class DeleteStmt : public Stmt {
|
||||
public:
|
||||
DeleteStmt(Expr *e, SourcePos p);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
/** Expression that gives the pointer value to be deleted. */
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
#endif // ISPC_STMT_H
|
||||
|
||||
39
sym.cpp
39
sym.cpp
@@ -354,3 +354,42 @@ SymbolTable::Print() {
|
||||
depth += 4;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
inline int ispcRand() {
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
return rand();
|
||||
#else
|
||||
return lrand48();
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
Symbol *
|
||||
SymbolTable::RandomSymbol() {
|
||||
int v = ispcRand() % variables.size();
|
||||
if (variables[v]->size() == 0)
|
||||
return NULL;
|
||||
int count = ispcRand() % variables[v]->size();
|
||||
SymbolMapType::iterator iter = variables[v]->begin();
|
||||
while (count-- > 0) {
|
||||
++iter;
|
||||
Assert(iter != variables[v]->end());
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
SymbolTable::RandomType() {
|
||||
int v = ispcRand() % types.size();
|
||||
if (types[v]->size() == 0)
|
||||
return NULL;
|
||||
int count = ispcRand() % types[v]->size();
|
||||
TypeMapType::iterator iter = types[v]->begin();
|
||||
while (count-- > 0) {
|
||||
++iter;
|
||||
Assert(iter != types[v]->end());
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
7
sym.h
7
sym.h
@@ -244,6 +244,13 @@ public:
|
||||
(Debugging method). */
|
||||
void Print();
|
||||
|
||||
/** Returns a random symbol from the symbol table. (It is not
|
||||
guaranteed that it is equally likely to return all symbols). */
|
||||
Symbol *RandomSymbol();
|
||||
|
||||
/** Returns a random type from the symbol table. */
|
||||
const Type *RandomType();
|
||||
|
||||
private:
|
||||
std::vector<std::string> closestTypeMatch(const char *str,
|
||||
bool structsVsEnums) const;
|
||||
|
||||
@@ -15,7 +15,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programCount == 4)
|
||||
if (programCount == 1)
|
||||
RET[programIndex] = 1;
|
||||
else if (programCount == 4)
|
||||
RET[programIndex] = 5.;
|
||||
else
|
||||
RET[programIndex] = 10.;
|
||||
|
||||
@@ -3,13 +3,13 @@ export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
uniform float a[programCount];
|
||||
for (unsigned int i = 0; i < programCount; ++i)
|
||||
a[i] = aFOO[i];
|
||||
uniform float a[programCount+4];
|
||||
for (unsigned int i = 0; i < programCount+4; ++i)
|
||||
a[i] = aFOO[min((int)i, programCount)];
|
||||
|
||||
RET[programIndex] = *(a + 2);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 3;
|
||||
RET[programIndex] = (programCount == 1) ? 2 : 3;
|
||||
}
|
||||
|
||||
@@ -14,4 +14,4 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[]) { RET[programIndex] = 5; }
|
||||
export void result(uniform float RET[]) { RET[programIndex] = programCount == 1 ? 0 : 5; }
|
||||
|
||||
@@ -14,7 +14,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
if (programCount == 4)
|
||||
if (programCount == 1)
|
||||
RET[programIndex] = 0;
|
||||
else if (programCount == 4)
|
||||
RET[programIndex] = 2;
|
||||
else
|
||||
RET[programIndex] = 4;
|
||||
|
||||
@@ -13,5 +13,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2;
|
||||
RET[programIndex] = programCount == 1 ? 1 : 2;
|
||||
}
|
||||
|
||||
@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programCount/2) - 1;
|
||||
RET[programIndex] = programCount == 1 ? 0 : ((programCount/2) - 1);
|
||||
}
|
||||
|
||||
@@ -5,11 +5,11 @@ uniform int32 s = 0xff;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
int32 bits = 0xfffffff0;
|
||||
int32 bits = 0xfff0;
|
||||
float b = atomic_xor_global(&s, bits);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0xff;
|
||||
RET[programIndex] = (programCount & 1) ? 0xff0f : 0xff;
|
||||
}
|
||||
|
||||
@@ -10,5 +10,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 3000;
|
||||
RET[programIndex] = (programCount == 1) ? 2 : 3000;
|
||||
}
|
||||
|
||||
@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount;
|
||||
RET[programIndex] = (programCount == 1) ? 0 : programCount;
|
||||
}
|
||||
|
||||
@@ -13,5 +13,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
RET[programIndex] = (programCount == 1) ? 0 : 1;
|
||||
}
|
||||
|
||||
@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int a = aFOO[programIndex];
|
||||
int br = broadcast(a, (uniform int)b-2);
|
||||
int br = (programCount == 1) ? 4 : broadcast(a, (uniform int)b-2);
|
||||
RET[programIndex] = br;
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
int16 a = aFOO[programIndex];
|
||||
int16 b = broadcast(a, 2);
|
||||
int16 b = (programCount == 1) ? 3 : broadcast(a, 2);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
int8 a = aFOO[programIndex];
|
||||
int8 br = broadcast(a, (uniform int)b-2);
|
||||
int8 br = (programCount == 1) ? 4 : broadcast(a, (uniform int)b-2);
|
||||
RET[programIndex] = br;
|
||||
}
|
||||
|
||||
|
||||
@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = broadcast(a, 2);
|
||||
float b = (programCount == 1) ? 3 : broadcast(a, 2);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
|
||||
10
tests/const-fold-select-1.ispc
Normal file
10
tests/const-fold-select-1.ispc
Normal file
@@ -0,0 +1,10 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = (programIndex >= 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
10
tests/const-fold-select-2.ispc
Normal file
10
tests/const-fold-select-2.ispc
Normal file
@@ -0,0 +1,10 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = (programCount < 10000) ? 1 : 0;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
@@ -3,9 +3,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
double a = programIndex;
|
||||
RET[programIndex] = extract(a, 3);
|
||||
RET[programIndex] = extract(a, min(programCount-1, 3));
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 3;
|
||||
RET[programIndex] = (programCount == 1) ? 0 : 3;
|
||||
}
|
||||
|
||||
30
tests/foreach-double-1.ispc
Normal file
30
tests/foreach-double-1.ispc
Normal file
@@ -0,0 +1,30 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform double one = 1;
|
||||
|
||||
void copy(uniform double dst[], uniform double src[], uniform int count) {
|
||||
foreach (i = 0 ... count)
|
||||
dst[i] = one * src[i];
|
||||
}
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
uniform int count = 200 + aFOO[1];
|
||||
uniform double * uniform src = uniform new uniform double[count];
|
||||
for (uniform int i = 0; i < count; ++i)
|
||||
src[i] = i;
|
||||
|
||||
uniform double * uniform dst = uniform new uniform double[count];
|
||||
copy(dst, src, count);
|
||||
|
||||
uniform int errors = 0;
|
||||
for (uniform int i = 0; i < count; ++i)
|
||||
if (dst[i] != src[i])
|
||||
++errors;
|
||||
|
||||
RET[programIndex] = errors;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
}
|
||||
15
tests/local-atomics-1.ispc
Normal file
15
tests/local-atomics-1.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float delta = 1;
|
||||
float b = atomic_add_local(&s, delta);
|
||||
RET[programIndex] = reduce_add(b);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = reduce_add(programIndex);
|
||||
}
|
||||
17
tests/local-atomics-10.ispc
Normal file
17
tests/local-atomics-10.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
if (programIndex < 2)
|
||||
b = atomic_add_local(&s, delta);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount == 1 ? 1 : 2;
|
||||
}
|
||||
20
tests/local-atomics-11.ispc
Normal file
20
tests/local-atomics-11.ispc
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1)
|
||||
b = atomic_add_local(&s, programIndex);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int sum = 0;
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
if (i & 1)
|
||||
sum += i;
|
||||
RET[programIndex] = sum;
|
||||
}
|
||||
20
tests/local-atomics-12.ispc
Normal file
20
tests/local-atomics-12.ispc
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1)
|
||||
b = atomic_or_local(&s, (1 << programIndex));
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int sum = 0;
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
if (i & 1)
|
||||
sum += (1 << i);
|
||||
RET[programIndex] = sum;
|
||||
}
|
||||
16
tests/local-atomics-13.ispc
Normal file
16
tests/local-atomics-13.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1)
|
||||
b = atomic_or_local(&s, (1 << programIndex));
|
||||
RET[programIndex] = popcnt(reduce_max((int32)b));
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount == 1 ? 0 : ((programCount/2) - 1);
|
||||
}
|
||||
20
tests/local-atomics-14.ispc
Normal file
20
tests/local-atomics-14.ispc
Normal file
@@ -0,0 +1,20 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int64 s = 0xffffffffff000000;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1)
|
||||
b = atomic_or_local(&s, (1 << programIndex));
|
||||
RET[programIndex] = (s>>20);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int sum = 0;
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
if (i & 1)
|
||||
sum += (1 << i);
|
||||
RET[programIndex] = ((unsigned int64)(0xffffffffff000000 | sum)) >> 20;
|
||||
}
|
||||
15
tests/local-atomics-2.ispc
Normal file
15
tests/local-atomics-2.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int64 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float delta = 1;
|
||||
float b = atomic_add_local(&s, delta);
|
||||
RET[programIndex] = reduce_add(b);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = reduce_add(programIndex);
|
||||
}
|
||||
15
tests/local-atomics-3.ispc
Normal file
15
tests/local-atomics-3.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 0xff;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
int32 bits = 0xfff0;
|
||||
float b = atomic_xor_local(&s, bits);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programCount & 1) ? 0xff0f : 0xff;
|
||||
}
|
||||
14
tests/local-atomics-4.ispc
Normal file
14
tests/local-atomics-4.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = atomic_or_local(&s, (1<<programIndex));
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (1<<programCount)-1;
|
||||
}
|
||||
14
tests/local-atomics-5.ispc
Normal file
14
tests/local-atomics-5.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 0xbeef;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = atomic_swap_local(&s, programIndex);
|
||||
RET[programIndex] = reduce_max(b);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0xbeef;
|
||||
}
|
||||
14
tests/local-atomics-6.ispc
Normal file
14
tests/local-atomics-6.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 2;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = atomic_compare_exchange_local(&s, programIndex, a*1000);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programCount == 1) ? 2 : 3000;
|
||||
}
|
||||
14
tests/local-atomics-7.ispc
Normal file
14
tests/local-atomics-7.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
int32 a = aFOO[programIndex];
|
||||
float b = atomic_min_local(&s, a);
|
||||
RET[programIndex] = reduce_min(b);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = reduce_min(programIndex);
|
||||
}
|
||||
16
tests/local-atomics-8.ispc
Normal file
16
tests/local-atomics-8.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
int32 a = aFOO[programIndex];
|
||||
int32 b = 0;
|
||||
if (programIndex & 1)
|
||||
b = atomic_max_local(&s, a);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programCount == 1) ? 0 : programCount;
|
||||
}
|
||||
17
tests/local-atomics-9.ispc
Normal file
17
tests/local-atomics-9.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
int32 delta = 1;
|
||||
if (programIndex < 2)
|
||||
b = atomic_add_local(&s, delta);
|
||||
RET[programIndex] = reduce_add(b);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (programCount == 1) ? 0 : 1;
|
||||
}
|
||||
17
tests/local-atomics-swap.ispc
Normal file
17
tests/local-atomics-swap.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s = 1234;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1) {
|
||||
b = atomic_swap_local(&s, programIndex);
|
||||
}
|
||||
RET[programIndex] = reduce_add(b) + s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1234 + reduce_add(programIndex & 1 ? programIndex : 0);
|
||||
}
|
||||
14
tests/local-atomics-uniform-1.ispc
Normal file
14
tests/local-atomics-uniform-1.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 10;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform unsigned int32 b = atomic_add_local(&s, 1);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 11;
|
||||
}
|
||||
14
tests/local-atomics-uniform-2.ispc
Normal file
14
tests/local-atomics-uniform-2.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0b1010;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform unsigned int32 b = atomic_or_local(&s, 1);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0b1011;
|
||||
}
|
||||
14
tests/local-atomics-uniform-3.ispc
Normal file
14
tests/local-atomics-uniform-3.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0b1010;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform unsigned int32 b = atomic_or_local(&s, 1);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0b1010;
|
||||
}
|
||||
14
tests/local-atomics-uniform-4.ispc
Normal file
14
tests/local-atomics-uniform-4.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0xffff;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform unsigned int32 b = atomic_min_local(&s, 1);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0xffff;
|
||||
}
|
||||
14
tests/local-atomics-uniform-5.ispc
Normal file
14
tests/local-atomics-uniform-5.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s = 0xffff;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform unsigned int32 b = atomic_min_local(&s, 1);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1;
|
||||
}
|
||||
14
tests/local-atomics-uniform-6.ispc
Normal file
14
tests/local-atomics-uniform-6.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform float s = 100.;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float b = atomic_swap_local(&s, 1.);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1.;
|
||||
}
|
||||
14
tests/local-atomics-uniform-7.ispc
Normal file
14
tests/local-atomics-uniform-7.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform float s = 100.;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float b = atomic_swap_local(&s, 1.);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 100.;
|
||||
}
|
||||
14
tests/local-atomics-uniform-8.ispc
Normal file
14
tests/local-atomics-uniform-8.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform float s = 100.;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float b = atomic_compare_exchange_local(&s, 1., -100.);
|
||||
RET[programIndex] = b;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 100.;
|
||||
}
|
||||
14
tests/local-atomics-uniform-9.ispc
Normal file
14
tests/local-atomics-uniform-9.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int64 s = 100.;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform int64 b = atomic_compare_exchange_local(&s, 100, -100);
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = -100.;
|
||||
}
|
||||
18
tests/local-atomics-varyingptr-1.ispc
Normal file
18
tests/local-atomics-varyingptr-1.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
if (programIndex < 2)
|
||||
atomic_add_local(&s[programIndex], delta);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 0;
|
||||
RET[0] = RET[1] = 1;
|
||||
}
|
||||
16
tests/local-atomics-varyingptr-2.ispc
Normal file
16
tests/local-atomics-varyingptr-2.ispc
Normal file
@@ -0,0 +1,16 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
atomic_add_local(&s[programCount-1-programIndex], programIndex);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount-1-programIndex;
|
||||
}
|
||||
18
tests/local-atomics-varyingptr-3.ispc
Normal file
18
tests/local-atomics-varyingptr-3.ispc
Normal file
@@ -0,0 +1,18 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform unsigned int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
s[i] = 1234;
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
float delta = 1;
|
||||
a = atomic_max_local(&s[programIndex], programIndex);
|
||||
RET[programIndex] = a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1234;
|
||||
}
|
||||
15
tests/local-atomics-varyingptr-4.ispc
Normal file
15
tests/local-atomics-varyingptr-4.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
uniform int32 s[programCount];
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
s[i] = -1234;
|
||||
atomic_max_local(&s[programIndex], programIndex);
|
||||
RET[programIndex] = s[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programIndex;
|
||||
}
|
||||
15
tests/new-delete-1.ispc
Normal file
15
tests/new-delete-1.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
float * uniform buf = uniform new float[programCount];
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
buf[i] = i;
|
||||
RET[programIndex] = buf[a-1];
|
||||
delete buf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programIndex;
|
||||
}
|
||||
15
tests/new-delete-2.ispc
Normal file
15
tests/new-delete-2.ispc
Normal file
@@ -0,0 +1,15 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float * uniform buf = uniform new uniform float[programCount];
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
buf[i] = i;
|
||||
RET[programIndex] = buf[a-1];
|
||||
delete buf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programIndex;
|
||||
}
|
||||
17
tests/new-delete-3.ispc
Normal file
17
tests/new-delete-3.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
float * uniform buf = uniform new float[programCount+1];
|
||||
for (uniform int i = 0; i < programCount+1; ++i) {
|
||||
buf[i] = i+a;
|
||||
}
|
||||
RET[programIndex] = buf[a];
|
||||
delete buf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2 + 2*programIndex;
|
||||
}
|
||||
14
tests/new-delete-4.ispc
Normal file
14
tests/new-delete-4.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
float * uniform buf = uniform new float(2*b);
|
||||
RET[programIndex] = buf[0];
|
||||
delete[] buf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 10;
|
||||
}
|
||||
17
tests/new-delete-5.ispc
Normal file
17
tests/new-delete-5.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
struct Point {
|
||||
uniform float x, y, z;
|
||||
};
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
varying Point * uniform buf = uniform new varying Point(a, b, 1234.);
|
||||
RET[programIndex] = buf->y;
|
||||
delete buf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 5;
|
||||
}
|
||||
17
tests/new-delete-6.ispc
Normal file
17
tests/new-delete-6.ispc
Normal file
@@ -0,0 +1,17 @@
|
||||
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
struct Point {
|
||||
float x, y, z;
|
||||
};
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
Point * varying buf = new Point(0., b, a);
|
||||
RET[programIndex] = buf->z;
|
||||
delete buf;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1+programIndex;
|
||||
}
|
||||
@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 2;
|
||||
RET[programIndex] = (programCount == 1) ? 1 : 2;
|
||||
}
|
||||
|
||||
@@ -15,8 +15,12 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
for (uniform int i = 0; i < programCount/2; ++i) {
|
||||
RET[2*i+1] = 10+i;
|
||||
RET[2*i] = 10+programCount/2+i;
|
||||
if (programCount == 1)
|
||||
RET[0] = 10;
|
||||
else {
|
||||
for (uniform int i = 0; i < programCount/2; ++i) {
|
||||
RET[2*i+1] = 10+i;
|
||||
RET[2*i] = 10+programCount/2+i;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
@@ -13,5 +13,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount/2;
|
||||
RET[programIndex] = (programCount == 1) ? 1 : programCount/2;
|
||||
}
|
||||
|
||||
@@ -8,4 +8,4 @@ export void f_f(uniform float RET[4], uniform float aFOO[]) {
|
||||
RET[programIndex] = popcnt(a < 3);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) { RET[programIndex] = 2; }
|
||||
export void result(uniform float RET[]) { RET[programIndex] = programCount == 1 ? 1 : 2; }
|
||||
|
||||
@@ -2,8 +2,9 @@
|
||||
export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
uniform float a[programCount];
|
||||
uniform float a[programCount+1];
|
||||
a[programIndex] = aFOO[programIndex];
|
||||
a[programCount] = 1;
|
||||
|
||||
uniform float * uniform ptr = a;
|
||||
*(ptr+1) = 0;
|
||||
@@ -12,5 +13,6 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1+programIndex;
|
||||
RET[1] = 0;
|
||||
if (programCount > 0)
|
||||
RET[1] = 0;
|
||||
}
|
||||
|
||||
@@ -5,7 +5,7 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
double v = aFOO[programIndex];
|
||||
uniform float m;
|
||||
uniform float m = 42;
|
||||
int iv = (int)v;
|
||||
if (iv & 1)
|
||||
m = reduce_add((double)iv);
|
||||
@@ -14,7 +14,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int x = -1234;
|
||||
if (programCount == 4) x = 4;
|
||||
if (programCount == 1) x = 1;
|
||||
else if (programCount == 4) x = 4;
|
||||
else if (programCount == 8) x = 16;
|
||||
else if (programCount == 16) x = 64;
|
||||
RET[programIndex] = x;
|
||||
|
||||
@@ -13,7 +13,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int x = -1234;
|
||||
if (programCount == 4) x = 10;
|
||||
if (programCount == 1) x = 1;
|
||||
else if (programCount == 4) x = 10;
|
||||
else if (programCount == 8) x = 36;
|
||||
else if (programCount == 16) x = 136;
|
||||
RET[programIndex] = x;
|
||||
|
||||
@@ -11,4 +11,4 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
RET[programIndex] = m;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) { RET[programIndex] = -3; }
|
||||
export void result(uniform float RET[]) { RET[programIndex] = (programCount == 1) ? -1 : -3; }
|
||||
|
||||
@@ -14,7 +14,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int x = -1234;
|
||||
if (programCount == 4) x = 4;
|
||||
if (programCount == 1) x = 1;
|
||||
else if (programCount == 4) x = 4;
|
||||
else if (programCount == 8) x = 16;
|
||||
else if (programCount == 16) x = 64;
|
||||
RET[programIndex] = x;
|
||||
|
||||
@@ -13,7 +13,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int x = -1234;
|
||||
if (programCount == 4) x = 10;
|
||||
if (programCount == 1) x = 1;
|
||||
else if (programCount == 4) x = 10;
|
||||
else if (programCount == 8) x = 36;
|
||||
else if (programCount == 16) x = 136;
|
||||
RET[programIndex] = x;
|
||||
|
||||
@@ -11,4 +11,4 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
RET[programIndex] = m;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) { RET[programIndex] = -3; }
|
||||
export void result(uniform float RET[]) { RET[programIndex] = (programCount == 1) ? -1 : -3; }
|
||||
|
||||
@@ -14,7 +14,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int x = -1234;
|
||||
if (programCount == 4) x = 4;
|
||||
if (programCount == 1) x = 1;
|
||||
else if (programCount == 4) x = 4;
|
||||
else if (programCount == 8) x = 16;
|
||||
else if (programCount == 16) x = 64;
|
||||
RET[programIndex] = x;
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user