Merge remote-tracking branch 'matt/master'

This commit is contained in:
Jean-Luc Duprat
2012-02-07 12:50:31 -08:00
179 changed files with 5756 additions and 1373 deletions

View File

@@ -72,7 +72,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h opt.h stmt.h sym.h type.h util.h
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \ TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
generic-16 generic-16 generic-1
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
builtins/dispatch.ll builtins/dispatch.ll
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \

123
ast.cpp
View File

@@ -98,6 +98,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
StmtList *sl; StmtList *sl;
PrintStmt *ps; PrintStmt *ps;
AssertStmt *as; AssertStmt *as;
DeleteStmt *dels;
if ((es = dynamic_cast<ExprStmt *>(node)) != NULL) if ((es = dynamic_cast<ExprStmt *>(node)) != NULL)
es->expr = (Expr *)WalkAST(es->expr, preFunc, postFunc, data); es->expr = (Expr *)WalkAST(es->expr, preFunc, postFunc, data);
@@ -160,6 +161,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
ps->values = (Expr *)WalkAST(ps->values, preFunc, postFunc, data); ps->values = (Expr *)WalkAST(ps->values, preFunc, postFunc, data);
else if ((as = dynamic_cast<AssertStmt *>(node)) != NULL) else if ((as = dynamic_cast<AssertStmt *>(node)) != NULL)
as->expr = (Expr *)WalkAST(as->expr, preFunc, postFunc, data); as->expr = (Expr *)WalkAST(as->expr, preFunc, postFunc, data);
else if ((dels = dynamic_cast<DeleteStmt *>(node)) != NULL)
dels->expr = (Expr *)WalkAST(dels->expr, preFunc, postFunc, data);
else else
FATAL("Unhandled statement type in WalkAST()"); FATAL("Unhandled statement type in WalkAST()");
} }
@@ -180,6 +183,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
DereferenceExpr *dre; DereferenceExpr *dre;
SizeOfExpr *soe; SizeOfExpr *soe;
AddressOfExpr *aoe; AddressOfExpr *aoe;
NewExpr *newe;
if ((ue = dynamic_cast<UnaryExpr *>(node)) != NULL) if ((ue = dynamic_cast<UnaryExpr *>(node)) != NULL)
ue->expr = (Expr *)WalkAST(ue->expr, preFunc, postFunc, data); ue->expr = (Expr *)WalkAST(ue->expr, preFunc, postFunc, data);
@@ -223,6 +227,12 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
soe->expr = (Expr *)WalkAST(soe->expr, preFunc, postFunc, data); soe->expr = (Expr *)WalkAST(soe->expr, preFunc, postFunc, data);
else if ((aoe = dynamic_cast<AddressOfExpr *>(node)) != NULL) else if ((aoe = dynamic_cast<AddressOfExpr *>(node)) != NULL)
aoe->expr = (Expr *)WalkAST(aoe->expr, preFunc, postFunc, data); aoe->expr = (Expr *)WalkAST(aoe->expr, preFunc, postFunc, data);
else if ((newe = dynamic_cast<NewExpr *>(node)) != NULL) {
newe->countExpr = (Expr *)WalkAST(newe->countExpr, preFunc,
postFunc, data);
newe->initExpr = (Expr *)WalkAST(newe->initExpr, preFunc,
postFunc, data);
}
else if (dynamic_cast<SymbolExpr *>(node) != NULL || else if (dynamic_cast<SymbolExpr *>(node) != NULL ||
dynamic_cast<ConstExpr *>(node) != NULL || dynamic_cast<ConstExpr *>(node) != NULL ||
dynamic_cast<FunctionSymbolExpr *>(node) != NULL || dynamic_cast<FunctionSymbolExpr *>(node) != NULL ||
@@ -305,3 +315,116 @@ EstimateCost(ASTNode *root) {
return cost; return cost;
} }
/** Given an AST node, check to see if it's safe if we happen to run the
    code for that node with the execution mask all off.

    The data pointer must point at a bool; it is set to false as soon as
    the node is found to be unsafe and is otherwise left untouched.
    Returns false for an unsafe node and true otherwise.
 */
static bool
lCheckAllOffSafety(ASTNode *node, void *data) {
    bool *okPtr = (bool *)data;

    if (dynamic_cast<FunctionCallExpr *>(node) != NULL) {
        // FIXME: If we could somehow determine that the function being
        // called was safe (and all of the args Exprs were safe), then it'd
        // be nice to be able to return true here.  (Consider a call to
        // e.g. floatbits() in the stdlib.)  Unfortunately for now we just
        // have to be conservative.
        *okPtr = false;
        return false;
    }

    if (dynamic_cast<AssertStmt *>(node) != NULL) {
        // While it's fine to run the assert for varying tests, it's not
        // desirable to check an assert on a uniform variable if all of the
        // lanes are off.
        *okPtr = false;
        return false;
    }

    if (dynamic_cast<NewExpr *>(node) != NULL ||
        dynamic_cast<DeleteStmt *>(node) != NULL) {
        // We definitely don't want to run the uniform variants of these if
        // the mask is all off.  It's also worth skipping the overhead of
        // executing the varying versions of them in the all-off mask case.
        *okPtr = false;
        return false;
    }

    if (g->target.allOffMaskIsSafe == true)
        // Don't worry about memory accesses if we have a target that can
        // safely run them with the mask all off
        return true;

    IndexExpr *ie;
    if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
        const Type *type = ie->baseExpr->GetType();
        if (type == NULL)
            return true;

        // Look through a reference to the type that is actually indexed.
        if (dynamic_cast<const ReferenceType *>(type) != NULL)
            type = type->GetReferenceTarget();

        ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
        if (ce == NULL) {
            // indexing with a variable... -> not safe
            *okPtr = false;
            return false;
        }

        const PointerType *pointerType =
            dynamic_cast<const PointerType *>(type);
        if (pointerType != NULL) {
            // pointer[index] -> can't be sure -> not safe
            *okPtr = false;
            return false;
        }

        const SequentialType *seqType =
            dynamic_cast<const SequentialType *>(type);
        Assert(seqType != NULL);
        int nElements = seqType->GetElementCount();
        if (nElements == 0) {
            // Unsized array, so we can't be sure -> not safe
            *okPtr = false;
            return false;
        }

        int32_t indices[ISPC_MAX_NVEC];
        int count = ce->AsInt32(indices);
        for (int i = 0; i < count; ++i) {
            if (indices[i] < 0 || indices[i] >= nElements) {
                // Index is out of bounds -> not safe
                *okPtr = false;
                return false;
            }
        }

        // All indices are in-bounds
        return true;
    }

    MemberExpr *me;
    if ((me = dynamic_cast<MemberExpr *>(node)) != NULL &&
        me->dereferenceExpr) {
        // Presumably a member access that goes through a pointer
        // dereference -> not safe
        *okPtr = false;
        return false;
    }

    // Guard against a missing operand in the same way the IndexExpr case
    // guards against a missing base expression; without the check a NULL
    // operand would be dereferenced here.
    DereferenceExpr *de;
    if ((de = dynamic_cast<DereferenceExpr *>(node)) != NULL &&
        de->expr != NULL) {
        const Type *exprType = de->expr->GetType();
        if (dynamic_cast<const PointerType *>(exprType) != NULL) {
            // Dereferencing a pointer -> not safe
            *okPtr = false;
            return false;
        }
    }

    return true;
}
/** Walks the AST rooted at the given node with lCheckAllOffSafety() as the
    pre-visit callback and reports the verdict: true unless the callback
    flagged some visited node as unsafe. */
bool
SafeToRunWithMaskAllOff(ASTNode *root) {
    bool isSafe = true;
    WalkAST(root, lCheckAllOffSafety, NULL, &isSafe);
    return isSafe;
}

4
ast.h
View File

@@ -144,4 +144,8 @@ extern Stmt *TypeCheck(Stmt *);
the given root. */ the given root. */
extern int EstimateCost(ASTNode *root); extern int EstimateCost(ASTNode *root);
/** Returns true if it would be safe to run the given code with an "all
off" mask. */
extern bool SafeToRunWithMaskAllOff(ASTNode *root);
#endif // ISPC_AST_H #endif // ISPC_AST_H

View File

@@ -391,6 +391,8 @@ lSetInternalFunctions(llvm::Module *module) {
"__count_trailing_zeros_i64", "__count_trailing_zeros_i64",
"__count_leading_zeros_i32", "__count_leading_zeros_i32",
"__count_leading_zeros_i64", "__count_leading_zeros_i64",
"__delete_uniform",
"__delete_varying",
"__do_assert_uniform", "__do_assert_uniform",
"__do_assert_varying", "__do_assert_varying",
"__do_print", "__do_print",
@@ -449,6 +451,9 @@ lSetInternalFunctions(llvm::Module *module) {
"__min_varying_uint32", "__min_varying_uint32",
"__min_varying_uint64", "__min_varying_uint64",
"__movmsk", "__movmsk",
"__new_uniform",
"__new_varying32",
"__new_varying64",
"__num_cores", "__num_cores",
"__packed_load_active", "__packed_load_active",
"__packed_store_active", "__packed_store_active",
@@ -794,6 +799,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
builtins_bitcode_generic_16_length, builtins_bitcode_generic_16_length,
module, symbolTable); module, symbolTable);
break; break;
case 1:
extern unsigned char builtins_bitcode_generic_1[];
extern int builtins_bitcode_generic_1_length;
AddBitcodeToModule(builtins_bitcode_generic_1,
builtins_bitcode_generic_1_length,
module, symbolTable);
break;
default: default:
FATAL("logic error in DefineStdlib"); FATAL("logic error in DefineStdlib");
} }
@@ -829,7 +841,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// If the user wants the standard library to be included, parse the // If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its // serialized version of the stdlib.ispc file to get its
// definitions added. // definitions added.
if (g->target.isa == Target::GENERIC) { if (g->target.isa == Target::GENERIC&&g->target.vectorWidth!=1) { // 1 wide uses x86 stdlib
extern char stdlib_generic_code[]; extern char stdlib_generic_code[];
yy_scan_string(stdlib_generic_code); yy_scan_string(stdlib_generic_code);
yyparse(); yyparse();

935
builtins/target-generic-1.ll Executable file
View File

@@ -0,0 +1,935 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Define the standard library builtins for the NOVEC target
define(`MASK',`i32')
define(`WIDTH',`1')
include(`util.m4')
; Define some basics for a 1-wide target
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
aossoa()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store
gen_masked_store(1, i8, 8)
gen_masked_store(1, i16, 16)
gen_masked_store(1, i32, 32)
gen_masked_store(1, i64, 64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
load_and_broadcast(1, i8, 8)
load_and_broadcast(1, i16, 16)
load_and_broadcast(1, i32, 32)
load_and_broadcast(1, i64, 64)
masked_load(1, i8, 8, 1)
masked_load(1, i16, 16, 2)
masked_load(1, i32, 32, 4)
masked_load(1, i64, 64, 8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather/scatter
; define these with the macros from util.m4
gen_gather(1, i8)
gen_gather(1, i16)
gen_gather(1, i32)
gen_gather(1, i64)
gen_scatter(1, i8)
gen_scatter(1, i16)
gen_scatter(1, i32)
gen_scatter(1, i64)
;; Select for the 1-wide target: the result is the first operand when the
;; mask lane is zero and the second operand otherwise.
define <1 x i8> @__vselect_i8(<1 x i8>, <1 x i8> ,
                              <1 x i32> %mask) nounwind readnone alwaysinline {
  ; Work on the extracted scalars rather than on the 1-wide vectors, which
  ; avoids the problems with the LLVM scalarizer seen with a vector select.
  %lane_mask = extractelement <1 x i32> %mask, i32 0
  %if_off = extractelement <1 x i8> %0, i32 0
  %if_on = extractelement <1 x i8> %1, i32 0
  %lane_on = icmp ne i32 %lane_mask, 0
  %picked = select i1 %lane_on, i8 %if_on, i8 %if_off
  %result = insertelement <1 x i8> undef, i8 %picked, i32 0
  ret <1 x i8> %result
}
;; Select for the 1-wide target: the result is the first operand when the
;; mask lane is zero and the second operand otherwise.
define <1 x i16> @__vselect_i16(<1 x i16>, <1 x i16> ,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  ; The selection is done on the extracted scalar lane.
  %lane_mask = extractelement <1 x i32> %mask, i32 0
  %if_off = extractelement <1 x i16> %0, i32 0
  %if_on = extractelement <1 x i16> %1, i32 0
  %lane_on = icmp ne i32 %lane_mask, 0
  %picked = select i1 %lane_on, i16 %if_on, i16 %if_off
  %result = insertelement <1 x i16> undef, i16 %picked, i32 0
  ret <1 x i16> %result
}
;; Select for the 1-wide target: the result is the first operand when the
;; mask lane is zero and the second operand otherwise.
define <1 x i32> @__vselect_i32(<1 x i32>, <1 x i32> ,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  ; The selection is done on the extracted scalar lane.
  %lane_mask = extractelement <1 x i32> %mask, i32 0
  %if_off = extractelement <1 x i32> %0, i32 0
  %if_on = extractelement <1 x i32> %1, i32 0
  %lane_on = icmp ne i32 %lane_mask, 0
  %picked = select i1 %lane_on, i32 %if_on, i32 %if_off
  %result = insertelement <1 x i32> undef, i32 %picked, i32 0
  ret <1 x i32> %result
}
;; Select for the 1-wide target: the result is the first operand when the
;; mask lane is zero and the second operand otherwise.
define <1 x i64> @__vselect_i64(<1 x i64>, <1 x i64> ,
                                <1 x i32> %mask) nounwind readnone alwaysinline {
  ; The selection is done on the extracted scalar lane.
  %lane_mask = extractelement <1 x i32> %mask, i32 0
  %if_off = extractelement <1 x i64> %0, i32 0
  %if_on = extractelement <1 x i64> %1, i32 0
  %lane_on = icmp ne i32 %lane_mask, 0
  %picked = select i1 %lane_on, i64 %if_on, i64 %if_off
  %result = insertelement <1 x i64> undef, i64 %picked, i32 0
  ret <1 x i64> %result
}
;; Select for the 1-wide target: the result is the first operand when the
;; mask lane is zero and the second operand otherwise.
define <1 x float> @__vselect_float(<1 x float>, <1 x float>,
                                    <1 x i32> %mask) nounwind readnone alwaysinline {
  ; The selection is done on the extracted scalar lane.
  %lane_mask = extractelement <1 x i32> %mask, i32 0
  %if_off = extractelement <1 x float> %0, i32 0
  %if_on = extractelement <1 x float> %1, i32 0
  %lane_on = icmp ne i32 %lane_mask, 0
  %picked = select i1 %lane_on, float %if_on, float %if_off
  %result = insertelement <1 x float> undef, float %picked, i32 0
  ret <1 x float> %result
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store
;; Blend-based masked store of a 1-wide i8 vector: the current contents
;; are loaded, the new value is chosen when the mask lane is nonzero, and
;; the result is written back.
define void @__masked_store_blend_8(<1 x i8>* nocapture, <1 x i8>,
                                    <1 x i32> %mask) nounwind alwaysinline {
  ; The pointee is a single i8, whose natural alignment is one byte.
  ; Promising a larger alignment than the pointer really has is undefined
  ; behavior, so only align 1 is claimed here.
  %val = load <1 x i8> * %0, align 1
  %newval = call <1 x i8> @__vselect_i8(<1 x i8> %val, <1 x i8> %1, <1 x i32> %mask)
  store <1 x i8> %newval, <1 x i8> * %0, align 1
  ret void
}
;; Blend-based masked store of a 1-wide i16 vector: the current contents
;; are loaded, the new value is chosen when the mask lane is nonzero, and
;; the result is written back.
define void @__masked_store_blend_16(<1 x i16>* nocapture, <1 x i16>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  ; The pointee is a single i16, whose natural alignment is two bytes.
  ; Promising a larger alignment than the pointer really has is undefined
  ; behavior, so only align 2 is claimed here.
  %val = load <1 x i16> * %0, align 2
  %newval = call <1 x i16> @__vselect_i16(<1 x i16> %val, <1 x i16> %1, <1 x i32> %mask)
  store <1 x i16> %newval, <1 x i16> * %0, align 2
  ret void
}
;; Blend-based masked store of a 1-wide i32 vector: the current contents
;; are loaded, the new value is chosen when the mask lane is nonzero, and
;; the result is written back.
define void @__masked_store_blend_32(<1 x i32>* nocapture, <1 x i32>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %current = load <1 x i32> * %0, align 4
  %blended = call <1 x i32> @__vselect_i32(<1 x i32> %current, <1 x i32> %1, <1 x i32> %mask)
  store <1 x i32> %blended, <1 x i32> * %0, align 4
  ret void
}
;; Blend-based masked store of a 1-wide i64 vector: the current contents
;; are loaded, the new value is chosen when the mask lane is nonzero, and
;; the result is written back.
define void @__masked_store_blend_64(<1 x i64>* nocapture, <1 x i64>,
                                     <1 x i32> %mask) nounwind alwaysinline {
  %current = load <1 x i64> * %0, align 4
  %blended = call <1 x i64> @__vselect_i64(<1 x i64> %current, <1 x i64> %1, <1 x i32> %mask)
  store <1 x i64> %blended, <1 x i64> * %0, align 4
  ret void
}
;; Returns the sign bit of the single mask lane as an integer that is
;; either zero or one.
define i32 @__movmsk(<1 x i32>) nounwind readnone alwaysinline {
  %lane = extractelement <1 x i32> %0, i32 0
  %signbit = lshr i32 %lane, 31
  ret i32 %signbit
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding
;;
;; As on SSE2, which does not have any rounding instructions, the
;; functionality is emulated here with multiple instructions...
; The code for __round_* is the result of compiling the following source
; code.
;
; export float Round(float x) {
; unsigned int sign = signbits(x);
; unsigned int ix = intbits(x);
; ix ^= sign;
; x = floatbits(ix);
; x += 0x1.0p23f;
; x -= 0x1.0p23f;
; ix = intbits(x);
; ix ^= sign;
; x = floatbits(ix);
; return x;
;}
;; Rounds the single lane to an integral value: the sign bit is stripped,
;; 2^23 is added and then subtracted again, and the sign bit is restored.
define <1 x float> @__round_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %bits = bitcast <1 x float> %0 to <1 x i32>
  %sign = and <1 x i32> %bits, <i32 -2147483648>
  %magnitude_bits = xor <1 x i32> %bits, %sign
  %magnitude = bitcast <1 x i32> %magnitude_bits to <1 x float>
  %biased = fadd <1 x float> %magnitude, <float 8.388608e+06>
  %rounded = fadd <1 x float> %biased, <float -8.388608e+06>
  %rounded_bits = bitcast <1 x float> %rounded to <1 x i32>
  %signed_bits = xor <1 x i32> %rounded_bits, %sign
  %result = bitcast <1 x i32> %signed_bits to <1 x float>
  ret <1 x float> %result
}
;; Similarly, for implementations of the __floor* functions below, we have the
;; bitcode from compiling the following source code...
;export float Floor(float x) {
; float y = Round(x);
; unsigned int cmp = y > x ? 0xffffffff : 0;
; float delta = -1.f;
; unsigned int idelta = intbits(delta);
; idelta &= cmp;
; delta = floatbits(idelta);
; return y + delta;
;}
;; Floor of the single lane: round first, then add -1.0 when the rounded
;; value ended up greater than the input.
define <1 x float> @__floor_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <1 x float> @__round_varying_float(<1 x float> %0) nounwind
  %overshot = fcmp ogt <1 x float> %rounded, %0
  %overshot_mask = sext <1 x i1> %overshot to <1 x i32>
  ; -1082130432 is the bit pattern of -1.0; the mask turns it into 0.0
  ; when no correction is needed.
  %delta_bits = and <1 x i32> %overshot_mask, <i32 -1082130432>
  %delta = bitcast <1 x i32> %delta_bits to <1 x float>
  %result = fadd <1 x float> %rounded, %delta
  ret <1 x float> %result
}
;; And here is the code we compiled to get the __ceil* functions below
;
;export uniform float Ceil(uniform float x) {
; uniform float y = Round(x);
; uniform int yltx = y < x ? 0xffffffff : 0;
; uniform float delta = 1.f;
; uniform int idelta = intbits(delta);
; idelta &= yltx;
; delta = floatbits(idelta);
; return y + delta;
;}
;; Ceiling of the single lane: round first, then add 1.0 when the rounded
;; value ended up less than the input.
define <1 x float> @__ceil_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <1 x float> @__round_varying_float(<1 x float> %0) nounwind
  %undershot = fcmp olt <1 x float> %rounded, %0
  %undershot_mask = sext <1 x i1> %undershot to <1 x i32>
  ; 1065353216 is the bit pattern of 1.0; the mask turns it into 0.0 when
  ; no correction is needed.
  %delta_bits = and <1 x i32> %undershot_mask, <i32 1065353216>
  %delta = bitcast <1 x i32> %delta_bits to <1 x float>
  %result = fadd <1 x float> %rounded, %delta
  ret <1 x float> %result
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding doubles
; expecting math lib to provide this
declare double @ceil (double) nounwind readnone
declare double @floor (double) nounwind readnone
declare double @round (double) nounwind readnone
;declare float @llvm.sqrt.f32(float %Val)
declare double @llvm.sqrt.f64(double %Val)
declare float @llvm.sin.f32(float %Val)
declare float @llvm.cos.f32(float %Val)
declare float @llvm.sqrt.f32(float %Val)
declare float @llvm.exp.f32(float %Val)
declare float @llvm.log.f32(float %Val)
declare float @llvm.pow.f32(float %f, float %e)
;; stuff that could be in builtins ...
;; Helper macro that emits the body of a function which applies a scalar
;; function to the single lane of a 1-wide vector: the lane of the unnamed
;; argument zero is extracted, passed to the scalar function, and the
;; result is inserted into a fresh 1-wide vector that is returned.
;; The first macro parameter is the element type and the second one is the
;; scalar function to call.
define(`unary1to1', `
%v_0 = extractelement <1 x $1> %0, i32 0
%r_0 = call $1 $2($1 %v_0)
%ret_0 = insertelement <1 x $1> undef, $1 %r_0, i32 0
ret <1 x $1> %ret_0
')
;; dummy 1 wide vector ops
;; Four-element variant for a single lane: each of the four inputs is
;; stored unchanged to the output pointer in the same position.
define void
@__aos_to_soa4_float1(<1 x float> %in0, <1 x float> %in1, <1 x float> %in2,
        <1 x float> %in3, <1 x float> * noalias %dst0,
        <1 x float> * noalias %dst1, <1 x float> * noalias %dst2,
        <1 x float> * noalias %dst3) nounwind alwaysinline {
  store <1 x float> %in0, <1 x float> * %dst0
  store <1 x float> %in1, <1 x float> * %dst1
  store <1 x float> %in2, <1 x float> * %dst2
  store <1 x float> %in3, <1 x float> * %dst3
  ret void
}
;; Four-element variant for a single lane in the opposite direction: it
;; performs the same four stores as the aos to soa version, written out
;; directly here.
define void
@__soa_to_aos4_float1(<1 x float> %in0, <1 x float> %in1, <1 x float> %in2,
        <1 x float> %in3, <1 x float> * noalias %dst0,
        <1 x float> * noalias %dst1, <1 x float> * noalias %dst2,
        <1 x float> * noalias %dst3) nounwind alwaysinline {
  store <1 x float> %in0, <1 x float> * %dst0
  store <1 x float> %in1, <1 x float> * %dst1
  store <1 x float> %in2, <1 x float> * %dst2
  store <1 x float> %in3, <1 x float> * %dst3
  ret void
}
;; Three-element variant for a single lane: each of the three inputs is
;; stored unchanged to the output pointer in the same position.
;; Carries nounwind and alwaysinline like the four-element variants so
;; that this trivial helper is inlined in the same way.
define void
@__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1,
         <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
         <1 x float> * %out2) nounwind alwaysinline {
  store <1 x float> %v0, <1 x float > * %out0
  store <1 x float> %v1, <1 x float > * %out1
  store <1 x float> %v2, <1 x float > * %out2
  ret void
}
;; Three-element variant for a single lane in the opposite direction: it
;; forwards to the aos to soa version, which stores each input unchanged.
;; Carries nounwind and alwaysinline like the four-element variants so
;; that this trivial helper is inlined in the same way.
define void
@__soa_to_aos3_float1(<1 x float> %v0, <1 x float> %v1,
         <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
         <1 x float> * %out2) nounwind alwaysinline {
  call void @__aos_to_soa3_float1(<1 x float> %v0, <1 x float> %v1,
         <1 x float> %v2, <1 x float> * %out0, <1 x float> * %out1,
         <1 x float> * %out2)
  ret void
}
;; end builtins
;; Double precision rounding of the single lane, forwarded to the scalar
;; round function declared earlier in this file.
define <1 x double> @__round_varying_double(<1 x double>) nounwind readonly alwaysinline {
unary1to1(double, @round)
}
;; Double precision floor of the single lane, forwarded to the scalar
;; floor function declared earlier in this file.
define <1 x double> @__floor_varying_double(<1 x double>) nounwind readonly alwaysinline {
unary1to1(double, @floor)
}
;; Double precision ceiling of the single lane, forwarded to the scalar
;; ceil function declared earlier in this file.
define <1 x double> @__ceil_varying_double(<1 x double>) nounwind readonly alwaysinline {
unary1to1(double, @ceil)
}
; To do vector integer min and max, we do the vector compare and then sign
; extend the i1 vector result to an i32 mask. The __vselect does the
; rest...
;; Signed minimum of two 1-wide vectors: the first argument is chosen when
;; it compares less than the second one.
define <1 x i32> @__min_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_wins = icmp slt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_wins to <1 x i32>
  %result = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %result
}
;; Signed minimum of two scalars.
define i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_wins = icmp slt i32 %0, %1
  %result = select i1 %first_wins, i32 %0, i32 %1
  ret i32 %result
}
;; Signed maximum of two 1-wide vectors: the first argument is chosen when
;; it compares greater than the second one.
define <1 x i32> @__max_varying_int32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_wins = icmp sgt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_wins to <1 x i32>
  %result = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %result
}
;; Signed maximum of two scalars.
define i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_wins = icmp sgt i32 %0, %1
  %result = select i1 %first_wins, i32 %0, i32 %1
  ret i32 %result
}
; The functions for unsigned ints are similar, just with unsigned
; comparison functions...
;; Unsigned minimum of two 1-wide vectors: the first argument is chosen
;; when it compares less than the second one.
define <1 x i32> @__min_varying_uint32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_wins = icmp ult <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_wins to <1 x i32>
  %result = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %result
}
;; Unsigned minimum of two scalars.
define i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_wins = icmp ult i32 %0, %1
  %result = select i1 %first_wins, i32 %0, i32 %1
  ret i32 %result
}
;; Unsigned maximum of two 1-wide vectors: the first argument is chosen
;; when it compares greater than the second one.
define <1 x i32> @__max_varying_uint32(<1 x i32>, <1 x i32>) nounwind readonly alwaysinline {
  %first_wins = icmp ugt <1 x i32> %0, %1
  %pick_first = sext <1 x i1> %first_wins to <1 x i32>
  %result = call <1 x i32> @__vselect_i32(<1 x i32> %1, <1 x i32> %0, <1 x i32> %pick_first)
  ret <1 x i32> %result
}
;; Unsigned maximum of two scalars.
define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_wins = icmp ugt i32 %0, %1
  %result = select i1 %first_wins, i32 %0, i32 %1
  ret i32 %result
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; horizontal ops / reductions
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
;; Number of set bits in a 32-bit value, via the ctpop intrinsic.
define i32 @__popcnt_int32(i32) nounwind readonly alwaysinline {
  %bits_set = call i32 @llvm.ctpop.i32(i32 %0)
  ret i32 %bits_set
}
declare i64 @llvm.ctpop.i64(i64) nounwind readnone
;; Number of set bits in a 64-bit value, via the ctpop intrinsic.
define i64 @__popcnt_int64(i64) nounwind readonly alwaysinline {
  %bits_set = call i64 @llvm.ctpop.i64(i64 %0)
  ret i64 %bits_set
}
;; Float reductions over a single lane: the sum, the minimum and the
;; maximum are all just the value of that lane.
define float @__reduce_add_float(<1 x float> %v) nounwind readonly alwaysinline {
  %lane = extractelement <1 x float> %v, i32 0
  ret float %lane
}
define float @__reduce_min_float(<1 x float>) nounwind readnone {
  %lane = extractelement <1 x float> %0, i32 0
  ret float %lane
}
define float @__reduce_max_float(<1 x float>) nounwind readnone {
  %lane = extractelement <1 x float> %0, i32 0
  ret float %lane
}
;; Signed 32-bit reductions over a single lane: the sum, the minimum and
;; the maximum are all just the value of that lane.
define i32 @__reduce_add_int32(<1 x i32> %v) nounwind readnone {
  %lane = extractelement <1 x i32> %v, i32 0
  ret i32 %lane
}
define i32 @__reduce_min_int32(<1 x i32>) nounwind readnone {
  %lane = extractelement <1 x i32> %0, i32 0
  ret i32 %lane
}
define i32 @__reduce_max_int32(<1 x i32>) nounwind readnone {
  %lane = extractelement <1 x i32> %0, i32 0
  ret i32 %lane
}
;; Unsigned 32-bit reductions over a single lane: the sum, the minimum and
;; the maximum are all just the value of that lane.
define i32 @__reduce_add_uint32(<1 x i32> %v) nounwind readnone {
  ; Same result as the signed sum, which also returns the lane itself.
  %lane = extractelement <1 x i32> %v, i32 0
  ret i32 %lane
}
define i32 @__reduce_min_uint32(<1 x i32>) nounwind readnone {
  %lane = extractelement <1 x i32> %0, i32 0
  ret i32 %lane
}
define i32 @__reduce_max_uint32(<1 x i32>) nounwind readnone {
  %lane = extractelement <1 x i32> %0, i32 0
  ret i32 %lane
}
;; Double reductions over a single lane: the sum, the minimum and the
;; maximum are all just the value of that lane.
define double @__reduce_add_double(<1 x double>) nounwind readnone {
  %lane = extractelement <1 x double> %0, i32 0
  ret double %lane
}
define double @__reduce_min_double(<1 x double>) nounwind readnone {
  %lane = extractelement <1 x double> %0, i32 0
  ret double %lane
}
define double @__reduce_max_double(<1 x double>) nounwind readnone {
  %lane = extractelement <1 x double> %0, i32 0
  ret double %lane
}
;; 64-bit integer reductions over a single lane, signed and unsigned: each
;; of them is just the value of that lane.
define i64 @__reduce_add_int64(<1 x i64>) nounwind readnone {
  %lane = extractelement <1 x i64> %0, i32 0
  ret i64 %lane
}
define i64 @__reduce_min_int64(<1 x i64>) nounwind readnone {
  %lane = extractelement <1 x i64> %0, i32 0
  ret i64 %lane
}
define i64 @__reduce_max_int64(<1 x i64>) nounwind readnone {
  %lane = extractelement <1 x i64> %0, i32 0
  ret i64 %lane
}
define i64 @__reduce_min_uint64(<1 x i64>) nounwind readnone {
  %lane = extractelement <1 x i64> %0, i32 0
  ret i64 %lane
}
define i64 @__reduce_max_uint64(<1 x i64>) nounwind readnone {
  %lane = extractelement <1 x i64> %0, i32 0
  ret i64 %lane
}
;; Equality reductions over a single lane: the lane value is stored
;; through the samevalue pointer and true is returned unconditionally.
;; NOTE(review): the mask argument is never consulted, so true is returned
;; and the store happens even when the lane is off; confirm that no caller
;; relies on a different result in that case.
define i1 @__reduce_equal_int32(<1 x i32> %vv, i32 * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %lane = extractelement <1 x i32> %vv, i32 0
  store i32 %lane, i32 * %samevalue
  ret i1 true
}
define i1 @__reduce_equal_float(<1 x float> %vv, float * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %lane = extractelement <1 x float> %vv, i32 0
  store float %lane, float * %samevalue
  ret i1 true
}
define i1 @__reduce_equal_int64(<1 x i64> %vv, i64 * %samevalue,
                                <1 x i32> %mask) nounwind alwaysinline {
  %lane = extractelement <1 x i64> %vv, i32 0
  store i64 %lane, i64 * %samevalue
  ret i1 true
}
define i1 @__reduce_equal_double(<1 x double> %vv, double * %samevalue,
                                 <1 x i32> %mask) nounwind alwaysinline {
  %lane = extractelement <1 x double> %vv, i32 0
  store double %lane, double * %samevalue
  ret i1 true
}
; extracting/reinserting elements because I want to be able to remove vectors later on
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp
;; Reciprocal of the single lane, computed with a plain scalar division of
;; 1.0 by the lane value.
define <1 x float> @__rcp_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %lane = extractelement <1 x float> %0, i32 0
  %inverse = fdiv float 1., %lane
  %result = insertelement <1 x float> undef, float %inverse, i32 0
  ret <1 x float> %result
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; sqrt
;; Square root of the single lane, via the scalar sqrt intrinsic.
define <1 x float> @__sqrt_varying_float(<1 x float>) nounwind readonly alwaysinline {
  %lane = extractelement <1 x float> %0, i32 0
  %root = call float @llvm.sqrt.f32(float %lane)
  %result = insertelement <1 x float> undef, float %root, i32 0
  ret <1 x float> %result
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; rsqrt
;; Reciprocal square root of the single lane, computed as the reciprocal
;; of the square root.
define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alwaysinline {
  %root = call <1 x float> @__sqrt_varying_float(<1 x float> %v)
  %inverse_root = call <1 x float> @__rcp_varying_float(<1 x float> %root)
  ret <1 x float> %inverse_root
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; svml stuff
;; Sine of the single lane, via the scalar sin intrinsic.
define <1 x float> @__svml_sin(<1 x float>) nounwind readnone alwaysinline {
  %lane = extractelement <1 x float> %0, i32 0
  %value = call float @llvm.sin.f32(float %lane)
  %result = insertelement <1 x float> undef, float %value, i32 0
  ret <1 x float> %result
}
;; Cosine of the single lane, via the scalar cos intrinsic.
define <1 x float> @__svml_cos(<1 x float>) nounwind readnone alwaysinline {
  %lane = extractelement <1 x float> %0, i32 0
  %value = call float @llvm.cos.f32(float %lane)
  %result = insertelement <1 x float> undef, float %value, i32 0
  ret <1 x float> %result
}
;; Computes the sine and the cosine of the single lane and stores them
;; through the two pointer arguments: the sine through the first pointer
;; and the cosine through the second one.
;; The function writes to memory through its arguments, so it must not be
;; marked readnone; that attribute would allow the stores to be dropped.
define void @__svml_sincos(<1 x float>, <1 x float> *, <1 x float> *) nounwind alwaysinline {
  %sin = call <1 x float> @__svml_sin (<1 x float> %0)
  %cos = call <1 x float> @__svml_cos (<1 x float> %0)
  store <1 x float> %sin, <1 x float> * %1
  store <1 x float> %cos, <1 x float> * %2
  ret void
}
;; Tangent is not supported on this target: the argument is returned
;; unchanged rather than a computed result.
define <1 x float> @__svml_tan(<1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
;ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm_tan_f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
;unasry1to1(float, @llvm.tan.f32)
; UNSUPPORTED!
ret <1 x float > %0
}
;; Arc tangent is not supported on this target: the argument is returned
;; unchanged rather than a computed result.
define <1 x float> @__svml_atan(<1 x float>) nounwind readnone alwaysinline {
; %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
; ret <1 x float> %ret
;%r = extractelement <1 x float> %0, i32 0
;%s = call float @llvm_atan_f32(float %r)
;%rv = insertelement <1 x float> undef, float %r, i32 0
;ret <1 x float> %rv
;unsary1to1(float,@llvm.atan.f32)
;UNSUPPORTED!
ret <1 x float > %0
}
;; Two-argument arc tangent is not supported on this target: the first
;; argument is returned unchanged and the second one is ignored.
define <1 x float> @__svml_atan2(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
;ret <1 x float> %ret
;%y = extractelement <1 x float> %0, i32 0
;%x = extractelement <1 x float> %1, i32 0
;%q = fdiv float %y, %x
;%a = call float @llvm.atan.f32 (float %q)
;%rv = insertelement <1 x float> undef, float %a, i32 0
;ret <1 x float> %rv
; UNSUPPORTED!
ret <1 x float > %0
}
;; Exponential of the single lane, via the scalar exp intrinsic.
define <1 x float> @__svml_exp(<1 x float>) nounwind readnone alwaysinline {
  %lane = extractelement <1 x float> %0, i32 0
  %value = call float @llvm.exp.f32(float %lane)
  %result = insertelement <1 x float> undef, float %value, i32 0
  ret <1 x float> %result
}
;; Logarithm of the single lane, via the scalar log intrinsic.
define <1 x float> @__svml_log(<1 x float>) nounwind readnone alwaysinline {
  %lane = extractelement <1 x float> %0, i32 0
  %value = call float @llvm.log.f32(float %lane)
  %result = insertelement <1 x float> undef, float %value, i32 0
  ret <1 x float> %result
}
;; Power function for the single lane: the lane of the first argument is
;; raised to the lane of the second one via the scalar pow intrinsic.
define <1 x float> @__svml_pow(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
  %base = extractelement <1 x float> %0, i32 0
  %exponent = extractelement <1 x float> %1, i32 0
  %value = call float @llvm.pow.f32(float %base,float %exponent)
  %result = insertelement <1 x float> undef, float %value, i32 0
  ret <1 x float> %result
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float min/max
;; Float maximum for the single lane: the first lane is chosen when it
;; compares ordered greater than the second one, otherwise the second.
define <1 x float> @__max_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
  %lhs = extractelement <1 x float> %0, i32 0
  %rhs = extractelement <1 x float> %1, i32 0
  %lhs_wins = fcmp ogt float %lhs, %rhs
  %picked = select i1 %lhs_wins, float %lhs, float %rhs
  %result = insertelement <1 x float> undef, float %picked, i32 0
  ret <1 x float> %result
}
;; Float minimum for the single lane: the first lane is chosen when it
;; compares ordered less than the second one, otherwise the second.
define <1 x float> @__min_varying_float(<1 x float>, <1 x float>) nounwind readonly alwaysinline {
  %lhs = extractelement <1 x float> %0, i32 0
  %rhs = extractelement <1 x float> %1, i32 0
  %lhs_wins = fcmp olt float %lhs, %rhs
  %picked = select i1 %lhs_wins, float %lhs, float %rhs
  %result = insertelement <1 x float> undef, float %picked, i32 0
  ret <1 x float> %result
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; double precision sqrt
;declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
;; Double-precision square root for the one-wide target. The body is
;; generated by an m4 helper macro that receives the element type and the
;; scalar intrinsic @llvm.sqrt.f64. NOTE(review): presumably the macro
;; extracts lane 0, calls the intrinsic and re-wraps the result -- confirm
;; against its definition.
define <1 x double> @__sqrt_varying_double(<1 x double>) nounwind alwaysinline {
;unarya2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
;ret <1 x double> %ret
unary1to1(double, @llvm.sqrt.f64)
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; double precision min/max
;declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
;declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
define <1 x double> @__min_varying_double(<1 x double>, <1 x double>) nounwind readnone {
  ;; scalar compare-and-select on lane 0. The compare is ordered (olt), so
  ;; it is false when either input is NaN and the second operand is chosen.
  %lhs = extractelement <1 x double> %0, i32 0
  %rhs = extractelement <1 x double> %1, i32 0
  %lhs_lt = fcmp olt double %lhs, %rhs
  %smaller = select i1 %lhs_lt, double %lhs, double %rhs
  %packed = insertelement <1 x double> undef, double %smaller, i32 0
  ret <1 x double> %packed
}
define <1 x double> @__max_varying_double(<1 x double>, <1 x double>) nounwind readnone {
  ;; scalar compare-and-select on lane 0. The compare is ordered (ogt), so
  ;; it is false when either input is NaN and the second operand is chosen.
  %lhs = extractelement <1 x double> %0, i32 0
  %rhs = extractelement <1 x double> %1, i32 0
  %lhs_gt = fcmp ogt double %lhs, %rhs
  %larger = select i1 %lhs_gt, double %lhs, double %rhs
  %packed = insertelement <1 x double> undef, double %larger, i32 0
  ret <1 x double> %packed
}
define float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
  ;; reciprocal computed with one plain division; there is no approximate
  ;; reciprocal followed by a refinement step in this implementation
  %recip = fdiv float 1., %0
  ret float %recip
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding floats
define float @__round_uniform_float(float) nounwind readonly alwaysinline {
  ;; no separate scalar rounding code: put the value into a one-element
  ;; vector, reuse the varying implementation, and take lane 0 of its result
  %vec_in = insertelement <1 x float> undef, float %0, i32 0
  %vec_out = call <1 x float> @__round_varying_float(<1 x float> %vec_in)
  %scalar = extractelement <1 x float> %vec_out, i32 0
  ret float %scalar
}
define float @__floor_uniform_float(float) nounwind readonly alwaysinline {
  ;; put the value into a one-element vector, reuse the varying floor, and
  ;; take lane 0 of its result
  %vec_in = insertelement <1 x float> undef, float %0, i32 0
  %vec_out = call <1 x float> @__floor_varying_float(<1 x float> %vec_in)
  %scalar = extractelement <1 x float> %vec_out, i32 0
  ret float %scalar
}
define float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
  ;; put the value into a one-element vector, reuse the varying ceil, and
  ;; take lane 0 of its result
  %vec_in = insertelement <1 x float> undef, float %0, i32 0
  %vec_out = call <1 x float> @__ceil_varying_float(<1 x float> %vec_in)
  %scalar = extractelement <1 x float> %vec_out, i32 0
  ret float %scalar
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding doubles
define double @__round_uniform_double(double) nounwind readonly alwaysinline {
  ;; forwards to the external @round symbol (presumably the C math library
  ;; routine -- confirm where it is declared)
  %rounded = call double @round(double %0)
  ret double %rounded
}
define double @__floor_uniform_double(double) nounwind readonly alwaysinline {
  ;; forwards to the external @floor symbol (presumably the C math library
  ;; routine -- confirm where it is declared)
  %floored = call double @floor(double %0)
  ret double %floored
}
define double @__ceil_uniform_double(double) nounwind readonly alwaysinline {
  ;; forwards to the external @ceil symbol (presumably the C math library
  ;; routine -- confirm where it is declared)
  %raised = call double @ceil(double %0)
  ret double %raised
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; sqrt
define float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
  ;; single-precision square root through the generic LLVM intrinsic
  %root = call float @llvm.sqrt.f32(float %0)
  ret float %root
}
define double @__sqrt_uniform_double(double) nounwind readonly alwaysinline {
  ;; double-precision square root through the generic LLVM intrinsic
  %root = call double @llvm.sqrt.f64(double %0)
  ret double %root
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rsqrt
define float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
  ;; reciprocal square root composed from the two uniform helpers:
  ;; take the square root first, then its reciprocal
  %root = call float @__sqrt_uniform_float(float %0)
  %inv_root = call float @__rcp_uniform_float(float %root)
  ret float %inv_root
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; fastmath
;; Hook for switching on fast-math behaviour. On this target it does
;; nothing: the body is just a return.
define void @__fastmath() nounwind alwaysinline {
; no-op
ret void
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; float min/max
define float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
  ;; ordered greater-than compare, then select; the second operand is
  ;; returned whenever the compare is false, which includes NaN inputs
  %first_gt = fcmp ogt float %0, %1
  %larger = select i1 %first_gt, float %0, float %1
  ret float %larger
}
define float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
  ;; ordered less-than compare, then select; the second operand is
  ;; returned whenever the compare is false, which includes NaN inputs
  %first_lt = fcmp olt float %0, %1
  %smaller = select i1 %first_lt, float %0, float %1
  ret float %smaller
}
define double @__max_uniform_double(double, double) nounwind readonly alwaysinline {
  ;; ordered greater-than compare, then select; the second operand is
  ;; returned whenever the compare is false, which includes NaN inputs
  %first_gt = fcmp ogt double %0, %1
  %larger = select i1 %first_gt, double %0, double %1
  ret double %larger
}
define double @__min_uniform_double(double, double) nounwind readonly alwaysinline {
  ;; ordered less-than compare, then select; the second operand is
  ;; returned whenever the compare is false, which includes NaN inputs
  %first_lt = fcmp olt double %0, %1
  %smaller = select i1 %first_lt, double %0, double %1
  ret double %smaller
}
;; The three lines below are m4 macro invocations rather than LLVM IR; each
;; one expands to generated code. NOTE(review): judging by the names alone
;; they emit the shuffle helpers, the count-leading/trailing-zeros helpers
;; and the prefetch helpers -- confirm against the shared macro file.
define_shuffles()
ctlztz()
define_prefetches()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone

View File

@@ -98,6 +98,14 @@ declare void @__aos_to_soa4_float(float * noalias %p, <WIDTH x float> * noalias
<WIDTH x float> * noalias %out2, <WIDTH x float> * noalias %out2,
<WIDTH x float> * noalias %out3) nounwind <WIDTH x float> * noalias %out3) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; math ;; math

View File

@@ -1805,10 +1805,69 @@ ok:
ret void ret void
} }
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; new/delete
declare i8 * @malloc(i64)
declare void @free(i8 *)
define i8 * @__new_uniform(i64 %size) {
  ;; a single malloc call; the pointer it returns is passed straight back
  ;; to the caller with no null check
  %mem = call i8 * @malloc(i64 %size)
  ret i8 * %mem
}
;; Per-lane allocation taking 32-bit sizes. Each lane size is zero-extended
;; to i64 and handed to malloc; the resulting pointer is stored as an i64
;; in that lane of the result vector. The result starts out as all zeros,
;; so a lane for which the per-lane block is not run keeps a zero entry.
;; NOTE(review): presumably the block runs only for lanes whose mask is
;; on -- confirm against the per-lane macro definition.
define <WIDTH x i64> @__new_varying32(<WIDTH x i32> %size, <WIDTH x MASK> %mask) {
%ret = alloca <WIDTH x i64>
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
;; scalar i64 view of the result vector so lanes can be written one by one
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
per_lane(WIDTH, <WIDTH x MASK> %mask, `
%sz_LANE_ID = extractelement <WIDTH x i32> %size, i32 LANE
%sz64_LANE_ID = zext i32 %sz_LANE_ID to i64
%ptr_LANE_ID = call i8 * @malloc(i64 %sz64_LANE_ID)
%ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
%store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
%r = load <WIDTH x i64> * %ret
ret <WIDTH x i64> %r
}
;; Per-lane allocation taking 64-bit sizes. Each lane size is handed to
;; malloc as is; the resulting pointer is stored as an i64 in that lane of
;; the result vector. The result starts out as all zeros, so a lane for
;; which the per-lane block is not run keeps a zero entry.
;; NOTE(review): presumably the block runs only for lanes whose mask is
;; on -- confirm against the per-lane macro definition.
define <WIDTH x i64> @__new_varying64(<WIDTH x i64> %size, <WIDTH x MASK> %mask) {
%ret = alloca <WIDTH x i64>
store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
;; scalar i64 view of the result vector so lanes can be written one by one
%ret64 = bitcast <WIDTH x i64> * %ret to i64 *
per_lane(WIDTH, <WIDTH x MASK> %mask, `
%sz_LANE_ID = extractelement <WIDTH x i64> %size, i32 LANE
%ptr_LANE_ID = call i8 * @malloc(i64 %sz_LANE_ID)
%ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
%store_LANE_ID = getelementptr i64 * %ret64, i32 LANE
store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
%r = load <WIDTH x i64> * %ret
ret <WIDTH x i64> %r
}
;; Releases one pointer by passing it directly to free.
define void @__delete_uniform(i8 * %ptr) {
call void @free(i8 * %ptr)
ret void
}
;; Per-lane release. Each lane holds a pointer encoded as an i64; the
;; per-lane block converts it back to an i8 pointer and passes it to free.
;; NOTE(review): presumably the block runs only for lanes whose mask is
;; on -- confirm against the per-lane macro definition.
define void @__delete_varying(<WIDTH x i64> %ptr, <WIDTH x MASK> %mask) {
per_lane(WIDTH, <WIDTH x MASK> %mask, `
%iptr_LANE_ID = extractelement <WIDTH x i64> %ptr, i32 LANE
%ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to i8 *
call void @free(i8 * %ptr_LANE_ID)
')
ret void
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; read hw clock ;; read hw clock
define i64 @__clock() nounwind uwtable ssp { define i64 @__clock() nounwind {
entry: entry:
tail call void asm sideeffect "xorl %eax,%eax \0A cpuid", "~{rax},~{rbx},~{rcx},~{rdx},~{dirflag},~{fpsr},~{flags}"() nounwind tail call void asm sideeffect "xorl %eax,%eax \0A cpuid", "~{rax},~{rbx},~{rcx},~{rdx},~{dirflag},~{fpsr},~{flags}"() nounwind
%0 = tail call { i32, i32 } asm sideeffect "rdtsc", "={ax},={dx},~{dirflag},~{fpsr},~{flags}"() nounwind %0 = tail call { i32, i32 } asm sideeffect "rdtsc", "={ax},={dx},~{dirflag},~{fpsr},~{flags}"() nounwind
@@ -2187,9 +2246,9 @@ return:
define(`gen_masked_store', ` define(`gen_masked_store', `
define void @__masked_store_$3(<$1 x $2>* nocapture, <$1 x $2>, <$1 x i32>) nounwind alwaysinline { define void @__masked_store_$3(<$1 x $2>* nocapture, <$1 x $2>, <$1 x i32>) nounwind alwaysinline {
per_lane($1, <$1 x i32> %2, ` per_lane($1, <$1 x i32> %2, `
%ptr_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE %ptr_LANE_ID = getelementptr <$1 x $2> * %0, i32 0, i32 LANE
%storeval_ID = extractelement <$1 x $2> %1, i32 LANE %storeval_LANE_ID = extractelement <$1 x $2> %1, i32 LANE
store $2 %storeval_ID, $2 * %ptr_ID') store $2 %storeval_LANE_ID, $2 * %ptr_LANE_ID')
ret void ret void
} }
') ')
@@ -2644,7 +2703,7 @@ pl_known_mask:
pl_all_on: pl_all_on:
;; the mask is all on--just expand the code for each lane sequentially ;; the mask is all on--just expand the code for each lane sequentially
forloop(i, 0, eval($1-1), forloop(i, 0, eval($1-1),
`patsubst(`$3', `ID\|LANE', i)') `patsubst(`$3', `LANE', i)')
br label %pl_done br label %pl_done
pl_unknown_mask: pl_unknown_mask:
@@ -2806,11 +2865,11 @@ define <$1 x $2> @__gather32_$2(<$1 x i32> %ptrs,
<$1 x i32> %vecmask) nounwind readonly alwaysinline { <$1 x i32> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <$1 x $2> %ret_ptr = alloca <$1 x $2>
per_lane($1, <$1 x i32> %vecmask, ` per_lane($1, <$1 x i32> %vecmask, `
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE %iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_ID = inttoptr i32 %iptr_ID to $2 * %ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
%val_ID = load $2 * %ptr_ID %val_LANE_ID = load $2 * %ptr_LANE_ID
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE %store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_ID, $2 * %store_ptr_ID store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
') ')
%ret = load <$1 x $2> * %ret_ptr %ret = load <$1 x $2> * %ret_ptr
@@ -2822,11 +2881,11 @@ define <$1 x $2> @__gather64_$2(<$1 x i64> %ptrs,
<$1 x i32> %vecmask) nounwind readonly alwaysinline { <$1 x i32> %vecmask) nounwind readonly alwaysinline {
%ret_ptr = alloca <$1 x $2> %ret_ptr = alloca <$1 x $2>
per_lane($1, <$1 x i32> %vecmask, ` per_lane($1, <$1 x i32> %vecmask, `
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE %iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_ID = inttoptr i64 %iptr_ID to $2 * %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
%val_ID = load $2 * %ptr_ID %val_LANE_ID = load $2 * %ptr_LANE_ID
%store_ptr_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE %store_ptr_LANE_ID = getelementptr <$1 x $2> * %ret_ptr, i32 0, i32 LANE
store $2 %val_ID, $2 * %store_ptr_ID store $2 %val_LANE_ID, $2 * %store_ptr_LANE_ID
') ')
%ret = load <$1 x $2> * %ret_ptr %ret = load <$1 x $2> * %ret_ptr
@@ -2910,10 +2969,10 @@ define void @__scatter_base_offsets64_$2(i8* %base, <$1 x i64> %offsets, i32 %of
define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values, define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline { <$1 x i32> %mask) nounwind alwaysinline {
per_lane($1, <$1 x i32> %mask, ` per_lane($1, <$1 x i32> %mask, `
%iptr_ID = extractelement <$1 x i32> %ptrs, i32 LANE %iptr_LANE_ID = extractelement <$1 x i32> %ptrs, i32 LANE
%ptr_ID = inttoptr i32 %iptr_ID to $2 * %ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $2 *
%val_ID = extractelement <$1 x $2> %values, i32 LANE %val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_ID, $2 * %ptr_ID store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
') ')
ret void ret void
} }
@@ -2922,10 +2981,10 @@ define void @__scatter32_$2(<$1 x i32> %ptrs, <$1 x $2> %values,
define void @__scatter64_$2(<$1 x i64> %ptrs, <$1 x $2> %values, define void @__scatter64_$2(<$1 x i64> %ptrs, <$1 x $2> %values,
<$1 x i32> %mask) nounwind alwaysinline { <$1 x i32> %mask) nounwind alwaysinline {
per_lane($1, <$1 x i32> %mask, ` per_lane($1, <$1 x i32> %mask, `
%iptr_ID = extractelement <$1 x i64> %ptrs, i32 LANE %iptr_LANE_ID = extractelement <$1 x i64> %ptrs, i32 LANE
%ptr_ID = inttoptr i64 %iptr_ID to $2 * %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $2 *
%val_ID = extractelement <$1 x $2> %values, i32 LANE %val_LANE_ID = extractelement <$1 x $2> %values, i32 LANE
store $2 %val_ID, $2 * %ptr_ID store $2 %val_LANE_ID, $2 * %ptr_LANE_ID
') ')
ret void ret void
} }

View File

@@ -2114,7 +2114,8 @@ bool CWriter::doInitialization(Module &M) {
I->getName() == "memset" || I->getName() == "memset_pattern16" || I->getName() == "memset" || I->getName() == "memset_pattern16" ||
I->getName() == "puts" || I->getName() == "puts" ||
I->getName() == "printf" || I->getName() == "putchar" || I->getName() == "printf" || I->getName() == "putchar" ||
I->getName() == "fflush") I->getName() == "fflush" || I->getName() == "malloc" ||
I->getName() == "free")
continue; continue;
// Don't redeclare ispc's own intrinsics // Don't redeclare ispc's own intrinsics
@@ -3437,6 +3438,9 @@ void CWriter::visitCallInst(CallInst &I) {
Callee = RF; Callee = RF;
} }
if (Callee->getName() == "malloc")
Out << "(uint8_t *)";
if (NeedsCast) { if (NeedsCast) {
// Ok, just cast the pointer type. // Ok, just cast the pointer type.
Out << "(("; Out << "((";

13
ctx.cpp
View File

@@ -642,12 +642,12 @@ FunctionEmitContext::inSwitchStatement() const {
void void
FunctionEmitContext::Break(bool doCoherenceCheck) { FunctionEmitContext::Break(bool doCoherenceCheck) {
Assert(controlFlowInfo.size() > 0);
if (breakTarget == NULL) { if (breakTarget == NULL) {
Error(currentPos, "\"break\" statement is illegal outside of " Error(currentPos, "\"break\" statement is illegal outside of "
"for/while/do loops and \"switch\" statements."); "for/while/do loops and \"switch\" statements.");
return; return;
} }
Assert(controlFlowInfo.size() > 0);
if (bblock == NULL) if (bblock == NULL)
return; return;
@@ -721,6 +721,7 @@ FunctionEmitContext::Continue(bool doCoherenceCheck) {
"for/while/do/foreach loops."); "for/while/do/foreach loops.");
return; return;
} }
Assert(controlFlowInfo.size() > 0);
if (ifsInCFAllUniform(CFInfo::Loop) || GetInternalMask() == LLVMMaskAllOn) { if (ifsInCFAllUniform(CFInfo::Loop) || GetInternalMask() == LLVMMaskAllOn) {
// Similarly to 'break' statements, we can immediately jump to the // Similarly to 'break' statements, we can immediately jump to the
@@ -1279,7 +1280,11 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
llvm::Value * llvm::Value *
FunctionEmitContext::GetStringPtr(const std::string &str) { FunctionEmitContext::GetStringPtr(const std::string &str) {
#ifdef LLVM_3_1svn
llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
#else
llvm::Constant *lstr = llvm::ConstantArray::get(*g->ctx, str); llvm::Constant *lstr = llvm::ConstantArray::get(*g->ctx, str);
#endif
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage; llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(), llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
true /*isConst*/, true /*isConst*/,
@@ -1329,7 +1334,11 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
static llvm::Value * static llvm::Value *
lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) { lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
#ifdef LLVM_3_1svn
llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s);
#else
llvm::Constant *sConstant = llvm::ConstantArray::get(*g->ctx, s); llvm::Constant *sConstant = llvm::ConstantArray::get(*g->ctx, s);
#endif
llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(), llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
true /* const */, true /* const */,
llvm::GlobalValue::InternalLinkage, llvm::GlobalValue::InternalLinkage,
@@ -2923,7 +2932,7 @@ FunctionEmitContext::SyncInst() {
/** When we gathering from or scattering to a varying atomic type, we need /** When we gathering from or scattering to a varying atomic type, we need
to add an appropraite offset to the final address for each lane right to add an appropriate offset to the final address for each lane right
before we use it. Given a varying pointer we're about to use and its before we use it. Given a varying pointer we're about to use and its
type, this function determines whether these offsets are needed and type, this function determines whether these offsets are needed and
returns an updated pointer that incorporates these offsets if needed. returns an updated pointer that incorporates these offsets if needed.

View File

@@ -113,6 +113,12 @@ DeclSpecs::DeclSpecs(const Type *t, StorageClass sc, int tq) {
const Type * const Type *
DeclSpecs::GetBaseType(SourcePos pos) const { DeclSpecs::GetBaseType(SourcePos pos) const {
const Type *bt = baseType; const Type *bt = baseType;
if (bt == NULL) {
Warning(pos, "No type specified in declaration. Assuming int32.");
bt = AtomicType::UnboundInt32;
}
if (vectorSize > 0) { if (vectorSize > 0) {
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(bt); const AtomicType *atomicType = dynamic_cast<const AtomicType *>(bt);
if (atomicType == NULL) { if (atomicType == NULL) {
@@ -171,6 +177,11 @@ Declarator::Declarator(DeclaratorKind dk, SourcePos p)
void void
Declarator::InitFromDeclSpecs(DeclSpecs *ds) { Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
const Type *t = GetType(ds); const Type *t = GetType(ds);
if (t == NULL) {
Assert(m->errorCount > 0);
return;
}
Symbol *sym = GetSymbol(); Symbol *sym = GetSymbol();
if (sym != NULL) { if (sym != NULL) {
sym->type = t; sym->type = t;
@@ -248,8 +259,10 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
// already have been added to the symbol table by AddGlobal() by the // already have been added to the symbol table by AddGlobal() by the
// time we get here.) // time we get here.)
Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type); Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
if (funSym != NULL) if (funSym == NULL)
// May be NULL due to error earlier in compilation // May be NULL due to error earlier in compilation
Assert(m->errorCount > 0);
else
funSym->pos = pos; funSym->pos = pos;
// Walk down to the declarator for the function. (We have to get past // Walk down to the declarator for the function. (We have to get past
@@ -262,11 +275,18 @@ Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
for (unsigned int i = 0; i < d->functionParams.size(); ++i) { for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
Symbol *sym = d->GetSymbolForFunctionParameter(i); Symbol *sym = d->GetSymbolForFunctionParameter(i);
sym->type = sym->type->ResolveUnboundVariability(Type::Varying); if (sym->type == NULL) {
Assert(m->errorCount > 0);
continue;
}
else
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
funArgs->push_back(sym); funArgs->push_back(sym);
} }
funSym->type = funSym->type->ResolveUnboundVariability(Type::Varying); if (funSym != NULL)
funSym->type = funSym->type->ResolveUnboundVariability(Type::Varying);
return funSym; return funSym;
} }
@@ -331,6 +351,16 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
break; break;
case DK_ARRAY: case DK_ARRAY:
if (type == AtomicType::Void) {
Error(pos, "Arrays of \"void\" type are illegal.");
return NULL;
}
if (dynamic_cast<const ReferenceType *>(type)) {
Error(pos, "Arrays of references (type \"%s\") are illegal.",
type->GetString().c_str());
return NULL;
}
type = new ArrayType(type, arraySize); type = new ArrayType(type, arraySize);
if (child) if (child)
return child->GetType(type, ds); return child->GetType(type, ds);
@@ -357,6 +387,11 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
"function parameter declaration for parameter \"%s\".", "function parameter declaration for parameter \"%s\".",
lGetStorageClassName(d->declSpecs->storageClass), lGetStorageClassName(d->declSpecs->storageClass),
sym->name.c_str()); sym->name.c_str());
if (sym->type == AtomicType::Void) {
Error(sym->pos, "Parameter with type \"void\" illegal in function "
"parameter list.");
sym->type = NULL;
}
const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type); const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
if (at != NULL) { if (at != NULL) {
@@ -368,8 +403,12 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
// report this differently than it was originally declared // report this differently than it was originally declared
// in the function, but it's not clear that this is a // in the function, but it's not clear that this is a
// significant problem.) // significant problem.)
sym->type = PointerType::GetUniform(at->GetElementType()); if (at->GetElementType() == NULL) {
Assert(m->errorCount > 0);
return NULL;
}
sym->type = PointerType::GetUniform(at->GetElementType());
// Make sure there are no unsized arrays (other than the // Make sure there are no unsized arrays (other than the
// first dimension) in function parameter lists. // first dimension) in function parameter lists.
at = dynamic_cast<const ArrayType *>(at->GetElementType()); at = dynamic_cast<const ArrayType *>(at->GetElementType());
@@ -413,6 +452,10 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
Error(pos, "No return type provided in function declaration."); Error(pos, "No return type provided in function declaration.");
return NULL; return NULL;
} }
if (dynamic_cast<const FunctionType *>(returnType) != NULL) {
Error(pos, "Illegal to return function type from function.");
return NULL;
}
bool isExported = ds && (ds->storageClass == SC_EXPORT); bool isExported = ds && (ds->storageClass == SC_EXPORT);
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C); bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
@@ -434,6 +477,11 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
return NULL; return NULL;
} }
if (child == NULL) {
Assert(m->errorCount > 0);
return NULL;
}
const Type *functionType = const Type *functionType =
new FunctionType(returnType, args, argNames, argDefaults, new FunctionType(returnType, args, argNames, argDefaults,
argPos, isTask, isExported, isExternC); argPos, isTask, isExported, isExternC);
@@ -536,14 +584,23 @@ Declaration::GetVariableDeclarations() const {
for (unsigned int i = 0; i < declarators.size(); ++i) { for (unsigned int i = 0; i < declarators.size(); ++i) {
Declarator *decl = declarators[i]; Declarator *decl = declarators[i];
if (decl == NULL) if (decl == NULL) {
// Ignore earlier errors // Ignore earlier errors
Assert(m->errorCount > 0);
continue; continue;
}
Symbol *sym = decl->GetSymbol(); Symbol *sym = decl->GetSymbol();
if (sym == NULL || sym->type == NULL) {
// Ignore errors
Assert(m->errorCount > 0);
continue;
}
sym->type = sym->type->ResolveUnboundVariability(Type::Varying); sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
if (dynamic_cast<const FunctionType *>(sym->type) == NULL) { if (sym->type == AtomicType::Void)
Error(sym->pos, "\"void\" type variable illegal in declaration.");
else if (dynamic_cast<const FunctionType *>(sym->type) == NULL) {
m->symbolTable->AddVariable(sym); m->symbolTable->AddVariable(sym);
vars.push_back(VariableDeclaration(sym, decl->initExpr)); vars.push_back(VariableDeclaration(sym, decl->initExpr));
} }
@@ -558,11 +615,18 @@ Declaration::DeclareFunctions() {
for (unsigned int i = 0; i < declarators.size(); ++i) { for (unsigned int i = 0; i < declarators.size(); ++i) {
Declarator *decl = declarators[i]; Declarator *decl = declarators[i];
if (decl == NULL) if (decl == NULL) {
// Ignore earlier errors // Ignore earlier errors
Assert(m->errorCount > 0);
continue; continue;
}
Symbol *sym = decl->GetSymbol(); Symbol *sym = decl->GetSymbol();
if (sym == NULL || sym->type == NULL) {
// Ignore errors
Assert(m->errorCount > 0);
continue;
}
sym->type = sym->type->ResolveUnboundVariability(Type::Varying); sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
if (dynamic_cast<const FunctionType *>(sym->type) == NULL) if (dynamic_cast<const FunctionType *>(sym->type) == NULL)
@@ -610,6 +674,9 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
Symbol *sym = d->GetSymbol(); Symbol *sym = d->GetSymbol();
if (sym->type == AtomicType::Void)
Error(d->pos, "\"void\" type illegal for struct member.");
const ArrayType *arrayType = const ArrayType *arrayType =
dynamic_cast<const ArrayType *>(sym->type); dynamic_cast<const ArrayType *>(sym->type);
if (arrayType != NULL && arrayType->GetElementCount() == 0) { if (arrayType != NULL && arrayType->GetElementCount() == 0) {

View File

@@ -1,3 +1,42 @@
=== v1.1.4 === (4 February 2012)
There are two major bugfixes for Windows in this release. First, a number
of failures in AVX code generation on Windows have been fixed; AVX on
Windows now has no known issues. Second, a longstanding bug in parsing 64-bit
integer constants on Windows has been fixed.
This release features a new experimental scalar target, contributed by Gabe
Weisz <gweisz@cs.cmu.edu>. This target ("--target=generic-1") compiles
gangs of single program instances (i.e. programCount == 1); it can be
useful for debugging ispc programs.
The compiler now supports dynamic memory allocation in ispc programs (with
"new" and "delete" operators based on C++). See
http://ispc.github.com/ispc.html#dynamic-memory-allocation in the
documentation for more information.
ispc now performs "short circuit" evaluation of the || and && logical
operators and the ? : selection operator. (This represents the correction
of a major incompatibility with C.) Code like "(index < arraySize &&
array[index] == 1)" thus now executes as in C, where "array[index]" won't
be evaluated unless "index" is less than "arraySize".
The standard library now provides "local" atomic operations, which are
atomic across the gang of program instances (but not across other gangs or
other hardware threads). See the updated documentation on atomics for more
information:
http://ispc.github.com/ispc.html#atomic-operations-and-memory-fences.
The standard library now offers a clock() function, which returns a uniform
int64 value that counts processor cycles; it can be used for
fine-resolution timing measurements.
Finally (of limited interest now): ispc now supports the forthcoming AVX2
instruction set, due with Haswell-generation CPUs. All tests and examples
compile and execute correctly with AVX2. (Thanks specifically to Craig
Topper and Nadav Rotem for work on AVX2 support in LLVM, which made this
possible.)
=== v1.1.3 === (20 January 2012) === v1.1.3 === (20 January 2012)
With this release, the language now supports "switch" statements, with the With this release, the language now supports "switch" statements, with the

View File

@@ -96,6 +96,9 @@ Contents:
+ `Declarations and Initializers`_ + `Declarations and Initializers`_
+ `Expressions`_ + `Expressions`_
* `Dynamic Memory Allocation`_
+ `Control Flow`_ + `Control Flow`_
* `Conditional Statements: "if"`_ * `Conditional Statements: "if"`_
@@ -1148,6 +1151,7 @@ in C:
* Structs and arrays * Structs and arrays
* Support for recursive function calls * Support for recursive function calls
* Support for separate compilation of source files * Support for separate compilation of source files
* "Short-circuit" evaluation of ``||``, ``&&`` and ``? :`` operators
* The preprocessor * The preprocessor
``ispc`` adds a number of features from C++ and C99 to this base: ``ispc`` adds a number of features from C++ and C99 to this base:
@@ -1162,6 +1166,7 @@ in C:
* The ``inline`` qualifier to indicate that a function should be inlined * The ``inline`` qualifier to indicate that a function should be inlined
* Function overloading by parameter type * Function overloading by parameter type
* Hexadecimal floating-point constants * Hexadecimal floating-point constants
* Dynamic memory allocation with ``new`` and ``delete``.
``ispc`` also adds a number of new features that aren't in C89, C99, or ``ispc`` also adds a number of new features that aren't in C89, C99, or
C++: C++:
@@ -1180,7 +1185,6 @@ C++:
There are a number of features of C89 that are not supported in ``ispc`` There are a number of features of C89 that are not supported in ``ispc``
but are likely to be supported in future releases: but are likely to be supported in future releases:
* Short circuiting of logical operations
* There are no types named ``char``, ``short``, or ``long`` (or ``long * There are no types named ``char``, ``short``, or ``long`` (or ``long
double``). However, there are built-in ``int8``, ``int16``, and double``). However, there are built-in ``int8``, ``int16``, and
``int64`` types ``int64`` types
@@ -1965,19 +1969,137 @@ operator also work as expected.
(*fp).a = 0; (*fp).a = 0;
fp->b = 1; fp->b = 1;
As in C and C++, evaluation of the ``||`` and ``&&`` logical operators as
well as the selection operator ``? :`` is "short-circuited"; the right hand
side won't be evaluated if the value from the left-hand side determines the
logical operator's value. For example, in the following code,
``array[index]`` won't be evaluated for values of ``index`` that are
greater than or equal to ``NUM_ITEMS``.
::
if (index < NUM_ITEMS && array[index] > 0) {
// ...
}
Dynamic Memory Allocation
-------------------------
``ispc`` programs can dynamically allocate (and free) memory, using syntax
based on C++'s ``new`` and ``delete`` operators:
::
int count = ...;
int *ptr = new uniform int[count];
// use ptr...
delete[] ptr;
In the above code, each program instance allocates its own ``count``-sized
array of ``uniform int`` values, uses that memory, and then deallocates
that memory. Uses of ``new`` and ``delete`` in ``ispc`` programs are
serviced by corresponding calls to the system C library's ``malloc()`` and
``free()`` functions.
After a pointer has been deleted, it is illegal to access the memory it
points to. However, note that deletion happens on a per-program-instance
basis. In other words, consider the following code:
::
int *ptr = new uniform int[count];
// use ptr
if (count > 1000)
delete[] ptr;
// ...
Here, the program instances where ``count`` is greater than 1000 have
deleted the dynamically allocated memory pointed to by ``ptr``, but the
other program instances have not. As such, it's illegal for the former set
of program instances to access ``*ptr``, but it's perfectly fine for the
latter set to continue to use the memory ``ptr`` points to. Note that it
is illegal to delete a pointer value returned by ``new`` more than one
time.
Sometimes, it's useful to be able to do a single allocation for the entire
gang of program instances. A ``new`` statement can be qualified with
``uniform`` to indicate a single memory allocation:
::
float * uniform ptr = uniform new float[10];
While a regular call to ``new`` returns a ``varying`` pointer (i.e. a
distinct pointer to separately-allocated memory for each program instance),
a ``uniform new`` performs a single allocation and returns a ``uniform``
pointer.
When using ``uniform new``, it's important to be aware of a subtlety; if
the returned pointer is stored in a varying pointer variable (as may be
appropriate and useful for the particular program being written), then the
varying pointer may inadvertently be passed to a subsequent ``delete``
statement, which is an error:
::
float *ptr = uniform new float[10];
// use ptr...
delete ptr; // ERROR: varying pointer is deleted
In this case, ``ptr`` will be deleted multiple times, once for each
executing program instance, which is an error (unless it happens that only
a single program instance is active in the above code.)
When using ``new`` statements, it's important to make an appropriate choice
of ``uniform`` or ``varying`` (as always, the default), for both the
``new`` operator itself as well as the type of data being allocated, based
on the program's needs. Consider the following four memory allocations:
::
uniform float * uniform p1 = uniform new uniform float[10];
float * uniform p2 = uniform new float[10];
uniform float * p3 = new uniform float[10];
float * p4 = new float[10];
Assuming that a ``float`` is 4 bytes in memory and if the gang size is 8
program instances, then the first allocation represents a single allocation
of 40 bytes, the second is a single allocation of 8*4*10 = 320 bytes, the
third is 8 allocations of 40 bytes, and the last performs 8 allocations of
320 bytes each.
Note in particular that varying allocations of varying data types are rarely
desirable in practice. In that case, each program instance is performing a
separate allocation of ``varying float`` memory. In this case, it's likely
that the program instances will only access a single element of each
``varying float``, which is wasteful.
Although ``ispc`` doesn't support constructors or destructors like C++, it
is possible to provide initializer values with ``new`` statements:
::
struct Point { float x, y, z; };
Point *pptr = new Point(10, 20, 30);
Here for example, the "x" element of the returned ``Point`` is initialized
to have the value 10 and so forth. In general, the rules for how
initializer values provided in ``new`` statements are used to initialize
complex data types follow the same rules as initializers for variables
described in `Declarations and Initializers`_.
Control Flow Control Flow
------------ ------------
``ispc`` supports most of C's control flow constructs, including ``if``, ``ispc`` supports most of C's control flow constructs, including ``if``,
``for``, ``while``, ``do``. It also supports variants of C's control flow ``switch``, ``for``, ``while``, ``do``. It has limited support for
``goto``, detailed below. It also supports variants of C's control flow
constructs that provide hints about the expected runtime coherence of the constructs that provide hints about the expected runtime coherence of the
control flow at that statement. It also provides parallel looping control flow at that statement. It also provides parallel looping
constructs, ``foreach`` and ``foreach_tiled``, all of which will be constructs, ``foreach`` and ``foreach_tiled``, all of which will be
detailed in this section. detailed in this section.
``ispc`` does not currently support ``switch`` statements or ``goto``.
Conditional Statements: "if" Conditional Statements: "if"
---------------------------- ----------------------------
@@ -3267,24 +3389,53 @@ Systems Programming Support
Atomic Operations and Memory Fences Atomic Operations and Memory Fences
----------------------------------- -----------------------------------
The usual range of atomic memory operations are provided in ``ispc``, The standard range of atomic memory operations are provided by the ``ispc``
including variants to handle both uniform and varying types. As a first standard library, including variants to handle both uniform and varying
example, consider on variant of the 32-bit integer atomic add routine: types as well as "local" and "global" atomics.
Local atomics provide atomic behavior across the program instances in a
gang, but not across multiple gangs or memory operations in different
hardware threads. To see why they are needed, consider a histogram
calculation where each program instance in the gang computes which bucket a
value lies in and then increments a corresponding counter. If the code is
written like this:
:: ::
int32 atomic_add_global(uniform int32 * uniform ptr, int32 delta) uniform int count[N_BUCKETS] = ...;
float value = ...;
int bucket = clamp(value / N_BUCKETS, 0, N_BUCKETS);
++count[bucket]; // ERROR: undefined behavior if collisions
The semantics are the expected ones for an atomic add function: the pointer then the program's behavior is undefined: whenever multiple program
points to a single location in memory (the same one for all program instances have values that map to the same value of ``bucket``, then the
instances), and for each executing program instance, the value stored in effect of the increment is undefined. (See the discussion in the `Data
the location that ``ptr`` points to has that program instance's value Races Within a Gang`_ section; in the case here, there isn't a sequence
"delta" added to it atomically, and the old value at that location is point between one program instance updating ``count[bucket]`` and the other
returned from the function. (Thus, if multiple processors simultaneously program instance reading its value.)
issue atomic adds to the same memory location, the adds will be serialized
by the hardware so that the correct result is computed in the end. The ``atomic_add_local()`` function can be used in this case; as a local
Furthermore, the atomic adds are serialized across the running program atomic it is atomic across the gang of program instances, such that the
instances.) expected result is computed.
::
...
int bucket = clamp(value / N_BUCKETS, 0, N_BUCKETS);
atomic_add_local(&count[bucket], 1);
It uses this variant of the 32-bit integer atomic add routine:
::
int32 atomic_add_local(uniform int32 * uniform ptr, int32 delta)
The semantics of this routine are typical for an atomic add function: the
pointer here points to a single location in memory (the same one for all
program instances), and for each executing program instance, the value
stored in the location that ``ptr`` points to has that program instance's
value "delta" added to it atomically, and the old value at that location is
returned from the function.
One thing to note is that the type of the value being added to is a One thing to note is that the type of the value being added to is a
``uniform`` integer, while the increment amount and the return value are ``uniform`` integer, while the increment amount and the return value are
@@ -3295,45 +3446,76 @@ atomics for the running program instances may be issued in arbitrary order;
it's not guaranteed that they will be issued in ``programIndex`` order, for it's not guaranteed that they will be issued in ``programIndex`` order, for
example. example.
Here are the declarations of the ``int32`` variants of these functions. Global atomics are more powerful than local atomics; they are atomic across
There are also ``int64`` equivalents as well as variants that take both the program instances in the gang as well as atomic across different
``unsigned`` ``int32`` and ``int64`` values. (The ``atomic_swap_global()`` gangs and different hardware threads. For example, for the global variant
function can be used with ``float`` and ``double`` types as well.) of the atomic used above,
:: ::
int32 atomic_add_global(uniform int32 * uniform ptr, int32 value) int32 atomic_add_global(uniform int32 * uniform ptr, int32 delta)
int32 atomic_subtract_global(uniform int32 * uniform ptr, int32 value)
int32 atomic_min_global(uniform int32 * uniform ptr, int32 value)
int32 atomic_max_global(uniform int32 * uniform ptr, int32 value)
int32 atomic_and_global(uniform int32 * uniform ptr, int32 value)
int32 atomic_or_global(uniform int32 * uniform ptr, int32 value)
int32 atomic_xor_global(uniform int32 * uniform ptr, int32 value)
int32 atomic_swap_global(uniform int32 * uniform ptr, int32 value)
There are also variants of these functions that take ``uniform`` values for if multiple processors simultaneously issue atomic adds to the same memory
the operand and return a ``uniform`` result. These correspond to a single location, the adds will be serialized by the hardware so that the correct
result is computed in the end.
Here are the declarations of the ``int32`` variants of these functions.
There are also ``int64`` equivalents as well as variants that take
``unsigned`` ``int32`` and ``int64`` values.
::
int32 atomic_add_{local,global}(uniform int32 * uniform ptr, int32 value)
int32 atomic_subtract_{local,global}(uniform int32 * uniform ptr, int32 value)
int32 atomic_min_{local,global}(uniform int32 * uniform ptr, int32 value)
int32 atomic_max_{local,global}(uniform int32 * uniform ptr, int32 value)
int32 atomic_and_{local,global}(uniform int32 * uniform ptr, int32 value)
int32 atomic_or_{local,global}(uniform int32 * uniform ptr, int32 value)
int32 atomic_xor_{local,global}(uniform int32 * uniform ptr, int32 value)
int32 atomic_swap_{local,global}(uniform int32 * uniform ptr, int32 value)
Support for ``float`` and ``double`` types is also available. For local
atomics, all but the logical operations are available. (There are
corresponding ``double`` variants of these, not listed here.)
::
float atomic_add_local(uniform float * uniform ptr, float value)
float atomic_subtract_local(uniform float * uniform ptr, float value)
float atomic_min_local(uniform float * uniform ptr, float value)
float atomic_max_local(uniform float * uniform ptr, float value)
float atomic_swap_local(uniform float * uniform ptr, float value)
For global atomics, only atomic swap is available for these types:
::
float atomic_swap_global(uniform float * uniform ptr, float value)
double atomic_swap_global(uniform double * uniform ptr, double value)
There are also variants of the atomic functions that take ``uniform`` values for the
operand and return a ``uniform`` result. These correspond to a single
atomic operation being performed for the entire gang of program instances, atomic operation being performed for the entire gang of program instances,
rather than one per program instance. rather than one per program instance.
:: ::
uniform int32 atomic_add_global(uniform int32 * uniform ptr, uniform int32 atomic_add_{local,global}(uniform int32 * uniform ptr,
uniform int32 value) uniform int32 value)
uniform int32 atomic_subtract_global(uniform int32 * uniform ptr, uniform int32 atomic_subtract_{local,global}(uniform int32 * uniform ptr,
uniform int32 value) uniform int32 value)
uniform int32 atomic_min_global(uniform int32 * uniform ptr, uniform int32 atomic_min_{local,global}(uniform int32 * uniform ptr,
uniform int32 value) uniform int32 value)
uniform int32 atomic_max_global(uniform int32 * uniform ptr, uniform int32 atomic_max_{local,global}(uniform int32 * uniform ptr,
uniform int32 value) uniform int32 value)
uniform int32 atomic_and_global(uniform int32 * uniform ptr, uniform int32 atomic_and_{local,global}(uniform int32 * uniform ptr,
uniform int32 value) uniform int32 value)
uniform int32 atomic_or_global(uniform int32 * uniform ptr, uniform int32 atomic_or_{local,global}(uniform int32 * uniform ptr,
uniform int32 value) uniform int32 value)
uniform int32 atomic_xor_global(uniform int32 * uniform ptr, uniform int32 atomic_xor_{local,global}(uniform int32 * uniform ptr,
uniform int32 value) uniform int32 value)
uniform int32 atomic_swap_global(uniform int32 * uniform ptr, uniform int32 atomic_swap_{local,global}(uniform int32 * uniform ptr,
uniform int32 newval) uniform int32 newval)
Be careful that you use the atomic function that you mean to; consider the Be careful that you use the atomic function that you mean to; consider the
following code: following code:
@@ -3357,8 +3539,7 @@ will cause the desired atomic add function to be called.
:: ::
extern uniform int32 counter; extern uniform int32 counter;
int32 one = 1; int32 myCounter = atomic_add_global(&counter, (varying int32)1);
int32 myCounter = atomic_add_global(&counter, one);
There is a third variant of each of these atomic functions that takes a There is a third variant of each of these atomic functions that takes a
``varying`` pointer; this allows each program instance to issue an atomic ``varying`` pointer; this allows each program instance to issue an atomic
@@ -3368,30 +3549,27 @@ the same location in memory!)
:: ::
int32 atomic_add_global(uniform int32 * varying ptr, int32 value) int32 atomic_add_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_subtract_global(uniform int32 * varying ptr, int32 value) int32 atomic_subtract_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_min_global(uniform int32 * varying ptr, int32 value) int32 atomic_min_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_max_global(uniform int32 * varying ptr, int32 value) int32 atomic_max_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_and_global(uniform int32 * varying ptr, int32 value) int32 atomic_and_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_or_global(uniform int32 * varying ptr, int32 value) int32 atomic_or_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_xor_global(uniform int32 * varying ptr, int32 value) int32 atomic_xor_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_swap_global(uniform int32 * varying ptr, int32 value) int32 atomic_swap_{local,global}(uniform int32 * varying ptr, int32 value)
There are also atomic swap and "compare and exchange" functions. There are also atomic "compare and exchange" functions. Compare and
Compare and exchange atomically compares the value in "val" to exchange atomically compares the value in "val" to "compare"--if they
"compare"--if they match, it assigns "newval" to "val". In either case, match, it assigns "newval" to "val". In either case, the old value of
the old value of "val" is returned. (As with the other atomic operations, "val" is returned. (As with the other atomic operations, there are also
there are also ``unsigned`` and 64-bit variants of this function. ``unsigned`` and 64-bit variants of this function. Furthermore, there are
Furthermore, there are ``float`` and ``double`` variants as well.) ``float`` and ``double`` variants as well.)
:: ::
int32 atomic_swap_global(uniform int32 * uniform ptr, int32 newvalue) int32 atomic_compare_exchange_{local,global}(uniform int32 * uniform ptr,
uniform int32 atomic_swap_global(uniform int32 * uniform ptr, int32 compare, int32 newval)
uniform int32 newvalue) uniform int32 atomic_compare_exchange_{local,global}(uniform int32 * uniform ptr,
int32 atomic_compare_exchange_global(uniform int32 * uniform ptr,
int32 compare, int32 newval)
uniform int32 atomic_compare_exchange_global(uniform int32 * uniform ptr,
uniform int32 compare, uniform int32 newval) uniform int32 compare, uniform int32 newval)
``ispc`` also has a standard library routine that inserts a memory barrier ``ispc`` also has a standard library routine that inserts a memory barrier

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or # This could be handy for archiving the generated documentation or
# if some version control system is used. # if some version control system is used.
PROJECT_NUMBER = 1.1.3 PROJECT_NUMBER = 1.1.4
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute) # The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put. # base path where the generated documentation will be put.

View File

@@ -212,104 +212,44 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
RNGState rngstate; RNGState rngstate;
seed_rng(&rngstate, y0); seed_rng(&rngstate, y0);
float invSamples = 1.f / nsubsamples;
// Compute the mapping between the 'programCount'-wide program foreach_tiled(y = y0 ... y1, x = 0 ... w,
// instances running in parallel and samples in the image. u = 0 ... nsubsamples, v = 0 ... nsubsamples) {
// float du = (float)u * invSamples, dv = (float)v * invSamples;
// For now, we'll always take four samples per pixel, so start by
// initializing du and dv with offsets into subpixel samples. We'll
// take care of further updating du and dv for the case where we're
// doing more than 4 program instances in parallel shortly.
uniform float uSteps[4] = { 0, 1, 0, 1 };
uniform float vSteps[4] = { 0, 0, 1, 1 };
float du = uSteps[programIndex % 4] / nsubsamples;
float dv = vSteps[programIndex % 4] / nsubsamples;
// Now handle the case where we are able to do more than one pixel's // Figure out x,y pixel in NDC
// worth of work at once. nx records the number of pixels in the x float px = (x + du - (w / 2.0f)) / (w / 2.0f);
// direction we do per iteration and ny the number in y. float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
uniform int nx = 1, ny = 1; float ret = 0.f;
Ray ray;
Isect isect;
// FIXME: We actually need ny to be 1 regardless of the decomposition, ray.org = 0.f;
// since the task decomposition is one scanline high.
if (programCount == 8) { // Poor man's perspective projection
// Do two pixels at once in the x direction ray.dir.x = px;
nx = 2; ray.dir.y = py;
if (programIndex >= 4) ray.dir.z = -1.0;
// And shift the offsets for the second pixel's worth of work vnormalize(ray.dir);
++du;
}
else if (programCount == 16) {
nx = 4;
ny = 1;
if (programIndex >= 4 && programIndex < 8)
++du;
if (programIndex >= 8 && programIndex < 12)
du += 2;
if (programIndex >= 12)
du += 3;
}
// Now loop over all of the pixels, stepping in x and y as calculated isect.t = 1.0e+17;
// above. (Assumes that ny divides y and nx divides x...) isect.hit = 0;
for (uniform int y = y0; y < y1; y += ny) {
for (uniform int x = 0; x < w; x += nx) {
// Figure out x,y pixel in NDC
float px = (x + du - (w / 2.0f)) / (w / 2.0f);
float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
float ret = 0.f;
Ray ray;
Isect isect;
ray.org = 0.f; for (uniform int snum = 0; snum < 3; ++snum)
ray_sphere_intersect(isect, ray, spheres[snum]);
ray_plane_intersect(isect, ray, plane);
// Poor man's perspective projection // Note use of 'coherent' if statement; the set of rays we
ray.dir.x = px; // trace will often all hit or all miss the scene
ray.dir.y = py; cif (isect.hit) {
ray.dir.z = -1.0; ret = ambient_occlusion(isect, plane, spheres, rngstate);
vnormalize(ray.dir); ret *= invSamples * invSamples;
isect.t = 1.0e+17; int offset = 3 * (y * w + x);
isect.hit = 0; atomic_add_local(&image[offset], ret);
atomic_add_local(&image[offset+1], ret);
for (uniform int snum = 0; snum < 3; ++snum) atomic_add_local(&image[offset+2], ret);
ray_sphere_intersect(isect, ray, spheres[snum]);
ray_plane_intersect(isect, ray, plane);
// Note use of 'coherent' if statement; the set of rays we
// trace will often all hit or all miss the scene
cif (isect.hit)
ret = ambient_occlusion(isect, plane, spheres, rngstate);
// This is a little grungy; we have results for
// programCount-worth of values. Because we're doing 2x2
// subsamples, we need to peel them off in groups of four,
// average the four values for each pixel, and update the
// output image.
//
// Store the varying value to a uniform array of the same size.
// See the discussion about communication among program
// instances in the ispc user's manual for more discussion on
// this idiom.
uniform float retArray[programCount];
retArray[programIndex] = ret;
// offset to the first pixel in the image
uniform int offset = 3 * (y * w + x);
for (uniform int p = 0; p < programCount; p += 4, offset += 3) {
// Get the four sample values for this pixel
uniform float sumret = retArray[p] + retArray[p+1] + retArray[p+2] +
retArray[p+3];
// Normalize by number of samples taken
sumret /= nsubsamples * nsubsamples;
// Store result in the image
image[offset+0] = sumret;
image[offset+1] = sumret;
image[offset+2] = sumret;
}
} }
} }
} }

View File

@@ -14,7 +14,7 @@ CPP_OBJS=$(addprefix objs/, $(CPP_SRC:.cpp=.o) $(TASK_OBJ))
default: $(EXAMPLE) default: $(EXAMPLE)
all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 all: $(EXAMPLE) $(EXAMPLE)-sse4 $(EXAMPLE)-generic16 $(EXAMPLE)-scalar
.PHONY: dirs clean .PHONY: dirs clean
@@ -57,3 +57,9 @@ objs/$(ISPC_SRC:.ispc=)_generic16.o: objs/$(ISPC_SRC:.ispc=)_generic16.cpp
$(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o $(EXAMPLE)-generic16: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_generic16.o
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS) $(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)
objs/$(ISPC_SRC:.ispc=)_scalar.o: $(ISPC_SRC)
$(ISPC) $< -o $@ --target=generic-1
$(EXAMPLE)-scalar: $(CPP_OBJS) objs/$(ISPC_SRC:.ispc=)_scalar.o
$(CXX) $(CXXFLAGS) -o $@ $^ $(LIBS)

View File

@@ -158,38 +158,22 @@ IntersectLightsWithTileMinMax(
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
uniform float gBufferScale_y = 0.5f * (float)gBufferHeight; uniform float gBufferScale_y = 0.5f * (float)gBufferHeight;
// Parallize across frustum planes. uniform float frustumPlanes_xy[4] = {
// We really only have four side planes here, but write the code to -(cameraProj_11 * gBufferScale_x),
// handle programCount > 4 robustly (cameraProj_11 * gBufferScale_x),
uniform float frustumPlanes_xy[programCount]; (cameraProj_22 * gBufferScale_y),
uniform float frustumPlanes_z[programCount]; -(cameraProj_22 * gBufferScale_y) };
uniform float frustumPlanes_z[4] = {
tileEndX - gBufferScale_x,
-tileStartX + gBufferScale_x,
tileEndY - gBufferScale_y,
-tileStartY + gBufferScale_y };
// TODO: If programIndex < 4 here? Don't care about masking off the for (uniform int i = 0; i < 4; ++i) {
// rest but if interleaving ("x2" modes) the other lanes should ideally uniform float norm = rsqrt(frustumPlanes_xy[i] * frustumPlanes_xy[i] +
// not be emitted... frustumPlanes_z[i] * frustumPlanes_z[i]);
{ frustumPlanes_xy[i] *= norm;
// This one is totally constant over the whole screen... worth pulling it up at all? frustumPlanes_z[i] *= norm;
float frustumPlanes_xy_v;
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 0, -(cameraProj_11 * gBufferScale_x));
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 1, (cameraProj_11 * gBufferScale_x));
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 2, (cameraProj_22 * gBufferScale_y));
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 3, -(cameraProj_22 * gBufferScale_y));
float frustumPlanes_z_v;
frustumPlanes_z_v = insert(frustumPlanes_z_v, 0, tileEndX - gBufferScale_x);
frustumPlanes_z_v = insert(frustumPlanes_z_v, 1, -tileStartX + gBufferScale_x);
frustumPlanes_z_v = insert(frustumPlanes_z_v, 2, tileEndY - gBufferScale_y);
frustumPlanes_z_v = insert(frustumPlanes_z_v, 3, -tileStartY + gBufferScale_y);
// Normalize
float norm = rsqrt(frustumPlanes_xy_v * frustumPlanes_xy_v +
frustumPlanes_z_v * frustumPlanes_z_v);
frustumPlanes_xy_v *= norm;
frustumPlanes_z_v *= norm;
// Save out for uniform use later
frustumPlanes_xy[programIndex] = frustumPlanes_xy_v;
frustumPlanes_z[programIndex] = frustumPlanes_z_v;
} }
uniform int32 tileNumLights = 0; uniform int32 tileNumLights = 0;
@@ -601,30 +585,20 @@ SplitTileMinMax(
uniform float gBufferScale_x = 0.5f * (float)gBufferWidth; uniform float gBufferScale_x = 0.5f * (float)gBufferWidth;
uniform float gBufferScale_y = 0.5f * (float)gBufferHeight; uniform float gBufferScale_y = 0.5f * (float)gBufferHeight;
// Parallize across frustum planes uniform float frustumPlanes_xy[2] = { -(cameraProj_11 * gBufferScale_x),
// Only have 2 frustum split planes here so may not be worth it, but (cameraProj_22 * gBufferScale_y) };
// we'll do it for now for consistency uniform float frustumPlanes_z[2] = { tileMidX - gBufferScale_x,
uniform float frustumPlanes_xy[programCount]; tileMidY - gBufferScale_y };
uniform float frustumPlanes_z[programCount];
// This one is totally constant over the whole screen... worth pulling it up at all?
float frustumPlanes_xy_v;
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 0, -(cameraProj_11 * gBufferScale_x));
frustumPlanes_xy_v = insert(frustumPlanes_xy_v, 1, (cameraProj_22 * gBufferScale_y));
float frustumPlanes_z_v;
frustumPlanes_z_v = insert(frustumPlanes_z_v, 0, tileMidX - gBufferScale_x);
frustumPlanes_z_v = insert(frustumPlanes_z_v, 1, tileMidY - gBufferScale_y);
// Normalize // Normalize
float norm = rsqrt(frustumPlanes_xy_v * frustumPlanes_xy_v + uniform float norm[2] = { rsqrt(frustumPlanes_xy[0] * frustumPlanes_xy[0] +
frustumPlanes_z_v * frustumPlanes_z_v); frustumPlanes_z[0] * frustumPlanes_z[0]),
frustumPlanes_xy_v *= norm; rsqrt(frustumPlanes_xy[1] * frustumPlanes_xy[1] +
frustumPlanes_z_v *= norm; frustumPlanes_z[1] * frustumPlanes_z[1]) };
frustumPlanes_xy[0] *= norm[0];
// Save out for uniform use later frustumPlanes_xy[1] *= norm[1];
frustumPlanes_xy[programIndex] = frustumPlanes_xy_v; frustumPlanes_z[0] *= norm[0];
frustumPlanes_z[programIndex] = frustumPlanes_z_v; frustumPlanes_z[1] *= norm[1];
// Initialize // Initialize
uniform int32 subtileLightOffset[4]; uniform int32 subtileLightOffset[4];

View File

@@ -1106,7 +1106,7 @@ GATHER_GENERAL(__vec16_i64, int64_t, __vec16_i64, __gather64_i64)
// scatter // scatter
#define SCATTER_BASE_VARYINGOFFSET(VTYPE, STYPE, OTYPE, FUNC) \ #define SCATTER_BASE_OFFSETS(VTYPE, STYPE, OTYPE, FUNC) \
static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \ static FORCEINLINE void FUNC(unsigned char *b, OTYPE varyingOffset, \
uint32_t scale, OTYPE constOffset, \ uint32_t scale, OTYPE constOffset, \
VTYPE val, __vec16_i1 mask) { \ VTYPE val, __vec16_i1 mask) { \

View File

@@ -941,10 +941,8 @@ static FORCEINLINE __vec4_i1 __not_equal(__vec4_i32 a, __vec4_i32 b) {
} }
static FORCEINLINE __vec4_i1 __unsigned_less_equal(__vec4_i32 a, __vec4_i32 b) { static FORCEINLINE __vec4_i1 __unsigned_less_equal(__vec4_i32 a, __vec4_i32 b) {
a.v = _mm_xor_si128(a.v, _mm_set1_epi32(0x80000000)); // a<=b == (min(a,b) == a)
b.v = _mm_xor_si128(b.v, _mm_set1_epi32(0x80000000)); return _mm_cmpeq_epi32(_mm_min_epu32(a.v, b.v), a.v);
return _mm_or_si128(_mm_cmplt_epi32(a.v, b.v),
_mm_cmpeq_epi32(a.v, b.v));
} }
static FORCEINLINE __vec4_i1 __signed_less_equal(__vec4_i32 a, __vec4_i32 b) { static FORCEINLINE __vec4_i1 __signed_less_equal(__vec4_i32 a, __vec4_i32 b) {
@@ -953,10 +951,8 @@ static FORCEINLINE __vec4_i1 __signed_less_equal(__vec4_i32 a, __vec4_i32 b) {
} }
static FORCEINLINE __vec4_i1 __unsigned_greater_equal(__vec4_i32 a, __vec4_i32 b) { static FORCEINLINE __vec4_i1 __unsigned_greater_equal(__vec4_i32 a, __vec4_i32 b) {
a.v = _mm_xor_si128(a.v, _mm_set1_epi32(0x80000000)); // a>=b == (max(a,b) == a)
b.v = _mm_xor_si128(b.v, _mm_set1_epi32(0x80000000)); return _mm_cmpeq_epi32(_mm_max_epu32(a.v, b.v), a.v);
return _mm_or_si128(_mm_cmpgt_epi32(a.v, b.v),
_mm_cmpeq_epi32(a.v, b.v));
} }
static FORCEINLINE __vec4_i1 __signed_greater_equal(__vec4_i32 a, __vec4_i32 b) { static FORCEINLINE __vec4_i1 __signed_greater_equal(__vec4_i32 a, __vec4_i32 b) {

View File

@@ -273,7 +273,7 @@ lAtomicCompareAndSwapPointer(void **v, void *newValue, void *oldValue) {
#else #else
void *result; void *result;
#if (ISPC_POINTER_BYTES == 4) #if (ISPC_POINTER_BYTES == 4)
__asm__ __volatile__("lock\ncmpxchgd %2,%1" __asm__ __volatile__("lock\ncmpxchgl %2,%1"
: "=a"(result), "=m"(*v) : "=a"(result), "=m"(*v)
: "q"(newValue), "0"(oldValue) : "q"(newValue), "0"(oldValue)
: "memory"); : "memory");

973
expr.cpp

File diff suppressed because it is too large Load Diff

53
expr.h
View File

@@ -388,6 +388,10 @@ public:
with values given by the "values" parameter. */ with values given by the "values" parameter. */
ConstExpr(ConstExpr *old, double *values); ConstExpr(ConstExpr *old, double *values);
/** Create ConstExpr with the same type and values as the given one,
but at the given position. */
ConstExpr(ConstExpr *old, SourcePos pos);
llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetValue(FunctionEmitContext *ctx) const;
const Type *GetType() const; const Type *GetType() const;
void Print() const; void Print() const;
@@ -680,11 +684,44 @@ public:
const Type *GetType() const; const Type *GetType() const;
Expr *TypeCheck(); Expr *TypeCheck();
Expr *Optimize(); Expr *Optimize();
llvm::Constant *GetConstant(const Type *type) const;
void Print() const; void Print() const;
int EstimateCost() const; int EstimateCost() const;
}; };
/** An expression representing a "new" expression, used for dynamically
allocating memory. The public members below record what is allocated
(allocType), how many elements are requested (countExpr), an optional
initializer (initExpr), and whether a separate allocation is made per
program instance (isVarying).
*/
class NewExpr : public Expr {
public:
/** NOTE(review): the constructor body is not visible from this header.
Presumably type, count and initializer populate allocType, countExpr
and initExpr respectively, typeQual/tqPos describe the optional
qualifier written on the "new" itself (and thus determine isVarying),
and p is the source position of the expression -- confirm against the
definition in expr.cpp. */
NewExpr(int typeQual, const Type *type, Expr *initializer, Expr *count,
SourcePos tqPos, SourcePos p);
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
const Type *GetType() const;
Expr *TypeCheck();
Expr *Optimize();
void Print() const;
int EstimateCost() const;
/** Type of object to allocate storage for. */
const Type *allocType;
/** Expression giving the number of elements to allocate, when the
"new Foo[expr]" form is used. This may be NULL, in which case a
single element of the given type will be allocated. */
Expr *countExpr;
/** Optional initializer expression used to initialize the allocated
memory. */
Expr *initExpr;
/** Indicates whether this is a "varying new" or "uniform new"
(i.e. whether a separate allocation is performed per program
instance, or whether a single allocation is performed for the
entire gang of program instances.) */
bool isVarying;
};
/** This function indicates whether it's legal to convert from fromType to /** This function indicates whether it's legal to convert from fromType to
toType. If the optional errorMsgBase and source position parameters toType. If the optional errorMsgBase and source position parameters
are provided, then an error message is issued if the type conversion are provided, then an error message is issued if the type conversion
@@ -703,4 +740,20 @@ bool CanConvertTypes(const Type *fromType, const Type *toType,
*/ */
Expr *TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase); Expr *TypeConvertExpr(Expr *expr, const Type *toType, const char *errorMsgBase);
/** Utility routine that emits code to initialize a symbol given an
initializer expression.
@param lvalue Memory location of storage for the symbol's data
@param symType Type of variable being initialized
@param initExpr Expression for the initializer
@param ctx FunctionEmitContext to use for generating instructions
@param pos Source file position of the variable being initialized
*/
void
InitSymbol(llvm::Value *lvalue, const Type *symType, Expr *initExpr,
FunctionEmitContext *ctx, SourcePos pos);
bool PossiblyResolveFunctionOverloads(Expr *expr, const Type *type);
#endif // ISPC_EXPR_H #endif // ISPC_EXPR_H

View File

@@ -185,6 +185,14 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->allOffMaskIsSafe = true; t->allOffMaskIsSafe = true;
t->maskBitCount = 1; t->maskBitCount = 1;
} }
else if (!strcasecmp(isa, "generic-1")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 1;
t->vectorWidth = 1;
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
else if (!strcasecmp(isa, "avx")) { else if (!strcasecmp(isa, "avx")) {
t->isa = Target::AVX; t->isa = Target::AVX;
@@ -270,7 +278,7 @@ Target::SupportedTargetISAs() {
#ifdef LLVM_3_1svn #ifdef LLVM_3_1svn
", avx2, avx2-x2" ", avx2, avx2-x2"
#endif // LLVM_3_1svn #endif // LLVM_3_1svn
", generic-4, generic-8, generic-16"; ", generic-4, generic-8, generic-16, generic-1";
} }
@@ -502,10 +510,13 @@ Globals::Globals() {
debugPrint = false; debugPrint = false;
disableWarnings = false; disableWarnings = false;
warningsAsErrors = false; warningsAsErrors = false;
quiet = false;
disableLineWrap = false; disableLineWrap = false;
emitPerfWarnings = true; emitPerfWarnings = true;
emitInstrumentation = false; emitInstrumentation = false;
generateDebuggingSymbols = false; generateDebuggingSymbols = false;
enableFuzzTest = false;
fuzzTestSeed = -1;
mangleFunctionsWithTarget = false; mangleFunctionsWithTarget = false;
ctx = new llvm::LLVMContext; ctx = new llvm::LLVMContext;

17
ispc.h
View File

@@ -388,6 +388,9 @@ struct Globals {
possible performance pitfalls. */ possible performance pitfalls. */
bool emitPerfWarnings; bool emitPerfWarnings;
/** Indicates whether all printed output should be suppressed. */
bool quiet;
/** Indicates whether calls should be emitted in the program to an /** Indicates whether calls should be emitted in the program to an
externally-defined program instrumentation function. (See the externally-defined program instrumentation function. (See the
"Instrumenting your ispc programs" section in the user's "Instrumenting your ispc programs" section in the user's
@@ -402,6 +405,14 @@ struct Globals {
vector width to them. */ vector width to them. */
bool mangleFunctionsWithTarget; bool mangleFunctionsWithTarget;
/** If enabled, the lexer will randomly replace some tokens returned
with other tokens, in order to test error condition handling in the
compiler. */
bool enableFuzzTest;
/** Seed for random number generator used for fuzz testing. */
int fuzzTestSeed;
/** Global LLVMContext object */ /** Global LLVMContext object */
llvm::LLVMContext *ctx; llvm::LLVMContext *ctx;
@@ -412,12 +423,17 @@ struct Globals {
/** Arguments to pass along to the C pre-processor, if it is run on the /** Arguments to pass along to the C pre-processor, if it is run on the
program before compilation. */ program before compilation. */
std::vector<std::string> cppArgs; std::vector<std::string> cppArgs;
/** Additional user-provided directories to search when processing
#include directives in the preprocessor. */
std::vector<std::string> includePath;
}; };
enum { enum {
COST_ASSIGN = 1, COST_ASSIGN = 1,
COST_COHERENT_BREAK_CONTINE = 4, COST_COHERENT_BREAK_CONTINE = 4,
COST_COMPLEX_ARITH_OP = 4, COST_COMPLEX_ARITH_OP = 4,
COST_DELETE = 32,
COST_DEREF = 4, COST_DEREF = 4,
COST_FUNCALL = 4, COST_FUNCALL = 4,
COST_FUNPTR_UNIFORM = 12, COST_FUNPTR_UNIFORM = 12,
@@ -425,6 +441,7 @@ enum {
COST_GATHER = 8, COST_GATHER = 8,
COST_GOTO = 4, COST_GOTO = 4,
COST_LOAD = 2, COST_LOAD = 2,
COST_NEW = 32,
COST_REGULAR_BREAK_CONTINUE = 2, COST_REGULAR_BREAK_CONTINUE = 2,
COST_RETURN = 4, COST_RETURN = 4,
COST_SELECT = 4, COST_SELECT = 4,

View File

@@ -25,6 +25,7 @@
<ClCompile Include="gen-bitcode-c-32.cpp" /> <ClCompile Include="gen-bitcode-c-32.cpp" />
<ClCompile Include="gen-bitcode-c-64.cpp" /> <ClCompile Include="gen-bitcode-c-64.cpp" />
<ClCompile Include="gen-bitcode-dispatch.cpp" /> <ClCompile Include="gen-bitcode-dispatch.cpp" />
<ClCompile Include="gen-bitcode-generic-1.cpp" />
<ClCompile Include="gen-bitcode-generic-4.cpp" /> <ClCompile Include="gen-bitcode-generic-4.cpp" />
<ClCompile Include="gen-bitcode-generic-8.cpp" /> <ClCompile Include="gen-bitcode-generic-8.cpp" />
<ClCompile Include="gen-bitcode-generic-16.cpp" /> <ClCompile Include="gen-bitcode-generic-16.cpp" />
@@ -211,6 +212,19 @@
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx2-x2.cpp</Message> <Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx2-x2.cpp</Message>
</CustomBuild> </CustomBuild>
</ItemGroup> </ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-1.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll &gt; gen-bitcode-generic-1.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-1.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-1.ll | python bitcode2cpp.py builtins\target-generic-1.ll &gt; gen-bitcode-generic-1.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-1.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-1.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-1.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Include="builtins\target-generic-4.ll"> <CustomBuild Include="builtins\target-generic-4.ll">
<FileType>Document</FileType> <FileType>Document</FileType>

512
lex.ll
View File

@@ -50,20 +50,275 @@ static void lStringConst(YYSTYPE *, SourcePos *);
static double lParseHexFloat(const char *ptr); static double lParseHexFloat(const char *ptr);
#define YY_USER_ACTION \ #define YY_USER_ACTION \
yylloc->first_line = yylloc->last_line; \ yylloc.first_line = yylloc.last_line; \
yylloc->first_column = yylloc->last_column; \ yylloc.first_column = yylloc.last_column; \
yylloc->last_column += yyleng; yylloc.last_column += yyleng;
#ifdef ISPC_IS_WINDOWS #ifdef ISPC_IS_WINDOWS
inline int isatty(int) { return 0; } inline int isatty(int) { return 0; }
#endif // ISPC_IS_WINDOWS #endif // ISPC_IS_WINDOWS
/* Pool of token values that the fuzz-testing macro RT can hand back to the
   parser in place of the token that was actually lexed; RT indexes this
   array with a random number modulo its element count.
   NOTE(review): TOKEN_INT is listed twice, so it is picked twice as often
   as the other entries.  The order and multiplicity of entries determine
   which token a given random draw selects, so editing the list changes
   the outcome of fuzz runs. */
static int allTokens[] = {
TOKEN_ASSERT, TOKEN_BOOL, TOKEN_BREAK, TOKEN_CASE, TOKEN_CBREAK,
TOKEN_CCONTINUE, TOKEN_CDO, TOKEN_CFOR, TOKEN_CIF, TOKEN_CWHILE,
TOKEN_CONST, TOKEN_CONTINUE, TOKEN_CRETURN, TOKEN_DEFAULT, TOKEN_DO,
TOKEN_DELETE, TOKEN_DOUBLE, TOKEN_ELSE, TOKEN_ENUM,
TOKEN_EXPORT, TOKEN_EXTERN, TOKEN_FALSE, TOKEN_FLOAT, TOKEN_FOR,
TOKEN_FOREACH, TOKEN_FOREACH_TILED, TOKEN_GOTO, TOKEN_IF, TOKEN_INLINE,
TOKEN_INT, TOKEN_INT8, TOKEN_INT16, TOKEN_INT, TOKEN_INT64, TOKEN_LAUNCH,
TOKEN_NEW, TOKEN_NULL, TOKEN_PRINT, TOKEN_RETURN, TOKEN_SOA, TOKEN_SIGNED,
TOKEN_SIZEOF, TOKEN_STATIC, TOKEN_STRUCT, TOKEN_SWITCH, TOKEN_SYNC,
TOKEN_TASK, TOKEN_TRUE, TOKEN_TYPEDEF, TOKEN_UNIFORM, TOKEN_UNSIGNED,
TOKEN_VARYING, TOKEN_VOID, TOKEN_WHILE, TOKEN_STRING_C_LITERAL,
TOKEN_DOTDOTDOT,
TOKEN_FLOAT_CONSTANT,
TOKEN_INT32_CONSTANT, TOKEN_UINT32_CONSTANT,
TOKEN_INT64_CONSTANT, TOKEN_UINT64_CONSTANT,
TOKEN_INC_OP, TOKEN_DEC_OP, TOKEN_LEFT_OP, TOKEN_RIGHT_OP, TOKEN_LE_OP,
TOKEN_GE_OP, TOKEN_EQ_OP, TOKEN_NE_OP, TOKEN_AND_OP, TOKEN_OR_OP,
TOKEN_MUL_ASSIGN, TOKEN_DIV_ASSIGN, TOKEN_MOD_ASSIGN, TOKEN_ADD_ASSIGN,
TOKEN_SUB_ASSIGN, TOKEN_LEFT_ASSIGN, TOKEN_RIGHT_ASSIGN, TOKEN_AND_ASSIGN,
TOKEN_XOR_ASSIGN, TOKEN_OR_ASSIGN, TOKEN_PTR_OP,
';', '{', '}', ',', ':', '=', '(', ')', '[', ']', '.', '&', '!', '~', '-',
'+', '*', '/', '%', '<', '>', '^', '|', '?',
};
/* Token value -> printable spelling.  Filled in by ParserInit() and read by
   the RT fuzz-test macro when it reports which token it substituted. */
std::map<int, std::string> tokenToName;
/* Token name string (e.g. "TOKEN_ASSERT", "$end") -> user-facing text.
   Filled in by ParserInit().
   NOTE(review): the code that reads this map is not in this part of the
   file; presumably it is used when formatting parser error messages --
   confirm. */
std::map<std::string, std::string> tokenNameRemap;
void ParserInit() {
tokenToName[TOKEN_ASSERT] = "assert";
tokenToName[TOKEN_BOOL] = "bool";
tokenToName[TOKEN_BREAK] = "break";
tokenToName[TOKEN_CASE] = "case";
tokenToName[TOKEN_CBREAK] = "cbreak";
tokenToName[TOKEN_CCONTINUE] = "ccontinue";
tokenToName[TOKEN_CDO] = "cdo";
tokenToName[TOKEN_CFOR] = "cfor";
tokenToName[TOKEN_CIF] = "cif";
tokenToName[TOKEN_CWHILE] = "cwhile";
tokenToName[TOKEN_CONST] = "const";
tokenToName[TOKEN_CONTINUE] = "continue";
tokenToName[TOKEN_CRETURN] = "creturn";
tokenToName[TOKEN_DEFAULT] = "default";
tokenToName[TOKEN_DO] = "do";
tokenToName[TOKEN_DELETE] = "delete";
tokenToName[TOKEN_DOUBLE] = "double";
tokenToName[TOKEN_ELSE] = "else";
tokenToName[TOKEN_ENUM] = "enum";
tokenToName[TOKEN_EXPORT] = "export";
tokenToName[TOKEN_EXTERN] = "extern";
tokenToName[TOKEN_FALSE] = "false";
tokenToName[TOKEN_FLOAT] = "float";
tokenToName[TOKEN_FOR] = "for";
tokenToName[TOKEN_FOREACH] = "foreach";
tokenToName[TOKEN_FOREACH_TILED] = "foreach_tiled";
tokenToName[TOKEN_GOTO] = "goto";
tokenToName[TOKEN_IF] = "if";
tokenToName[TOKEN_INLINE] = "inline";
tokenToName[TOKEN_INT] = "int";
tokenToName[TOKEN_INT8] = "int8";
tokenToName[TOKEN_INT16] = "int16";
tokenToName[TOKEN_INT] = "int";
tokenToName[TOKEN_INT64] = "int64";
tokenToName[TOKEN_LAUNCH] = "launch";
tokenToName[TOKEN_NEW] = "new";
tokenToName[TOKEN_NULL] = "NULL";
tokenToName[TOKEN_PRINT] = "print";
tokenToName[TOKEN_RETURN] = "return";
tokenToName[TOKEN_SOA] = "soa";
tokenToName[TOKEN_SIGNED] = "signed";
tokenToName[TOKEN_SIZEOF] = "sizeof";
tokenToName[TOKEN_STATIC] = "static";
tokenToName[TOKEN_STRUCT] = "struct";
tokenToName[TOKEN_SWITCH] = "switch";
tokenToName[TOKEN_SYNC] = "sync";
tokenToName[TOKEN_TASK] = "task";
tokenToName[TOKEN_TRUE] = "true";
tokenToName[TOKEN_TYPEDEF] = "typedef";
tokenToName[TOKEN_UNIFORM] = "uniform";
tokenToName[TOKEN_UNSIGNED] = "unsigned";
tokenToName[TOKEN_VARYING] = "varying";
tokenToName[TOKEN_VOID] = "void";
tokenToName[TOKEN_WHILE] = "while";
tokenToName[TOKEN_STRING_C_LITERAL] = "\"C\"";
tokenToName[TOKEN_DOTDOTDOT] = "...";
tokenToName[TOKEN_FLOAT_CONSTANT] = "TOKEN_FLOAT_CONSTANT";
tokenToName[TOKEN_INT32_CONSTANT] = "TOKEN_INT32_CONSTANT";
tokenToName[TOKEN_UINT32_CONSTANT] = "TOKEN_UINT32_CONSTANT";
tokenToName[TOKEN_INT64_CONSTANT] = "TOKEN_INT64_CONSTANT";
tokenToName[TOKEN_UINT64_CONSTANT] = "TOKEN_UINT64_CONSTANT";
tokenToName[TOKEN_INC_OP] = "++";
tokenToName[TOKEN_DEC_OP] = "--";
tokenToName[TOKEN_LEFT_OP] = "<<";
tokenToName[TOKEN_RIGHT_OP] = ">>";
tokenToName[TOKEN_LE_OP] = "<=";
tokenToName[TOKEN_GE_OP] = ">=";
tokenToName[TOKEN_EQ_OP] = "==";
tokenToName[TOKEN_NE_OP] = "!=";
tokenToName[TOKEN_AND_OP] = "&&";
tokenToName[TOKEN_OR_OP] = "||";
tokenToName[TOKEN_MUL_ASSIGN] = "*=";
tokenToName[TOKEN_DIV_ASSIGN] = "/=";
tokenToName[TOKEN_MOD_ASSIGN] = "%=";
tokenToName[TOKEN_ADD_ASSIGN] = "+=";
tokenToName[TOKEN_SUB_ASSIGN] = "-=";
tokenToName[TOKEN_LEFT_ASSIGN] = "<<=";
tokenToName[TOKEN_RIGHT_ASSIGN] = ">>=";
tokenToName[TOKEN_AND_ASSIGN] = "&=";
tokenToName[TOKEN_XOR_ASSIGN] = "^=";
tokenToName[TOKEN_OR_ASSIGN] = "|=";
tokenToName[TOKEN_PTR_OP] = "->";
tokenToName[';'] = ";";
tokenToName['{'] = "{";
tokenToName['}'] = "}";
tokenToName[','] = ",";
tokenToName[':'] = ":";
tokenToName['='] = "=";
tokenToName['('] = "(";
tokenToName[')'] = ")";
tokenToName['['] = "[";
tokenToName[']'] = "]";
tokenToName['.'] = ".";
tokenToName['&'] = "&";
tokenToName['!'] = "!";
tokenToName['~'] = "~";
tokenToName['-'] = "-";
tokenToName['+'] = "+";
tokenToName['*'] = "*";
tokenToName['/'] = "/";
tokenToName['%'] = "%";
tokenToName['<'] = "<";
tokenToName['>'] = ">";
tokenToName['^'] = "^";
tokenToName['|'] = "|";
tokenToName['?'] = "?";
tokenToName[';'] = ";";
tokenNameRemap["TOKEN_ASSERT"] = "\'assert\'";
tokenNameRemap["TOKEN_BOOL"] = "\'bool\'";
tokenNameRemap["TOKEN_BREAK"] = "\'break\'";
tokenNameRemap["TOKEN_CASE"] = "\'case\'";
tokenNameRemap["TOKEN_CBREAK"] = "\'cbreak\'";
tokenNameRemap["TOKEN_CCONTINUE"] = "\'ccontinue\'";
tokenNameRemap["TOKEN_CDO"] = "\'cdo\'";
tokenNameRemap["TOKEN_CFOR"] = "\'cfor\'";
tokenNameRemap["TOKEN_CIF"] = "\'cif\'";
tokenNameRemap["TOKEN_CWHILE"] = "\'cwhile\'";
tokenNameRemap["TOKEN_CONST"] = "\'const\'";
tokenNameRemap["TOKEN_CONTINUE"] = "\'continue\'";
tokenNameRemap["TOKEN_CRETURN"] = "\'creturn\'";
tokenNameRemap["TOKEN_DEFAULT"] = "\'default\'";
tokenNameRemap["TOKEN_DO"] = "\'do\'";
tokenNameRemap["TOKEN_DELETE"] = "\'delete\'";
tokenNameRemap["TOKEN_DOUBLE"] = "\'double\'";
tokenNameRemap["TOKEN_ELSE"] = "\'else\'";
tokenNameRemap["TOKEN_ENUM"] = "\'enum\'";
tokenNameRemap["TOKEN_EXPORT"] = "\'export\'";
tokenNameRemap["TOKEN_EXTERN"] = "\'extern\'";
tokenNameRemap["TOKEN_FALSE"] = "\'false\'";
tokenNameRemap["TOKEN_FLOAT"] = "\'float\'";
tokenNameRemap["TOKEN_FOR"] = "\'for\'";
tokenNameRemap["TOKEN_FOREACH"] = "\'foreach\'";
tokenNameRemap["TOKEN_FOREACH_TILED"] = "\'foreach_tiled\'";
tokenNameRemap["TOKEN_GOTO"] = "\'goto\'";
tokenNameRemap["TOKEN_IDENTIFIER"] = "identifier";
tokenNameRemap["TOKEN_IF"] = "\'if\'";
tokenNameRemap["TOKEN_INLINE"] = "\'inline\'";
tokenNameRemap["TOKEN_INT"] = "\'int\'";
tokenNameRemap["TOKEN_INT8"] = "\'int8\'";
tokenNameRemap["TOKEN_INT16"] = "\'int16\'";
tokenNameRemap["TOKEN_INT"] = "\'int\'";
tokenNameRemap["TOKEN_INT64"] = "\'int64\'";
tokenNameRemap["TOKEN_LAUNCH"] = "\'launch\'";
tokenNameRemap["TOKEN_NEW"] = "\'new\'";
tokenNameRemap["TOKEN_NULL"] = "\'NULL\'";
tokenNameRemap["TOKEN_PRINT"] = "\'print\'";
tokenNameRemap["TOKEN_RETURN"] = "\'return\'";
tokenNameRemap["TOKEN_SOA"] = "\'soa\'";
tokenNameRemap["TOKEN_SIGNED"] = "\'signed\'";
tokenNameRemap["TOKEN_SIZEOF"] = "\'sizeof\'";
tokenNameRemap["TOKEN_STATIC"] = "\'static\'";
tokenNameRemap["TOKEN_STRUCT"] = "\'struct\'";
tokenNameRemap["TOKEN_SWITCH"] = "\'switch\'";
tokenNameRemap["TOKEN_SYNC"] = "\'sync\'";
tokenNameRemap["TOKEN_TASK"] = "\'task\'";
tokenNameRemap["TOKEN_TRUE"] = "\'true\'";
tokenNameRemap["TOKEN_TYPEDEF"] = "\'typedef\'";
tokenNameRemap["TOKEN_UNIFORM"] = "\'uniform\'";
tokenNameRemap["TOKEN_UNSIGNED"] = "\'unsigned\'";
tokenNameRemap["TOKEN_VARYING"] = "\'varying\'";
tokenNameRemap["TOKEN_VOID"] = "\'void\'";
tokenNameRemap["TOKEN_WHILE"] = "\'while\'";
tokenNameRemap["TOKEN_STRING_C_LITERAL"] = "\"C\"";
tokenNameRemap["TOKEN_DOTDOTDOT"] = "\'...\'";
tokenNameRemap["TOKEN_FLOAT_CONSTANT"] = "float constant";
tokenNameRemap["TOKEN_INT32_CONSTANT"] = "int32 constant";
tokenNameRemap["TOKEN_UINT32_CONSTANT"] = "unsigned int32 constant";
tokenNameRemap["TOKEN_INT64_CONSTANT"] = "int64 constant";
tokenNameRemap["TOKEN_UINT64_CONSTANT"] = "unsigned int64 constant";
tokenNameRemap["TOKEN_INC_OP"] = "\'++\'";
tokenNameRemap["TOKEN_DEC_OP"] = "\'--\'";
tokenNameRemap["TOKEN_LEFT_OP"] = "\'<<\'";
tokenNameRemap["TOKEN_RIGHT_OP"] = "\'>>\'";
tokenNameRemap["TOKEN_LE_OP"] = "\'<=\'";
tokenNameRemap["TOKEN_GE_OP"] = "\'>=\'";
tokenNameRemap["TOKEN_EQ_OP"] = "\'==\'";
tokenNameRemap["TOKEN_NE_OP"] = "\'!=\'";
tokenNameRemap["TOKEN_AND_OP"] = "\'&&\'";
tokenNameRemap["TOKEN_OR_OP"] = "\'||\'";
tokenNameRemap["TOKEN_MUL_ASSIGN"] = "\'*=\'";
tokenNameRemap["TOKEN_DIV_ASSIGN"] = "\'/=\'";
tokenNameRemap["TOKEN_MOD_ASSIGN"] = "\'%=\'";
tokenNameRemap["TOKEN_ADD_ASSIGN"] = "\'+=\'";
tokenNameRemap["TOKEN_SUB_ASSIGN"] = "\'-=\'";
tokenNameRemap["TOKEN_LEFT_ASSIGN"] = "\'<<=\'";
tokenNameRemap["TOKEN_RIGHT_ASSIGN"] = "\'>>=\'";
tokenNameRemap["TOKEN_AND_ASSIGN"] = "\'&=\'";
tokenNameRemap["TOKEN_XOR_ASSIGN"] = "\'^=\'";
tokenNameRemap["TOKEN_OR_ASSIGN"] = "\'|=\'";
tokenNameRemap["TOKEN_PTR_OP"] = "\'->\'";
tokenNameRemap["$end"] = "end of file";
}
/** Returns a pseudo-random integer for the fuzz tester: the result of
    rand() when ISPC_IS_WINDOWS is defined, and of lrand48() (converted to
    int) otherwise. */
inline int ispcRand() {
#ifndef ISPC_IS_WINDOWS
    const long value = lrand48();
    return (int)value;
#else
    const int value = rand();
    return value;
#endif
}
/* RT ("random token"): fuzz-testing hook placed at the start of lexer rule
   actions, written as `RT;`.  When g->enableFuzzTest is false it does
   nothing.  Otherwise it draws r = ispcRand() % 40 and:
     r == 0: emits the "Fuzz test dropping token" warning;
     r == 1: picks a random entry of allTokens, stores a copy of yytext in
             yylval.stringVal, warns, and returns that token instead;
     r == 2: asks the symbol table for a random symbol and, if one is
             returned, stores its name in yylval.stringVal, warns, and
             returns TOKEN_IDENTIFIER;
     any other value: no effect.
   The macro ends in a dangling `else` so that the `;` written after RT at
   the use site becomes the (empty) else-branch.
   NOTE(review): the r == 0 branch only warns and does not return, so
   control continues with whatever follows `RT;` in the rule action (e.g.
   `return TOKEN_ASSERT;`) -- the token appears to be returned anyway.
   Confirm whether actually dropping the token was intended. */
#define RT \
if (g->enableFuzzTest) { \
int r = ispcRand() % 40; \
if (r == 0) { \
Warning(yylloc, "Fuzz test dropping token"); \
} \
else if (r == 1) { \
Assert (tokenToName.size() > 0); \
int nt = sizeof(allTokens) / sizeof(allTokens[0]); \
int tn = ispcRand() % nt; \
yylval.stringVal = new std::string(yytext); /* just in case */\
Warning(yylloc, "Fuzz test replaced token with \"%s\"", tokenToName[allTokens[tn]].c_str()); \
return allTokens[tn]; \
} \
else if (r == 2) { \
Symbol *sym = m->symbolTable->RandomSymbol(); \
if (sym != NULL) { \
yylval.stringVal = new std::string(sym->name); \
Warning(yylloc, "Fuzz test replaced with identifier \"%s\".", sym->name.c_str()); \
return TOKEN_IDENTIFIER; \
} \
} \
/* TOKEN_TYPE_NAME */ \
} else /* swallow semicolon */
%} %}
%option nounput %option nounput
%option noyywrap %option noyywrap
%option bison-bridge
%option bison-locations
%option nounistd %option nounistd
WHITESPACE [ \t\r]+ WHITESPACE [ \t\r]+
@@ -75,73 +330,77 @@ IDENT [a-zA-Z_][a-zA-Z_0-9]*
ZO_SWIZZLE ([01]+[w-z]+)+|([01]+[rgba]+)+|([01]+[uv]+)+ ZO_SWIZZLE ([01]+[w-z]+)+|([01]+[rgba]+)+|([01]+[uv]+)+
%% %%
"/*" { lCComment(yylloc); } "/*" { lCComment(&yylloc); }
"//" { lCppComment(yylloc); } "//" { lCppComment(&yylloc); }
__assert { return TOKEN_ASSERT; } __assert { RT; return TOKEN_ASSERT; }
bool { return TOKEN_BOOL; } bool { RT; return TOKEN_BOOL; }
break { return TOKEN_BREAK; } break { RT; return TOKEN_BREAK; }
case { return TOKEN_CASE; } case { RT; return TOKEN_CASE; }
cbreak { return TOKEN_CBREAK; } cbreak { RT; return TOKEN_CBREAK; }
ccontinue { return TOKEN_CCONTINUE; } ccontinue { RT; return TOKEN_CCONTINUE; }
cdo { return TOKEN_CDO; } cdo { RT; return TOKEN_CDO; }
cfor { return TOKEN_CFOR; } cfor { RT; return TOKEN_CFOR; }
cif { return TOKEN_CIF; } cif { RT; return TOKEN_CIF; }
cwhile { return TOKEN_CWHILE; } cwhile { RT; return TOKEN_CWHILE; }
const { return TOKEN_CONST; } const { RT; return TOKEN_CONST; }
continue { return TOKEN_CONTINUE; } continue { RT; return TOKEN_CONTINUE; }
creturn { return TOKEN_CRETURN; } creturn { RT; return TOKEN_CRETURN; }
default { return TOKEN_DEFAULT; } default { RT; return TOKEN_DEFAULT; }
do { return TOKEN_DO; } do { RT; return TOKEN_DO; }
double { return TOKEN_DOUBLE; } delete { RT; return TOKEN_DELETE; }
else { return TOKEN_ELSE; } delete\[\] { RT; return TOKEN_DELETE; }
enum { return TOKEN_ENUM; } double { RT; return TOKEN_DOUBLE; }
export { return TOKEN_EXPORT; } else { RT; return TOKEN_ELSE; }
extern { return TOKEN_EXTERN; } enum { RT; return TOKEN_ENUM; }
false { return TOKEN_FALSE; } export { RT; return TOKEN_EXPORT; }
float { return TOKEN_FLOAT; } extern { RT; return TOKEN_EXTERN; }
for { return TOKEN_FOR; } false { RT; return TOKEN_FALSE; }
foreach { return TOKEN_FOREACH; } float { RT; return TOKEN_FLOAT; }
foreach_tiled { return TOKEN_FOREACH_TILED; } for { RT; return TOKEN_FOR; }
goto { return TOKEN_GOTO; } foreach { RT; return TOKEN_FOREACH; }
if { return TOKEN_IF; } foreach_tiled { RT; return TOKEN_FOREACH_TILED; }
inline { return TOKEN_INLINE; } goto { RT; return TOKEN_GOTO; }
int { return TOKEN_INT; } if { RT; return TOKEN_IF; }
int8 { return TOKEN_INT8; } inline { RT; return TOKEN_INLINE; }
int16 { return TOKEN_INT16; } int { RT; return TOKEN_INT; }
int32 { return TOKEN_INT; } int8 { RT; return TOKEN_INT8; }
int64 { return TOKEN_INT64; } int16 { RT; return TOKEN_INT16; }
launch { return TOKEN_LAUNCH; } int32 { RT; return TOKEN_INT; }
NULL { return TOKEN_NULL; } int64 { RT; return TOKEN_INT64; }
print { return TOKEN_PRINT; } launch { RT; return TOKEN_LAUNCH; }
reference { Error(*yylloc, "\"reference\" qualifier is no longer supported; " new { RT; return TOKEN_NEW; }
"please use C++-style '&' syntax for references " NULL { RT; return TOKEN_NULL; }
"instead."); } print { RT; return TOKEN_PRINT; }
return { return TOKEN_RETURN; } reference { Error(yylloc, "\"reference\" qualifier is no longer supported; "
soa { return TOKEN_SOA; } "please use C++-style '&' syntax for references "
signed { return TOKEN_SIGNED; } "instead."); }
sizeof { return TOKEN_SIZEOF; } return { RT; return TOKEN_RETURN; }
static { return TOKEN_STATIC; } soa { RT; return TOKEN_SOA; }
struct { return TOKEN_STRUCT; } signed { RT; return TOKEN_SIGNED; }
switch { return TOKEN_SWITCH; } sizeof { RT; return TOKEN_SIZEOF; }
sync { return TOKEN_SYNC; } static { RT; return TOKEN_STATIC; }
task { return TOKEN_TASK; } struct { RT; return TOKEN_STRUCT; }
true { return TOKEN_TRUE; } switch { RT; return TOKEN_SWITCH; }
typedef { return TOKEN_TYPEDEF; } sync { RT; return TOKEN_SYNC; }
uniform { return TOKEN_UNIFORM; } task { RT; return TOKEN_TASK; }
unsigned { return TOKEN_UNSIGNED; } true { RT; return TOKEN_TRUE; }
varying { return TOKEN_VARYING; } typedef { RT; return TOKEN_TYPEDEF; }
void { return TOKEN_VOID; } uniform { RT; return TOKEN_UNIFORM; }
while { return TOKEN_WHILE; } unsigned { RT; return TOKEN_UNSIGNED; }
\"C\" { return TOKEN_STRING_C_LITERAL; } varying { RT; return TOKEN_VARYING; }
\.\.\. { return TOKEN_DOTDOTDOT; } void { RT; return TOKEN_VOID; }
while { RT; return TOKEN_WHILE; }
\"C\" { RT; return TOKEN_STRING_C_LITERAL; }
\.\.\. { RT; return TOKEN_DOTDOTDOT; }
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; } L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERAL; }
{IDENT} { {IDENT} {
RT;
/* We have an identifier--is it a type name or an identifier? /* We have an identifier--is it a type name or an identifier?
The symbol table will straighten us out... */ The symbol table will straighten us out... */
yylval->stringVal = new std::string(yytext); yylval.stringVal = new std::string(yytext);
if (m->symbolTable->LookupType(yytext) != NULL) if (m->symbolTable->LookupType(yytext) != NULL)
return TOKEN_TYPE_NAME; return TOKEN_TYPE_NAME;
else else
@@ -149,18 +408,19 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
} }
{INT_NUMBER}+(u|U|l|L)*? { {INT_NUMBER}+(u|U|l|L)*? {
RT;
int ls = 0, us = 0; int ls = 0, us = 0;
char *endPtr = NULL; char *endPtr = NULL;
if (yytext[0] == '0' && yytext[1] == 'b') if (yytext[0] == '0' && yytext[1] == 'b')
yylval->intVal = lParseBinary(yytext+2, *yylloc, &endPtr); yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr);
else { else {
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__) #if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
yylval->intVal = _strtoi64(yytext, &endPtr, 0); yylval.intVal = _strtoui64(yytext, &endPtr, 0);
#else #else
// FIXME: should use strtouq and then issue an error if we can't // FIXME: should use strtouq and then issue an error if we can't
// fit into 64 bits... // fit into 64 bits...
yylval->intVal = strtoull(yytext, &endPtr, 0); yylval.intVal = strtoull(yytext, &endPtr, 0);
#endif #endif
} }
@@ -178,11 +438,11 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
us++; us++;
} }
if (kilo) if (kilo)
yylval->intVal *= 1024; yylval.intVal *= 1024;
if (mega) if (mega)
yylval->intVal *= 1024*1024; yylval.intVal *= 1024*1024;
if (giga) if (giga)
yylval->intVal *= 1024*1024*1024; yylval.intVal *= 1024*1024*1024;
if (ls >= 2) if (ls >= 2)
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT; return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
@@ -190,7 +450,7 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT; return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
// See if we can fit this into a 32-bit integer... // See if we can fit this into a 32-bit integer...
if ((yylval->intVal & 0xffffffff) == yylval->intVal) if ((yylval.intVal & 0xffffffff) == yylval.intVal)
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT; return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
else else
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT; return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
@@ -198,74 +458,76 @@ L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL;
{FLOAT_NUMBER} { {FLOAT_NUMBER} {
yylval->floatVal = (float)atof(yytext); RT;
yylval.floatVal = (float)atof(yytext);
return TOKEN_FLOAT_CONSTANT; return TOKEN_FLOAT_CONSTANT;
} }
{HEX_FLOAT_NUMBER} { {HEX_FLOAT_NUMBER} {
yylval->floatVal = (float)lParseHexFloat(yytext); RT;
yylval.floatVal = (float)lParseHexFloat(yytext);
return TOKEN_FLOAT_CONSTANT; return TOKEN_FLOAT_CONSTANT;
} }
"++" { return TOKEN_INC_OP; } "++" { RT; return TOKEN_INC_OP; }
"--" { return TOKEN_DEC_OP; } "--" { RT; return TOKEN_DEC_OP; }
"<<" { return TOKEN_LEFT_OP; } "<<" { RT; return TOKEN_LEFT_OP; }
">>" { return TOKEN_RIGHT_OP; } ">>" { RT; return TOKEN_RIGHT_OP; }
"<=" { return TOKEN_LE_OP; } "<=" { RT; return TOKEN_LE_OP; }
">=" { return TOKEN_GE_OP; } ">=" { RT; return TOKEN_GE_OP; }
"==" { return TOKEN_EQ_OP; } "==" { RT; return TOKEN_EQ_OP; }
"!=" { return TOKEN_NE_OP; } "!=" { RT; return TOKEN_NE_OP; }
"&&" { return TOKEN_AND_OP; } "&&" { RT; return TOKEN_AND_OP; }
"||" { return TOKEN_OR_OP; } "||" { RT; return TOKEN_OR_OP; }
"*=" { return TOKEN_MUL_ASSIGN; } "*=" { RT; return TOKEN_MUL_ASSIGN; }
"/=" { return TOKEN_DIV_ASSIGN; } "/=" { RT; return TOKEN_DIV_ASSIGN; }
"%=" { return TOKEN_MOD_ASSIGN; } "%=" { RT; return TOKEN_MOD_ASSIGN; }
"+=" { return TOKEN_ADD_ASSIGN; } "+=" { RT; return TOKEN_ADD_ASSIGN; }
"-=" { return TOKEN_SUB_ASSIGN; } "-=" { RT; return TOKEN_SUB_ASSIGN; }
"<<=" { return TOKEN_LEFT_ASSIGN; } "<<=" { RT; return TOKEN_LEFT_ASSIGN; }
">>=" { return TOKEN_RIGHT_ASSIGN; } ">>=" { RT; return TOKEN_RIGHT_ASSIGN; }
"&=" { return TOKEN_AND_ASSIGN; } "&=" { RT; return TOKEN_AND_ASSIGN; }
"^=" { return TOKEN_XOR_ASSIGN; } "^=" { RT; return TOKEN_XOR_ASSIGN; }
"|=" { return TOKEN_OR_ASSIGN; } "|=" { RT; return TOKEN_OR_ASSIGN; }
"->" { return TOKEN_PTR_OP; } "->" { RT; return TOKEN_PTR_OP; }
";" { return ';'; } ";" { RT; return ';'; }
("{"|"<%") { return '{'; } ("{"|"<%") { RT; return '{'; }
("}"|"%>") { return '}'; } ("}"|"%>") { RT; return '}'; }
"," { return ','; } "," { RT; return ','; }
":" { return ':'; } ":" { RT; return ':'; }
"=" { return '='; } "=" { RT; return '='; }
"(" { return '('; } "(" { RT; return '('; }
")" { return ')'; } ")" { RT; return ')'; }
("["|"<:") { return '['; } ("["|"<:") { RT; return '['; }
("]"|":>") { return ']'; } ("]"|":>") { RT; return ']'; }
"." { return '.'; } "." { RT; return '.'; }
"&" { return '&'; } "&" { RT; return '&'; }
"!" { return '!'; } "!" { RT; return '!'; }
"~" { return '~'; } "~" { RT; return '~'; }
"-" { return '-'; } "-" { RT; return '-'; }
"+" { return '+'; } "+" { RT; return '+'; }
"*" { return '*'; } "*" { RT; return '*'; }
"/" { return '/'; } "/" { RT; return '/'; }
"%" { return '%'; } "%" { RT; return '%'; }
"<" { return '<'; } "<" { RT; return '<'; }
">" { return '>'; } ">" { RT; return '>'; }
"^" { return '^'; } "^" { RT; return '^'; }
"|" { return '|'; } "|" { RT; return '|'; }
"?" { return '?'; } "?" { RT; return '?'; }
{WHITESPACE} { } {WHITESPACE} { }
\n { \n {
yylloc->last_line++; yylloc.last_line++;
yylloc->last_column = 1; yylloc.last_column = 1;
} }
#(line)?[ ][0-9]+[ ]\"(\\.|[^\\"])*\"[^\n]* { #(line)?[ ][0-9]+[ ]\"(\\.|[^\\"])*\"[^\n]* {
lHandleCppHash(yylloc); lHandleCppHash(&yylloc);
} }
. { . {
Error(*yylloc, "Illegal character: %c (0x%x)", yytext[0], int(yytext[0])); Error(yylloc, "Illegal character: %c (0x%x)", yytext[0], int(yytext[0]));
YY_USER_ACTION YY_USER_ACTION
} }
@@ -308,6 +570,8 @@ lCComment(SourcePos *pos) {
char c, prev = 0; char c, prev = 0;
while ((c = yyinput()) != 0) { while ((c = yyinput()) != 0) {
++pos->last_column;
if (c == '\n') { if (c == '\n') {
pos->last_line++; pos->last_line++;
pos->last_column = 1; pos->last_column = 1;

View File

@@ -597,6 +597,9 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
bool bool
LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength, LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
std::vector<llvm::PHINode *> &seenPhis) { std::vector<llvm::PHINode *> &seenPhis) {
if (vectorLength == 1)
return true;
if (llvm::isa<llvm::ConstantAggregateZero>(v)) if (llvm::isa<llvm::ConstantAggregateZero>(v))
return true; return true;
@@ -604,6 +607,12 @@ LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
if (cv != NULL) if (cv != NULL)
return (cv->getSplatValue() != NULL); return (cv->getSplatValue() != NULL);
#ifdef LLVM_3_1svn
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
if (cdv != NULL)
return (cdv->getSplatValue() != NULL);
#endif
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v); llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
if (bop != NULL) if (bop != NULL)
return (LLVMVectorValuesAllEqual(bop->getOperand(0), vectorLength, return (LLVMVectorValuesAllEqual(bop->getOperand(0), vectorLength,
@@ -669,6 +678,10 @@ LLVMVectorValuesAllEqual(llvm::Value *v, int vectorLength,
return true; return true;
} }
if (llvm::isa<llvm::UndefValue>(v))
// ?
return false;
Assert(!llvm::isa<llvm::Constant>(v)); Assert(!llvm::isa<llvm::Constant>(v));
if (llvm::isa<llvm::CallInst>(v) || llvm::isa<llvm::LoadInst>(v) || if (llvm::isa<llvm::CallInst>(v) || llvm::isa<llvm::LoadInst>(v) ||

View File

@@ -41,6 +41,9 @@
#include "type.h" #include "type.h"
#include <stdio.h> #include <stdio.h>
#include <stdlib.h> #include <stdlib.h>
#ifdef ISPC_IS_WINDOWS
#include <time.h>
#endif // ISPC_IS_WINDOWS
#include <llvm/Support/PrettyStackTrace.h> #include <llvm/Support/PrettyStackTrace.h>
#include <llvm/Support/Signals.h> #include <llvm/Support/Signals.h>
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn) #if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
@@ -90,7 +93,6 @@ usage(int ret) {
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n"); printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs()); printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs());
printf(" [-D<foo>]\t\t\t\t#define given value when running preprocessor\n"); printf(" [-D<foo>]\t\t\t\t#define given value when running preprocessor\n");
printf(" [--debug]\t\t\t\tPrint information useful for debugging ispc\n");
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n"); printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
#ifndef LLVM_2_9 #ifndef LLVM_2_9
printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n"); printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n");
@@ -99,7 +101,9 @@ usage(int ret) {
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n"); printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
printf(" [-g]\t\t\t\tGenerate debugging information\n"); printf(" [-g]\t\t\t\tGenerate debugging information\n");
printf(" [--help]\t\t\t\tPrint help\n"); printf(" [--help]\t\t\t\tPrint help\n");
printf(" [--help-dev]\t\t\tPrint help for developer options\n");
printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n"); printf(" [-h <name>/--header-outfile=<name>]\tOutput filename for header\n");
printf(" [-I <path>]\t\t\t\tAdd <path> to #include file search path\n");
printf(" [--instrument]\t\t\tEmit instrumentation to gather performance data\n"); printf(" [--instrument]\t\t\tEmit instrumentation to gather performance data\n");
printf(" [--math-lib=<option>]\t\tSelect math library\n"); printf(" [--math-lib=<option>]\t\tSelect math library\n");
printf(" default\t\t\t\tUse ispc's built-in math functions\n"); printf(" default\t\t\t\tUse ispc's built-in math functions\n");
@@ -115,20 +119,10 @@ usage(int ret) {
printf(" disable-loop-unroll\t\tDisable loop unrolling.\n"); printf(" disable-loop-unroll\t\tDisable loop unrolling.\n");
printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n"); printf(" fast-masked-vload\t\tFaster masked vector loads on SSE (may go past end of array)\n");
printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n"); printf(" fast-math\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
#if 0
printf(" disable-all-on-optimizations\n");
printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
printf(" disable-handle-pseudo-memory-ops\n");
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
#endif
#ifndef ISPC_IS_WINDOWS #ifndef ISPC_IS_WINDOWS
printf(" [--pic]\t\t\t\tGenerate position-independent code\n"); printf(" [--pic]\t\t\t\tGenerate position-independent code\n");
#endif // !ISPC_IS_WINDOWS #endif // !ISPC_IS_WINDOWS
printf(" [--quiet]\t\t\t\tSuppress all output\n");
printf(" [--target=<isa>]\t\t\tSelect target ISA. <isa>={%s}\n", Target::SupportedTargetISAs()); printf(" [--target=<isa>]\t\t\tSelect target ISA. <isa>={%s}\n", Target::SupportedTargetISAs());
printf(" [--version]\t\t\t\tPrint ispc version\n"); printf(" [--version]\t\t\t\tPrint ispc version\n");
printf(" [--werror]\t\t\t\tTreat warnings as errors\n"); printf(" [--werror]\t\t\t\tTreat warnings as errors\n");
@@ -139,11 +133,32 @@ usage(int ret) {
} }
/** Print the version banner followed by the list of developer-oriented
    command-line options, then terminate the process.

    @param ret  Status passed to exit(); this function does not return.
 */
static void
devUsage(int ret) {
    // One entry per line of help text; entries are written out verbatim
    // and in order, so none of them may contain printf conversions.
    static const char *const helpLines[] = {
        "\nusage (developer options): ispc\n",
        " [--debug]\t\t\t\tPrint information useful for debugging ispc\n",
        " [--fuzz-test]\t\t\tRandomly perturb program input to test error conditions\n",
        " [--fuzz-seed=<value>]\t\tSeed value for RNG for fuzz testing\n",
        " [--opt=<option>]\t\t\tSet optimization option\n",
        " disable-all-on-optimizations\n",
        " disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n",
        " disable-blending-removal\t\tDisable eliminating blend at same scope\n",
        " disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n",
        " disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n",
        " disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n",
        " disable-handle-pseudo-memory-ops\n",
        " disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n",
        " disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n",
        " [--yydebug]\t\t\tPrint debugging information during parsing\n",
    };
    const int lineCount = (int)(sizeof(helpLines) / sizeof(helpLines[0]));

    lPrintVersion();
    for (int idx = 0; idx < lineCount; ++idx)
        printf("%s", helpLines[idx]);

    exit(ret);
}
/** We take arguments from both the command line as well as from the /** We take arguments from both the command line as well as from the
ISPC_ARGS environment variable. This function returns a new set of ISPC_ARGS environment variable. This function returns a new set of
arguments representing the ones from those two sources merged together. arguments representing the ones from those two sources merged together.
*/ */
static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) { static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
// Copy over the command line arguments (passed in) // Copy over the command line arguments (passed in)
for (int i = 0; i < Argc; ++i) for (int i = 0; i < Argc; ++i)
@@ -227,6 +242,8 @@ int main(int Argc, char *Argv[]) {
for (int i = 1; i < argc; ++i) { for (int i = 1; i < argc; ++i) {
if (!strcmp(argv[i], "--help")) if (!strcmp(argv[i], "--help"))
usage(0); usage(0);
if (!strcmp(argv[i], "--help-dev"))
devUsage(0);
else if (!strncmp(argv[i], "-D", 2)) else if (!strncmp(argv[i], "-D", 2))
g->cppArgs.push_back(argv[i]); g->cppArgs.push_back(argv[i]);
else if (!strncmp(argv[i], "--addressing=", 13)) { else if (!strncmp(argv[i], "--addressing=", 13)) {
@@ -271,6 +288,19 @@ int main(int Argc, char *Argv[]) {
ot = Module::Bitcode; ot = Module::Bitcode;
else if (!strcmp(argv[i], "--emit-obj")) else if (!strcmp(argv[i], "--emit-obj"))
ot = Module::Object; ot = Module::Object;
else if (!strcmp(argv[i], "-I")) {
if (++i == argc) {
fprintf(stderr, "No path specified after -I option.\n");
usage(1);
}
g->includePath.push_back(argv[i]);
}
else if (!strncmp(argv[i], "-I", 2))
g->includePath.push_back(argv[i]+2);
else if (!strcmp(argv[i], "--fuzz-test"))
g->enableFuzzTest = true;
else if (!strncmp(argv[i], "--fuzz-seed=", 12))
g->fuzzTestSeed = atoi(argv[i] + 12);
else if (!strcmp(argv[i], "--target")) { else if (!strcmp(argv[i], "--target")) {
// FIXME: should remove this way of specifying the target... // FIXME: should remove this way of specifying the target...
if (++i == argc) { if (++i == argc) {
@@ -383,6 +413,12 @@ int main(int Argc, char *Argv[]) {
else if (!strcmp(argv[i], "--pic")) else if (!strcmp(argv[i], "--pic"))
generatePIC = true; generatePIC = true;
#endif // !ISPC_IS_WINDOWS #endif // !ISPC_IS_WINDOWS
else if (!strcmp(argv[i], "--quiet"))
g->quiet = true;
else if (!strcmp(argv[i], "--yydebug")) {
extern int yydebug;
yydebug = 1;
}
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) { else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
lPrintVersion(); lPrintVersion();
return 0; return 0;
@@ -408,6 +444,24 @@ int main(int Argc, char *Argv[]) {
if (debugSet && !optSet) if (debugSet && !optSet)
g->opt.level = 0; g->opt.level = 0;
if (g->enableFuzzTest) {
if (g->fuzzTestSeed == -1) {
#ifdef ISPC_IS_WINDOWS
int seed = (unsigned)time(NULL);
#else
int seed = getpid();
#endif
g->fuzzTestSeed = seed;
Warning(SourcePos(), "Using seed %d for fuzz testing",
g->fuzzTestSeed);
}
#ifdef ISPC_IS_WINDOWS
srand(g->fuzzTestSeed);
#else
srand48(g->fuzzTestSeed);
#endif
}
if (outFileName == NULL && headerFileName == NULL) if (outFileName == NULL && headerFileName == NULL)
Warning(SourcePos(), "No output file or header file name specified. " Warning(SourcePos(), "No output file or header file name specified. "
"Program will be compiled and warnings/errors will " "Program will be compiled and warnings/errors will "

View File

@@ -161,6 +161,9 @@ Module::CompileFile() {
bool runPreprocessor = g->runCPP; bool runPreprocessor = g->runCPP;
extern void ParserInit();
ParserInit();
if (runPreprocessor) { if (runPreprocessor) {
if (filename != NULL) { if (filename != NULL) {
// Try to open the file first, since otherwise we crash in the // Try to open the file first, since otherwise we crash in the
@@ -227,13 +230,19 @@ Module::AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst) {
} }
if (symbolTable->LookupFunction(sym->name.c_str())) { if (symbolTable->LookupFunction(sym->name.c_str())) {
Error(sym->pos, "Global variable \"%s\" shadows previously-declared function.", Error(sym->pos, "Global variable \"%s\" shadows previously-declared "
sym->name.c_str()); "function.", sym->name.c_str());
return; return;
} }
if (sym->storageClass == SC_EXTERN_C) { if (sym->storageClass == SC_EXTERN_C) {
Error(sym->pos, "extern \"C\" qualifier can only be used for functions."); Error(sym->pos, "extern \"C\" qualifier can only be used for "
"functions.");
return;
}
if (sym->type == AtomicType::Void) {
Error(sym->pos, "\"void\" type global variable is illegal.");
return; return;
} }
@@ -1165,6 +1174,24 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
inst.createSourceManager(inst.getFileManager()); inst.createSourceManager(inst.getFileManager());
inst.InitializeSourceManager(infilename); inst.InitializeSourceManager(infilename);
// Don't remove comments in the preprocessor, so that we can accurately
// track the source file position by handling them ourselves.
inst.getPreprocessorOutputOpts().ShowComments = 1;
clang::HeaderSearchOptions &headerOpts = inst.getHeaderSearchOpts();
headerOpts.UseBuiltinIncludes = 0;
#ifndef LLVM_2_9
headerOpts.UseStandardSystemIncludes = 0;
#endif // !LLVM_2_9
headerOpts.UseStandardCXXIncludes = 0;
if (g->debugPrint)
headerOpts.Verbose = 1;
for (int i = 0; i < (int)g->includePath.size(); ++i)
headerOpts.AddPath(g->includePath[i], clang::frontend::Angled,
true /* is user supplied */,
false /* not a framework */,
true /* ignore sys root */);
clang::PreprocessorOptions &opts = inst.getPreprocessorOpts(); clang::PreprocessorOptions &opts = inst.getPreprocessorOpts();
// Add defs for ISPC and PI // Add defs for ISPC and PI

187
opt.cpp
View File

@@ -184,7 +184,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]); llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[2]);
return llvm::CallInst::Create(func, newArgArray, name, insertBefore); return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
#else #else
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[2], return llvm::CallInst::Create(func, &args[0], &args[2],
name, insertBefore); name, insertBefore);
#endif #endif
} }
@@ -199,7 +199,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]); llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[3]);
return llvm::CallInst::Create(func, newArgArray, name, insertBefore); return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
#else #else
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[3], return llvm::CallInst::Create(func, &args[0], &args[3],
name, insertBefore); name, insertBefore);
#endif #endif
} }
@@ -215,7 +215,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[4]); llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[4]);
return llvm::CallInst::Create(func, newArgArray, name, insertBefore); return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
#else #else
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[4], return llvm::CallInst::Create(func, &args[0], &args[4],
name, insertBefore); name, insertBefore);
#endif #endif
} }
@@ -230,7 +230,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[5]); llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[5]);
return llvm::CallInst::Create(func, newArgArray, name, insertBefore); return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
#else #else
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[5], return llvm::CallInst::Create(func, &args[0], &args[5],
name, insertBefore); name, insertBefore);
#endif #endif
} }
@@ -245,7 +245,7 @@ lCallInst(llvm::Function *func, llvm::Value *arg0, llvm::Value *arg1,
llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[6]); llvm::ArrayRef<llvm::Value *> newArgArray(&args[0], &args[6]);
return llvm::CallInst::Create(func, newArgArray, name, insertBefore); return llvm::CallInst::Create(func, newArgArray, name, insertBefore);
#else #else
return llvm::CallInst::Create(func, &newArgs[0], &newArgs[6], return llvm::CallInst::Create(func, &args[0], &args[6],
name, insertBefore); name, insertBefore);
#endif #endif
} }
@@ -368,8 +368,10 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(CreateMaskedStoreOptPass()); optPM.add(CreateMaskedStoreOptPass());
optPM.add(CreateMaskedLoadOptPass()); optPM.add(CreateMaskedLoadOptPass());
} }
optPM.add(CreatePseudoMaskedStorePass()); if (g->opt.disableHandlePseudoMemoryOps == false)
if (!g->opt.disableGatherScatterOptimizations) optPM.add(CreatePseudoMaskedStorePass());
if (g->opt.disableGatherScatterOptimizations == false &&
g->opt.disableHandlePseudoMemoryOps == false)
optPM.add(CreateGSToLoadStorePass()); optPM.add(CreateGSToLoadStorePass());
if (g->opt.disableHandlePseudoMemoryOps == false) { if (g->opt.disableHandlePseudoMemoryOps == false) {
optPM.add(CreatePseudoMaskedStorePass()); optPM.add(CreatePseudoMaskedStorePass());
@@ -628,11 +630,20 @@ lGetMask(llvm::Value *factor) {
"known and all bits on". */ "known and all bits on". */
Assert(g->target.vectorWidth < 32); Assert(g->target.vectorWidth < 32);
#ifdef LLVM_3_1svn
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(factor);
#else
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor); llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(factor);
#endif
if (cv) { if (cv) {
int mask = 0; int mask = 0;
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements; llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
#ifdef LLVM_3_1svn
for (int i = 0; i < (int)cv->getNumElements(); ++i)
elements.push_back(cv->getElementAsConstant(i));
#else
cv->getVectorElements(elements); cv->getVectorElements(elements);
#endif
for (unsigned int i = 0; i < elements.size(); ++i) { for (unsigned int i = 0; i < elements.size(); ++i) {
llvm::APInt intMaskValue; llvm::APInt intMaskValue;
@@ -1125,7 +1136,17 @@ lGetBasePtrAndOffsets(llvm::Value *ptrs, llvm::Value **offsets,
// Indexing into global arrays can lead to this form, with // Indexing into global arrays can lead to this form, with
// ConstantVectors.. // ConstantVectors..
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements; llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
#ifdef LLVM_3_1svn
for (int i = 0; i < (int)cv->getNumOperands(); ++i) {
llvm::Constant *c =
llvm::dyn_cast<llvm::Constant>(cv->getOperand(i));
if (c == NULL)
return NULL;
elements.push_back(c);
}
#else
cv->getVectorElements(elements); cv->getVectorElements(elements);
#endif
llvm::Constant *delta[ISPC_MAX_NVEC]; llvm::Constant *delta[ISPC_MAX_NVEC];
for (unsigned int i = 0; i < elements.size(); ++i) { for (unsigned int i = 0; i < elements.size(); ++i) {
@@ -1235,6 +1256,9 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset,
llvm::Value **variableOffset, llvm::Value **variableOffset,
llvm::Instruction *insertBefore) { llvm::Instruction *insertBefore) {
if (llvm::isa<llvm::ConstantVector>(vec) || if (llvm::isa<llvm::ConstantVector>(vec) ||
#ifdef LLVM_3_1svn
llvm::isa<llvm::ConstantDataVector>(vec) ||
#endif
llvm::isa<llvm::ConstantAggregateZero>(vec)) { llvm::isa<llvm::ConstantAggregateZero>(vec)) {
*constOffset = vec; *constOffset = vec;
*variableOffset = NULL; *variableOffset = NULL;
@@ -1353,7 +1377,12 @@ lExtractConstantOffset(llvm::Value *vec, llvm::Value **constOffset,
in *splat, if so). */ in *splat, if so). */
static bool static bool
lIs248Splat(llvm::Value *v, int *splat) { lIs248Splat(llvm::Value *v, int *splat) {
#ifdef LLVM_3_1svn
llvm::ConstantDataVector *cvec =
llvm::dyn_cast<llvm::ConstantDataVector>(v);
#else
llvm::ConstantVector *cvec = llvm::dyn_cast<llvm::ConstantVector>(v); llvm::ConstantVector *cvec = llvm::dyn_cast<llvm::ConstantVector>(v);
#endif
if (cvec == NULL) if (cvec == NULL)
return false; return false;
@@ -1460,6 +1489,9 @@ lExtractUniforms(llvm::Value **vec, llvm::Instruction *insertBefore) {
fprintf(stderr, "\n"); fprintf(stderr, "\n");
if (llvm::isa<llvm::ConstantVector>(*vec) || if (llvm::isa<llvm::ConstantVector>(*vec) ||
#ifdef LLVM_3_1svn
llvm::isa<llvm::ConstantDataVector>(*vec) ||
#endif
llvm::isa<llvm::ConstantAggregateZero>(*vec)) llvm::isa<llvm::ConstantAggregateZero>(*vec))
return NULL; return NULL;
@@ -1855,6 +1887,7 @@ MaskedStoreOptPass::runOnBasicBlock(llvm::BasicBlock &bb) {
goto restart; goto restart;
} }
} }
return modifiedAny; return modifiedAny;
} }
@@ -2092,6 +2125,7 @@ PseudoMaskedStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
modifiedAny = true; modifiedAny = true;
goto restart; goto restart;
} }
return modifiedAny; return modifiedAny;
} }
@@ -2139,11 +2173,22 @@ char GSToLoadStorePass::ID = 0;
elements. elements.
*/ */
static bool static bool
lVectorIsLinearConstantInts(llvm::ConstantVector *cv, int vectorLength, lVectorIsLinearConstantInts(
#ifdef LLVM_3_1svn
llvm::ConstantDataVector *cv,
#else
llvm::ConstantVector *cv,
#endif
int vectorLength,
int stride) { int stride) {
// Flatten the vector out into the elements array // Flatten the vector out into the elements array
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements; llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
#ifdef LLVM_3_1svn
for (int i = 0; i < (int)cv->getNumElements(); ++i)
elements.push_back(cv->getElementAsConstant(i));
#else
cv->getVectorElements(elements); cv->getVectorElements(elements);
#endif
Assert((int)elements.size() == vectorLength); Assert((int)elements.size() == vectorLength);
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[0]); llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(elements[0]);
@@ -2182,11 +2227,19 @@ lCheckMulForLinear(llvm::Value *op0, llvm::Value *op1, int vectorLength,
int stride, std::vector<llvm::PHINode *> &seenPhis) { int stride, std::vector<llvm::PHINode *> &seenPhis) {
// Is the first operand a constant integer value splatted across all of // Is the first operand a constant integer value splatted across all of
// the lanes? // the lanes?
#ifdef LLVM_3_1svn
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(op0);
#else
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(op0); llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(op0);
#endif
if (cv == NULL) if (cv == NULL)
return false; return false;
llvm::ConstantInt *splat =
llvm::dyn_cast<llvm::ConstantInt>(cv->getSplatValue()); llvm::Constant *csplat = cv->getSplatValue();
if (csplat == NULL)
return false;
llvm::ConstantInt *splat = llvm::dyn_cast<llvm::ConstantInt>(csplat);
if (splat == NULL) if (splat == NULL)
return false; return false;
@@ -2214,7 +2267,11 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
std::vector<llvm::PHINode *> &seenPhis) { std::vector<llvm::PHINode *> &seenPhis) {
// First try the easy case: if the values are all just constant // First try the easy case: if the values are all just constant
// integers and have the expected stride between them, then we're done. // integers and have the expected stride between them, then we're done.
#ifdef LLVM_3_1svn
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
#else
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v); llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
#endif
if (cv != NULL) if (cv != NULL)
return lVectorIsLinearConstantInts(cv, vectorLength, stride); return lVectorIsLinearConstantInts(cv, vectorLength, stride);
@@ -2471,7 +2528,6 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
constOffsets, "varying+const_offsets", constOffsets, "varying+const_offsets",
callInst); callInst);
{
std::vector<llvm::PHINode *> seenPhis; std::vector<llvm::PHINode *> seenPhis;
if (LLVMVectorValuesAllEqual(fullOffsets, g->target.vectorWidth, seenPhis)) { if (LLVMVectorValuesAllEqual(fullOffsets, g->target.vectorWidth, seenPhis)) {
// If all the offsets are equal, then compute the single // If all the offsets are equal, then compute the single
@@ -2493,66 +2549,61 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
"load_braodcast"); "load_braodcast");
lCopyMetadata(newCall, callInst); lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall); llvm::ReplaceInstWithInst(callInst, newCall);
modifiedAny = true;
goto restart;
} }
else { else {
// A scatter with everyone going to the same location is // A scatter with everyone going to the same location is
// undefined. Issue a warning and arbitrarily let the // undefined (if there's more than one program instance in
// first guy win. // the gang). Issue a warning.
Warning(pos, "Undefined behavior: all program instances are " if (g->target.vectorWidth > 1)
"writing to the same location!"); Warning(pos, "Undefined behavior: all program instances are "
"writing to the same location!");
llvm::Value *first = // We could do something similar to the gather case, where
llvm::ExtractElementInst::Create(storeValue, LLVMInt32(0), "rvalue_first", // we arbitrarily write one of the values, but we need to
callInst); // a) check to be sure the mask isn't all off and b) pick
lCopyMetadata(first, callInst); // the value from an executing program instance in that
// case. We'll just let a bunch of the program instances
// do redundant writes, since this isn't important to make
// fast anyway...
}
}
else {
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
ptr = new llvm::BitCastInst(ptr, llvm::PointerType::get(first->getType(), 0), std::vector<llvm::PHINode *> seenPhis;
"ptr2rvalue_type", callInst); if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth,
step, seenPhis)) {
// We have a linear sequence of memory locations being accessed
// starting with the location given by the offset from
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
// and 64 bit gather/scatters, respectively.)
llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst);
lCopyMetadata(ptr, callInst); lCopyMetadata(ptr, callInst);
llvm::Instruction *sinst = new llvm::StoreInst(first, ptr, false, if (gatherInfo != NULL) {
scatterInfo->align); Debug(pos, "Transformed gather to unaligned vector load!");
lCopyMetadata(sinst, callInst); llvm::Instruction *newCall =
llvm::ReplaceInstWithInst(callInst, sinst); lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
else {
Debug(pos, "Transformed scatter to unaligned vector store!");
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
callInst);
llvm::Instruction *newCall =
lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue,
mask, "");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
modifiedAny = true;
goto restart;
} }
modifiedAny = true;
goto restart;
}
}
int step = gatherInfo ? gatherInfo->align : scatterInfo->align;
std::vector<llvm::PHINode *> seenPhis;
if (step > 0 && lVectorIsLinear(fullOffsets, g->target.vectorWidth,
step, seenPhis)) {
// We have a linear sequence of memory locations being accessed
// starting with the location given by the offset from
// offsetElements[0], with stride of 4 or 8 bytes (for 32 bit
// and 64 bit gather/scatters, respectively.)
llvm::Value *ptr = lComputeCommonPointer(base, fullOffsets, callInst);
lCopyMetadata(ptr, callInst);
if (gatherInfo != NULL) {
Debug(pos, "Transformed gather to unaligned vector load!");
llvm::Instruction *newCall =
lCallInst(gatherInfo->loadMaskedFunc, ptr, mask, "masked_load");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
else {
Debug(pos, "Transformed scatter to unaligned vector store!");
ptr = new llvm::BitCastInst(ptr, scatterInfo->vecPtrType, "ptrcast",
callInst);
llvm::Instruction *newCall =
lCallInst(scatterInfo->maskedStoreFunc, ptr, storeValue,
mask, "");
lCopyMetadata(newCall, callInst);
llvm::ReplaceInstWithInst(callInst, newCall);
}
modifiedAny = true;
goto restart;
} }
} }
@@ -2675,10 +2726,12 @@ PseudoGSToGSPass::runOnBasicBlock(llvm::BasicBlock &bb) {
Assert(ok); Assert(ok);
callInst->setCalledFunction(info->actualFunc); callInst->setCalledFunction(info->actualFunc);
if (info->isGather) if (g->target.vectorWidth > 1) {
PerformanceWarning(pos, "Gather required to compute value in expression."); if (info->isGather)
else PerformanceWarning(pos, "Gather required to compute value in expression.");
PerformanceWarning(pos, "Scatter required for storing value."); else
PerformanceWarning(pos, "Scatter required for storing value.");
}
modifiedAny = true; modifiedAny = true;
goto restart; goto restart;
} }

607
parse.yy
View File

@@ -37,10 +37,13 @@
/* one for 'if', one for 'cif' */ /* one for 'if', one for 'cif' */
%expect 2 %expect 2
%pure-parser %error-verbose
%code requires { %code requires {
#define yytnamerr lYYTNameErr
#define YYLTYPE SourcePos #define YYLTYPE SourcePos
# define YYLLOC_DEFAULT(Current, Rhs, N) \ # define YYLLOC_DEFAULT(Current, Rhs, N) \
@@ -87,11 +90,16 @@ struct ForeachDimension;
__FILE__, __LINE__); __FILE__, __LINE__);
union YYSTYPE; union YYSTYPE;
extern int yylex(YYSTYPE *, SourcePos *); extern int yylex();
extern char *yytext; extern char *yytext;
void yyerror(const char *s) { fprintf(stderr, "Parse error: %s\n", s); } void yyerror(const char *s);
static int lYYTNameErr(char *yyres, const char *yystr);
static void lSuggestBuiltinAlternates();
static void lSuggestParamListAlternates();
static void lAddDeclaration(DeclSpecs *ds, Declarator *decl); static void lAddDeclaration(DeclSpecs *ds, Declarator *decl);
static void lAddFunctionParams(Declarator *decl); static void lAddFunctionParams(Declarator *decl);
@@ -106,13 +114,14 @@ static void lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
const EnumType *enumType); const EnumType *enumType);
static const char *lBuiltinTokens[] = { static const char *lBuiltinTokens[] = {
"assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo", "cfor", "assert", "bool", "break", "case", "cbreak", "ccontinue", "cdo",
"cif", "cwhile", "const", "continue", "creturn", "default", "do", "double", "cfor", "cif", "cwhile", "const", "continue", "creturn", "default",
"else", "enum", "export", "extern", "false", "float", "for", "foreach", "do", "delete", "double", "else", "enum", "export", "extern", "false",
"foreach_tiled", "goto", "if", "inline", "int", "int8", "int16", "float", "for", "foreach", "foreach_tiled", "goto", "if", "inline",
"int32", "int64", "launch", "NULL", "print", "return", "signed", "sizeof", "int", "int8", "int16", "int32", "int64", "launch", "new", "NULL",
"static", "struct", "switch", "sync", "task", "true", "typedef", "uniform", "print", "return", "signed", "sizeof", "static", "struct", "switch",
"unsigned", "varying", "void", "while", NULL "sync", "task", "true", "typedef", "uniform", "unsigned", "varying",
"void", "while", NULL
}; };
static const char *lParamListTokens[] = { static const char *lParamListTokens[] = {
@@ -170,7 +179,7 @@ struct ForeachDimension {
%token TOKEN_AND_OP TOKEN_OR_OP TOKEN_MUL_ASSIGN TOKEN_DIV_ASSIGN TOKEN_MOD_ASSIGN %token TOKEN_AND_OP TOKEN_OR_OP TOKEN_MUL_ASSIGN TOKEN_DIV_ASSIGN TOKEN_MOD_ASSIGN
%token TOKEN_ADD_ASSIGN TOKEN_SUB_ASSIGN TOKEN_LEFT_ASSIGN TOKEN_RIGHT_ASSIGN %token TOKEN_ADD_ASSIGN TOKEN_SUB_ASSIGN TOKEN_LEFT_ASSIGN TOKEN_RIGHT_ASSIGN
%token TOKEN_AND_ASSIGN TOKEN_OR_ASSIGN TOKEN_XOR_ASSIGN %token TOKEN_AND_ASSIGN TOKEN_OR_ASSIGN TOKEN_XOR_ASSIGN
%token TOKEN_SIZEOF %token TOKEN_SIZEOF TOKEN_NEW TOKEN_DELETE
%token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK %token TOKEN_EXTERN TOKEN_EXPORT TOKEN_STATIC TOKEN_INLINE TOKEN_TASK
%token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA %token TOKEN_UNIFORM TOKEN_VARYING TOKEN_TYPEDEF TOKEN_SOA
@@ -189,7 +198,7 @@ struct ForeachDimension {
%type <expr> multiplicative_expression additive_expression shift_expression %type <expr> multiplicative_expression additive_expression shift_expression
%type <expr> relational_expression equality_expression and_expression %type <expr> relational_expression equality_expression and_expression
%type <expr> exclusive_or_expression inclusive_or_expression %type <expr> exclusive_or_expression inclusive_or_expression
%type <expr> logical_and_expression logical_or_expression %type <expr> logical_and_expression logical_or_expression new_expression
%type <expr> conditional_expression assignment_expression expression %type <expr> conditional_expression assignment_expression expression
%type <expr> initializer constant_expression for_test %type <expr> initializer constant_expression for_test
%type <exprList> argument_expression_list initializer_list %type <exprList> argument_expression_list initializer_list
@@ -197,7 +206,7 @@ struct ForeachDimension {
%type <stmt> statement labeled_statement compound_statement for_init_statement %type <stmt> statement labeled_statement compound_statement for_init_statement
%type <stmt> expression_statement selection_statement iteration_statement %type <stmt> expression_statement selection_statement iteration_statement
%type <stmt> jump_statement statement_list declaration_statement print_statement %type <stmt> jump_statement statement_list declaration_statement print_statement
%type <stmt> assert_statement sync_statement %type <stmt> assert_statement sync_statement delete_statement
%type <declaration> declaration parameter_declaration %type <declaration> declaration parameter_declaration
%type <declarators> init_declarator_list %type <declarators> init_declarator_list
@@ -215,7 +224,7 @@ struct ForeachDimension {
%type <enumType> enum_specifier %type <enumType> enum_specifier
%type <type> specifier_qualifier_list struct_or_union_specifier %type <type> specifier_qualifier_list struct_or_union_specifier
%type <type> type_specifier type_name %type <type> type_specifier type_name rate_qualified_new_type
%type <type> short_vec_specifier %type <type> short_vec_specifier
%type <atomicType> atomic_var_type_specifier %type <atomicType> atomic_var_type_specifier
@@ -225,7 +234,7 @@ struct ForeachDimension {
%type <stringVal> string_constant %type <stringVal> string_constant
%type <constCharPtr> struct_or_union_name enum_identifier goto_identifier %type <constCharPtr> struct_or_union_name enum_identifier goto_identifier
%type <intVal> int_constant soa_width_specifier %type <intVal> int_constant soa_width_specifier rate_qualified_new
%type <foreachDimension> foreach_dimension_specifier %type <foreachDimension> foreach_dimension_specifier
%type <foreachDimensionList> foreach_dimension_list %type <foreachDimensionList> foreach_dimension_list
@@ -284,6 +293,7 @@ primary_expression
/* | TOKEN_STRING_LITERAL /* | TOKEN_STRING_LITERAL
{ UNIMPLEMENTED }*/ { UNIMPLEMENTED }*/
| '(' expression ')' { $$ = $2; } | '(' expression ')' { $$ = $2; }
| '(' error ')' { $$ = NULL; }
; ;
launch_expression launch_expression
@@ -307,10 +317,14 @@ postfix_expression
: primary_expression : primary_expression
| postfix_expression '[' expression ']' | postfix_expression '[' expression ']'
{ $$ = new IndexExpr($1, $3, Union(@1,@4)); } { $$ = new IndexExpr($1, $3, Union(@1,@4)); }
| postfix_expression '[' error ']'
{ $$ = NULL; }
| postfix_expression '(' ')' | postfix_expression '(' ')'
{ $$ = new FunctionCallExpr($1, new ExprList(Union(@1,@2)), Union(@1,@3)); } { $$ = new FunctionCallExpr($1, new ExprList(Union(@1,@2)), Union(@1,@3)); }
| postfix_expression '(' argument_expression_list ')' | postfix_expression '(' argument_expression_list ')'
{ $$ = new FunctionCallExpr($1, $3, Union(@1,@4)); } { $$ = new FunctionCallExpr($1, $3, Union(@1,@4)); }
| postfix_expression '(' error ')'
{ $$ = NULL; }
| launch_expression | launch_expression
| postfix_expression '.' TOKEN_IDENTIFIER | postfix_expression '.' TOKEN_IDENTIFIER
{ $$ = MemberExpr::create($1, yytext, Union(@1,@3), @3, false); } { $$ = MemberExpr::create($1, yytext, Union(@1,@3), @3, false); }
@@ -327,7 +341,10 @@ argument_expression_list
| argument_expression_list ',' assignment_expression | argument_expression_list ',' assignment_expression
{ {
ExprList *argList = dynamic_cast<ExprList *>($1); ExprList *argList = dynamic_cast<ExprList *>($1);
Assert(argList != NULL); if (argList == NULL) {
Assert(m->errorCount > 0);
argList = new ExprList(@3);
}
argList->exprs.push_back($3); argList->exprs.push_back($3);
argList->pos = Union(argList->pos, @3); argList->pos = Union(argList->pos, @3);
$$ = argList; $$ = argList;
@@ -448,8 +465,36 @@ conditional_expression
{ $$ = new SelectExpr($1, $3, $5, Union(@1,@5)); } { $$ = new SelectExpr($1, $3, $5, Union(@1,@5)); }
; ;
/* rate_qualified_new: the TOKEN_NEW keyword with an optional TOKEN_UNIFORM
   or TOKEN_VARYING in front of it.  The semantic value is 0 for a bare
   TOKEN_NEW, TYPEQUAL_UNIFORM for TOKEN_UNIFORM TOKEN_NEW, and
   TYPEQUAL_VARYING for TOKEN_VARYING TOKEN_NEW. */
assignment_expression rate_qualified_new
: TOKEN_NEW { $$ = 0; }
| TOKEN_UNIFORM TOKEN_NEW { $$ = TYPEQUAL_UNIFORM; }
| TOKEN_VARYING TOKEN_NEW { $$ = TYPEQUAL_VARYING; }
;
/* A type_specifier, optionally preceded by TOKEN_UNIFORM or TOKEN_VARYING.
   When a qualifier is present the type is converted with
   GetAsUniformType() / GetAsVaryingType() respectively; a NULL
   type_specifier value is passed through as NULL. */
rate_qualified_new_type
    : type_specifier
      {
          $$ = $1;
      }
    | TOKEN_UNIFORM type_specifier
      {
          if ($2 == NULL)
              $$ = NULL;
          else
              $$ = $2->GetAsUniformType();
      }
    | TOKEN_VARYING type_specifier
      {
          if ($2 == NULL)
              $$ = NULL;
          else
              $$ = $2->GetAsVaryingType();
      }
    ;
/* new_expression: either a plain conditional_expression, or a NewExpr built
   from a rate_qualified_new keyword ($1) and a rate_qualified_new_type ($2),
   optionally followed by a parenthesized initializer_list or a bracketed
   expression. */
new_expression
: conditional_expression : conditional_expression
/* No initializer and no bracketed expression: both optional NewExpr
   arguments are NULL. */
| rate_qualified_new rate_qualified_new_type
{
$$ = new NewExpr($1, $2, NULL, NULL, @1, Union(@1, @2));
}
/* Parenthesized form: the initializer_list ($4) is passed as the third
   NewExpr argument.
   NOTE(review): the last argument is Union(@1, @2), which ends at the type
   and does not cover the '(' initializer_list ')' part, whereas the
   bracketed form below extends through @4 -- confirm this is intended. */
| rate_qualified_new rate_qualified_new_type '(' initializer_list ')'
{
$$ = new NewExpr($1, $2, $4, NULL, @1, Union(@1, @2));
}
/* Bracketed form: the expression ($4) is passed as the fourth NewExpr
   argument. */
| rate_qualified_new rate_qualified_new_type '[' expression ']'
{
$$ = new NewExpr($1, $2, NULL, $4, @1, Union(@1, @4));
}
;
assignment_expression
: new_expression
| unary_expression '=' assignment_expression | unary_expression '=' assignment_expression
{ $$ = new AssignExpr(AssignExpr::Assign, $1, $3, Union(@1, @3)); } { $$ = new AssignExpr(AssignExpr::Assign, $1, $3, Union(@1, @3)); }
| unary_expression TOKEN_MUL_ASSIGN assignment_expression | unary_expression TOKEN_MUL_ASSIGN assignment_expression
@@ -487,9 +532,16 @@ constant_expression
declaration_statement declaration_statement
: declaration : declaration
{ {
if ($1->declSpecs->storageClass == SC_TYPEDEF) { if ($1 == NULL) {
Assert(m->errorCount > 0);
$$ = NULL;
}
else if ($1->declSpecs->storageClass == SC_TYPEDEF) {
for (unsigned int i = 0; i < $1->declarators.size(); ++i) { for (unsigned int i = 0; i < $1->declarators.size(); ++i) {
m->AddTypeDef($1->declarators[i]->GetSymbol()); if ($1->declarators[i] == NULL)
Assert(m->errorCount > 0);
else
m->AddTypeDef($1->declarators[i]->GetSymbol());
} }
$$ = NULL; $$ = NULL;
} }
@@ -590,15 +642,20 @@ init_declarator_list
: init_declarator : init_declarator
{ {
std::vector<Declarator *> *dl = new std::vector<Declarator *>; std::vector<Declarator *> *dl = new std::vector<Declarator *>;
dl->push_back($1); if ($1 != NULL)
dl->push_back($1);
$$ = dl; $$ = dl;
} }
| init_declarator_list ',' init_declarator | init_declarator_list ',' init_declarator
{ {
std::vector<Declarator *> *dl = (std::vector<Declarator *> *)$1; std::vector<Declarator *> *dl = (std::vector<Declarator *> *)$1;
if (dl != NULL && $3 != NULL) if (dl == NULL) {
Assert(m->errorCount > 0);
dl = new std::vector<Declarator *>;
}
if ($3 != NULL)
dl->push_back($3); dl->push_back($3);
$$ = $1; $$ = dl;
} }
; ;
@@ -623,10 +680,10 @@ storage_class_specifier
type_specifier type_specifier
: atomic_var_type_specifier { $$ = $1; } : atomic_var_type_specifier { $$ = $1; }
| TOKEN_TYPE_NAME | TOKEN_TYPE_NAME
{ const Type *t = m->symbolTable->LookupType(yytext); {
Assert(t != NULL); const Type *t = m->symbolTable->LookupType(yytext);
$$ = t; $$ = t;
} }
| struct_or_union_specifier { $$ = $1; } | struct_or_union_specifier { $$ = $1; }
| enum_specifier { $$ = $1; } | enum_specifier { $$ = $1; }
; ;
@@ -644,41 +701,47 @@ atomic_var_type_specifier
short_vec_specifier short_vec_specifier
: atomic_var_type_specifier '<' int_constant '>' : atomic_var_type_specifier '<' int_constant '>'
{ {
Type* vt = $$ = $1 ? new VectorType($1, (int32_t)$3) : NULL;
new VectorType($1, (int32_t)$3); }
$$ = vt;
}
; ;
struct_or_union_name struct_or_union_name
: TOKEN_IDENTIFIER { $$ = strdup(yytext); } : TOKEN_IDENTIFIER { $$ = strdup(yytext); }
| TOKEN_TYPE_NAME { $$ = strdup(yytext); } | TOKEN_TYPE_NAME { $$ = strdup(yytext); }
; ;
struct_or_union_specifier struct_or_union_specifier
: struct_or_union struct_or_union_name '{' struct_declaration_list '}' : struct_or_union struct_or_union_name '{' struct_declaration_list '}'
{ {
std::vector<const Type *> elementTypes; if ($4 != NULL) {
std::vector<std::string> elementNames; std::vector<const Type *> elementTypes;
std::vector<SourcePos> elementPositions; std::vector<std::string> elementNames;
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames, std::vector<SourcePos> elementPositions;
&elementPositions); GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
StructType *st = new StructType($2, elementTypes, elementNames, &elementPositions);
elementPositions, false, Type::Unbound, @2); StructType *st = new StructType($2, elementTypes, elementNames,
m->symbolTable->AddType($2, st, @2); elementPositions, false, Type::Unbound, @2);
$$ = st; m->symbolTable->AddType($2, st, @2);
$$ = st;
}
else
$$ = NULL;
} }
| struct_or_union '{' struct_declaration_list '}' | struct_or_union '{' struct_declaration_list '}'
{ {
std::vector<const Type *> elementTypes; if ($3 != NULL) {
std::vector<std::string> elementNames; std::vector<const Type *> elementTypes;
std::vector<SourcePos> elementPositions; std::vector<std::string> elementNames;
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames, std::vector<SourcePos> elementPositions;
&elementPositions); GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
// FIXME: should be unbound &elementPositions);
$$ = new StructType("", elementTypes, elementNames, elementPositions, // FIXME: should be unbound
false, Type::Unbound, @1); $$ = new StructType("", elementTypes, elementNames, elementPositions,
false, Type::Unbound, @1);
}
else
$$ = NULL;
} }
| struct_or_union '{' '}' | struct_or_union '{' '}'
{ {
@@ -689,16 +752,17 @@ struct_or_union_specifier
Error(@1, "Empty struct definitions not allowed."); Error(@1, "Empty struct definitions not allowed.");
} }
| struct_or_union struct_or_union_name | struct_or_union struct_or_union_name
{ const Type *st = m->symbolTable->LookupType($2); {
if (!st) { const Type *st = m->symbolTable->LookupType($2);
std::vector<std::string> alternates = m->symbolTable->ClosestTypeMatch($2); if (!st) {
std::string alts = lGetAlternates(alternates); std::vector<std::string> alternates = m->symbolTable->ClosestTypeMatch($2);
Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str()); std::string alts = lGetAlternates(alternates);
} Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str());
else if (dynamic_cast<const StructType *>(st) == NULL) }
Error(@2, "Type \"%s\" is not a struct type! (%s)", $2, else if (dynamic_cast<const StructType *>(st) == NULL)
st->GetString().c_str()); Error(@2, "Type \"%s\" is not a struct type! (%s)", $2,
$$ = st; st->GetString().c_str());
$$ = st;
} }
; ;
@@ -710,22 +774,26 @@ struct_declaration_list
: struct_declaration : struct_declaration
{ {
std::vector<StructDeclaration *> *sdl = new std::vector<StructDeclaration *>; std::vector<StructDeclaration *> *sdl = new std::vector<StructDeclaration *>;
if (sdl != NULL && $1 != NULL) if ($1 != NULL)
sdl->push_back($1); sdl->push_back($1);
$$ = sdl; $$ = sdl;
} }
| struct_declaration_list struct_declaration | struct_declaration_list struct_declaration
{ {
std::vector<StructDeclaration *> *sdl = (std::vector<StructDeclaration *> *)$1; std::vector<StructDeclaration *> *sdl = (std::vector<StructDeclaration *> *)$1;
if (sdl != NULL && $2 != NULL) if (sdl == NULL) {
Assert(m->errorCount > 0);
sdl = new std::vector<StructDeclaration *>;
}
if ($2 != NULL)
sdl->push_back($2); sdl->push_back($2);
$$ = $1; $$ = sdl;
} }
; ;
struct_declaration struct_declaration
: specifier_qualifier_list struct_declarator_list ';' : specifier_qualifier_list struct_declarator_list ';'
{ $$ = new StructDeclaration($1, $2); } { $$ = ($1 != NULL && $2 != NULL) ? new StructDeclaration($1, $2) : NULL; }
; ;
specifier_qualifier_list specifier_qualifier_list
@@ -791,9 +859,13 @@ struct_declarator_list
| struct_declarator_list ',' struct_declarator | struct_declarator_list ',' struct_declarator
{ {
std::vector<Declarator *> *sdl = (std::vector<Declarator *> *)$1; std::vector<Declarator *> *sdl = (std::vector<Declarator *> *)$1;
if (sdl != NULL && $3 != NULL) if (sdl == NULL) {
Assert(m->errorCount > 0);
sdl = new std::vector<Declarator *>;
}
if ($3 != NULL)
sdl->push_back($3); sdl->push_back($3);
$$ = $1; $$ = sdl;
} }
; ;
@@ -860,9 +932,14 @@ enumerator_list
} }
| enumerator_list ',' enumerator | enumerator_list ',' enumerator
{ {
if ($1 != NULL && $3 != NULL) std::vector<Symbol *> *symList = $1;
$1->push_back($3); if (symList == NULL) {
$$ = $1; Assert(m->errorCount > 0);
symList = new std::vector<Symbol *>;
}
if ($3 != NULL)
symList->push_back($3);
$$ = symList;
} }
; ;
@@ -910,19 +987,27 @@ type_qualifier_list
declarator declarator
: pointer direct_declarator : pointer direct_declarator
{ {
Declarator *tail = $1; if ($1 != NULL) {
while (tail->child != NULL) Declarator *tail = $1;
tail = tail->child; while (tail->child != NULL)
tail->child = $2; tail = tail->child;
$$ = $1; tail->child = $2;
$$ = $1;
}
else
$$ = NULL;
} }
| reference direct_declarator | reference direct_declarator
{ {
Declarator *tail = $1; if ($1 != NULL) {
while (tail->child != NULL) Declarator *tail = $1;
tail = tail->child; while (tail->child != NULL)
tail->child = $2; tail = tail->child;
$$ = $1; tail->child = $2;
$$ = $1;
}
else
$$ = NULL;
} }
| direct_declarator | direct_declarator
; ;
@@ -971,12 +1056,17 @@ direct_declarator
else else
$$ = NULL; $$ = NULL;
} }
| direct_declarator '[' error ']'
{
$$ = NULL;
}
| direct_declarator '(' parameter_type_list ')' | direct_declarator '(' parameter_type_list ')'
{ {
if ($1 != NULL) { if ($1 != NULL) {
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4)); Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4));
d->child = $1; d->child = $1;
if ($3 != NULL) d->functionParams = *$3; if ($3 != NULL)
d->functionParams = *$3;
$$ = d; $$ = d;
} }
else else
@@ -992,6 +1082,10 @@ direct_declarator
else else
$$ = NULL; $$ = NULL;
} }
| direct_declarator '(' error ')'
{
$$ = NULL;
}
; ;
@@ -1046,27 +1140,14 @@ parameter_list
{ {
std::vector<Declaration *> *dl = (std::vector<Declaration *> *)$1; std::vector<Declaration *> *dl = (std::vector<Declaration *> *)$1;
if (dl == NULL) if (dl == NULL)
// dl may be NULL due to an earlier parse error...
dl = new std::vector<Declaration *>; dl = new std::vector<Declaration *>;
if ($3 != NULL) if ($3 != NULL)
dl->push_back($3); dl->push_back($3);
$$ = dl; $$ = dl;
} }
| error | error ','
{ {
std::vector<std::string> builtinTokens; lSuggestParamListAlternates();
const char **token = lParamListTokens;
while (*token) {
builtinTokens.push_back(*token);
++token;
}
if (strlen(yytext) == 0)
Error(@1, "Syntax error--premature end of file.");
else {
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
std::string alts = lGetAlternates(alternates);
Error(@1, "Syntax error--token \"%s\" unexpected.%s", yytext, alts.c_str());
}
$$ = NULL; $$ = NULL;
} }
; ;
@@ -1078,18 +1159,26 @@ parameter_declaration
} }
| declaration_specifiers declarator '=' initializer | declaration_specifiers declarator '=' initializer
{ {
if ($2 != NULL) if ($1 != NULL && $2 != NULL) {
$2->initExpr = $4; $2->initExpr = $4;
$$ = new Declaration($1, $2); $$ = new Declaration($1, $2);
}
else
$$ = NULL;
} }
| declaration_specifiers abstract_declarator | declaration_specifiers abstract_declarator
{ {
$$ = new Declaration($1, $2); if ($1 != NULL && $2 != NULL)
$$ = new Declaration($1, $2);
else
$$ = NULL;
} }
| declaration_specifiers | declaration_specifiers
{ {
$$ = new Declaration($1); if ($1 == NULL)
$$ = NULL;
else
$$ = new Declaration($1);
} }
; ;
@@ -1104,7 +1193,10 @@ type_name
: specifier_qualifier_list : specifier_qualifier_list
| specifier_qualifier_list abstract_declarator | specifier_qualifier_list abstract_declarator
{ {
$$ = $2->GetType($1, NULL); if ($1 == NULL || $2 == NULL)
$$ = NULL;
else
$$ = $2->GetType($1, NULL);
} }
; ;
@@ -1116,20 +1208,27 @@ abstract_declarator
| direct_abstract_declarator | direct_abstract_declarator
| pointer direct_abstract_declarator | pointer direct_abstract_declarator
{ {
Declarator *d = new Declarator(DK_POINTER, Union(@1, @2)); if ($2 == NULL)
d->child = $2; $$ = NULL;
$$ = d; else {
Declarator *d = new Declarator(DK_POINTER, Union(@1, @2));
d->child = $2;
$$ = d;
}
} }
| reference | reference
{ {
Declarator *d = new Declarator(DK_REFERENCE, @1); $$ = new Declarator(DK_REFERENCE, @1);
$$ = d;
} }
| reference direct_abstract_declarator | reference direct_abstract_declarator
{ {
Declarator *d = new Declarator(DK_REFERENCE, Union(@1, @2)); if ($2 == NULL)
d->child = $2; $$ = NULL;
$$ = d; else {
Declarator *d = new Declarator(DK_REFERENCE, Union(@1, @2));
d->child = $2;
$$ = d;
}
} }
; ;
@@ -1161,15 +1260,19 @@ direct_abstract_declarator
} }
| direct_abstract_declarator '[' ']' | direct_abstract_declarator '[' ']'
{ {
Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3)); if ($1 == NULL)
d->arraySize = 0; $$ = NULL;
d->child = $1; else {
$$ = d; Declarator *d = new Declarator(DK_ARRAY, Union(@1, @3));
d->arraySize = 0;
d->child = $1;
$$ = d;
}
} }
| direct_abstract_declarator '[' constant_expression ']' | direct_abstract_declarator '[' constant_expression ']'
{ {
int size; int size;
if ($3 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) { if ($1 != NULL && $3 != NULL && lGetConstantInt($3, &size, @3, "Array dimension")) {
if (size < 0) { if (size < 0) {
Error(@3, "Array dimension must be non-negative."); Error(@3, "Array dimension must be non-negative.");
$$ = NULL; $$ = NULL;
@@ -1190,19 +1293,28 @@ direct_abstract_declarator
{ {
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3)); Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3));
if ($2 != NULL) d->functionParams = *$2; if ($2 != NULL) d->functionParams = *$2;
$$ = d;
} }
| direct_abstract_declarator '(' ')' | direct_abstract_declarator '(' ')'
{ {
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3)); if ($1 == NULL)
d->child = $1; $$ = NULL;
$$ = d; else {
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @3));
d->child = $1;
$$ = d;
}
} }
| direct_abstract_declarator '(' parameter_type_list ')' | direct_abstract_declarator '(' parameter_type_list ')'
{ {
Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4)); if ($1 == NULL)
d->child = $1; $$ = NULL;
if ($3 != NULL) d->functionParams = *$3; else {
$$ = d; Declarator *d = new Declarator(DK_FUNCTION, Union(@1, @4));
d->child = $1;
if ($3 != NULL) d->functionParams = *$3;
$$ = d;
}
} }
; ;
@@ -1217,15 +1329,14 @@ initializer_list
{ $$ = new ExprList($1, @1); } { $$ = new ExprList($1, @1); }
| initializer_list ',' initializer | initializer_list ',' initializer
{ {
if ($1 == NULL) ExprList *exprList = $1;
$$ = NULL; if (exprList == NULL) {
else { Assert(m->errorCount > 0);
ExprList *exprList = dynamic_cast<ExprList *>($1); exprList = new ExprList(@3);
Assert(exprList);
exprList->exprs.push_back($3);
exprList->pos = Union(exprList->pos, @3);
$$ = exprList;
} }
exprList->exprs.push_back($3);
exprList->pos = Union(exprList->pos, @3);
$$ = exprList;
} }
; ;
@@ -1240,21 +1351,10 @@ statement
| print_statement | print_statement
| assert_statement | assert_statement
| sync_statement | sync_statement
| error | delete_statement
| error ';'
{ {
std::vector<std::string> builtinTokens; lSuggestBuiltinAlternates();
const char **token = lBuiltinTokens;
while (*token) {
builtinTokens.push_back(*token);
++token;
}
if (strlen(yytext) == 0)
Error(@1, "Syntax error--premature end of file.");
else {
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
std::string alts = lGetAlternates(alternates);
Error(@1, "Syntax error--token \"%s\" unexpected.%s", yytext, alts.c_str());
}
$$ = NULL; $$ = NULL;
} }
; ;
@@ -1300,15 +1400,19 @@ statement_list
} }
| statement_list statement | statement_list statement
{ {
if ($1 != NULL) StmtList *sl = (StmtList *)$1;
((StmtList *)$1)->Add($2); if (sl == NULL) {
$$ = $1; Assert(m->errorCount > 0);
sl = new StmtList(@2);
}
sl->Add($2);
$$ = sl;
} }
; ;
expression_statement expression_statement
: ';' { $$ = NULL; } : ';' { $$ = NULL; }
| expression ';' { $$ = new ExprStmt($1, @1); } | expression ';' { $$ = $1 ? new ExprStmt($1, @1) : NULL; }
; ;
selection_statement selection_statement
@@ -1374,7 +1478,14 @@ foreach_dimension_list
} }
| foreach_dimension_list ',' foreach_dimension_specifier | foreach_dimension_list ',' foreach_dimension_specifier
{ {
$$->push_back($3); std::vector<ForeachDimension *> *dv = $1;
if (dv == NULL) {
Assert(m->errorCount > 0);
dv = new std::vector<ForeachDimension *>;
}
if ($3 != NULL)
dv->push_back($3);
$$ = dv;
} }
; ;
@@ -1405,38 +1516,57 @@ iteration_statement
} }
| foreach_scope '(' foreach_dimension_list ')' | foreach_scope '(' foreach_dimension_list ')'
{ {
std::vector<ForeachDimension *> &dims = *$3; std::vector<ForeachDimension *> *dims = $3;
for (unsigned int i = 0; i < dims.size(); ++i) if (dims == NULL) {
m->symbolTable->AddVariable(dims[i]->sym); Assert(m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
for (unsigned int i = 0; i < dims->size(); ++i)
m->symbolTable->AddVariable((*dims)[i]->sym);
} }
statement statement
{ {
std::vector<ForeachDimension *> &dims = *$3; std::vector<ForeachDimension *> *dims = $3;
if (dims == NULL) {
Assert(m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
std::vector<Symbol *> syms; std::vector<Symbol *> syms;
std::vector<Expr *> begins, ends; std::vector<Expr *> begins, ends;
for (unsigned int i = 0; i < dims.size(); ++i) { for (unsigned int i = 0; i < dims->size(); ++i) {
syms.push_back(dims[i]->sym); syms.push_back((*dims)[i]->sym);
begins.push_back(dims[i]->beginExpr); begins.push_back((*dims)[i]->beginExpr);
ends.push_back(dims[i]->endExpr); ends.push_back((*dims)[i]->endExpr);
} }
$$ = new ForeachStmt(syms, begins, ends, $6, false, @1); $$ = new ForeachStmt(syms, begins, ends, $6, false, @1);
m->symbolTable->PopScope(); m->symbolTable->PopScope();
} }
| foreach_tiled_scope '(' foreach_dimension_list ')' | foreach_tiled_scope '(' foreach_dimension_list ')'
{ {
std::vector<ForeachDimension *> &dims = *$3; std::vector<ForeachDimension *> *dims = $3;
for (unsigned int i = 0; i < dims.size(); ++i) if (dims == NULL) {
m->symbolTable->AddVariable(dims[i]->sym); Assert(m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
for (unsigned int i = 0; i < dims->size(); ++i)
m->symbolTable->AddVariable((*dims)[i]->sym);
} }
statement statement
{ {
std::vector<ForeachDimension *> &dims = *$3; std::vector<ForeachDimension *> *dims = $3;
if (dims == NULL) {
Assert(m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
std::vector<Symbol *> syms; std::vector<Symbol *> syms;
std::vector<Expr *> begins, ends; std::vector<Expr *> begins, ends;
for (unsigned int i = 0; i < dims.size(); ++i) { for (unsigned int i = 0; i < dims->size(); ++i) {
syms.push_back(dims[i]->sym); syms.push_back((*dims)[i]->sym);
begins.push_back(dims[i]->beginExpr); begins.push_back((*dims)[i]->beginExpr);
ends.push_back(dims[i]->endExpr); ends.push_back((*dims)[i]->endExpr);
} }
$$ = new ForeachStmt(syms, begins, ends, $6, true, @1); $$ = new ForeachStmt(syms, begins, ends, $6, true, @1);
m->symbolTable->PopScope(); m->symbolTable->PopScope();
@@ -1469,23 +1599,30 @@ jump_statement
; ;
sync_statement sync_statement
: TOKEN_SYNC : TOKEN_SYNC ';'
{ $$ = new ExprStmt(new SyncExpr(@1), @1); } { $$ = new ExprStmt(new SyncExpr(@1), @1); }
; ;
/* Statement rule for TOKEN_DELETE expression ';'.  The expression is
   wrapped in a new DeleteStmt whose source position is the Union of the
   TOKEN_DELETE location (@1) and the expression location (@2). */
delete_statement
: TOKEN_DELETE expression ';'
{
$$ = new DeleteStmt($2, Union(@1, @2));
}
;
print_statement print_statement
: TOKEN_PRINT '(' string_constant ')' : TOKEN_PRINT '(' string_constant ')' ';'
{ {
$$ = new PrintStmt(*$3, NULL, @1); $$ = new PrintStmt(*$3, NULL, @1);
} }
| TOKEN_PRINT '(' string_constant ',' argument_expression_list ')' | TOKEN_PRINT '(' string_constant ',' argument_expression_list ')' ';'
{ {
$$ = new PrintStmt(*$3, $5, @1); $$ = new PrintStmt(*$3, $5, @1);
} }
; ;
assert_statement assert_statement
: TOKEN_ASSERT '(' string_constant ',' expression ')' : TOKEN_ASSERT '(' string_constant ',' expression ')' ';'
{ {
$$ = new AssertStmt(*$3, $5, @1); $$ = new AssertStmt(*$3, $5, @1);
} }
@@ -1494,22 +1631,7 @@ assert_statement
translation_unit translation_unit
: external_declaration : external_declaration
| translation_unit external_declaration | translation_unit external_declaration
| error | error ';'
{
std::vector<std::string> builtinTokens;
const char **token = lBuiltinTokens;
while (*token) {
builtinTokens.push_back(*token);
++token;
}
if (strlen(yytext) == 0)
Error(@1, "Syntax error--premature end of file.");
else {
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
std::string alts = lGetAlternates(alternates);
Error(@1, "Syntax error--token \"%s\" unexpected.%s", yytext, alts.c_str());
}
}
; ;
external_declaration external_declaration
@@ -1535,9 +1657,11 @@ function_definition
compound_statement compound_statement
{ {
std::vector<Symbol *> args; std::vector<Symbol *> args;
Symbol *sym = $2->GetFunctionInfo($1, &args); if ($2 != NULL) {
if (sym != NULL) Symbol *sym = $2->GetFunctionInfo($1, &args);
m->AddFunctionDefinition(sym, args, $4); if (sym != NULL)
m->AddFunctionDefinition(sym, args, $4);
}
m->symbolTable->PopScope(); // push in lAddFunctionParams(); m->symbolTable->PopScope(); // push in lAddFunctionParams();
} }
/* function with no declared return type?? /* function with no declared return type??
@@ -1553,6 +1677,93 @@ func(...)
%% %%
/** Error callback for the parser: reports the message @p s through
    Error() at the current location (yylloc).  When the current token
    text (yytext) is empty, the message is prefixed with
    "Premature end of file: ". */
void yyerror(const char *s) {
    const bool haveTokenText = (yytext[0] != '\0');

    if (haveTokenText) {
        Error(yylloc, "%s.", s);
        return;
    }

    // No token text at all: report it as hitting the end of the input.
    Error(yylloc, "Premature end of file: %s.", s);
}
/** Copies a human-readable form of the token name @p yystr into @p yyres
    and returns the number of characters written (not counting the
    terminating NUL).  If @p yyres is NULL nothing is written and only the
    length that would be needed is returned.

    Names present in the global tokenNameRemap table are replaced by their
    mapped spelling.  Otherwise, a name that starts with a double quote has
    its surrounding quotes stripped and each "\\" collapsed to a single
    backslash, unless it contains an apostrophe, a comma, or a backslash
    that is not followed by another backslash; in those cases (and for
    unquoted names) the name is copied unchanged.

    NOTE(review): the quote-stripping logic looks like it is taken from
    bison's generated yytnamerr() -- confirm before diverging from it. */
static int
lYYTNameErr (char *yyres, const char *yystr)
{
    extern std::map<std::string, std::string> tokenNameRemap;
    Assert(tokenNameRemap.size() > 0);

    // Search the remap table once and reuse the result, rather than
    // calling find() and then operator[] (a second search plus a copy of
    // the mapped string).
    std::map<std::string, std::string>::iterator remapped =
        tokenNameRemap.find(yystr);
    if (remapped != tokenNameRemap.end()) {
        const std::string &n = remapped->second;
        if (yyres == NULL)
            return n.size();
        else
            return yystpcpy(yyres, n.c_str()) - yyres;
    }

    if (*yystr == '"')
      {
        YYSIZE_T yyn = 0;
        char const *yyp = yystr;

        for (;;)
          switch (*++yyp)
            {
            case '\'':
            case ',':
              goto do_not_strip_quotes;

            case '\\':
              if (*++yyp != '\\')
                goto do_not_strip_quotes;
              /* Fall through.  */
            default:
              if (yyres)
                yyres[yyn] = *yyp;
              yyn++;
              break;

            case '"':
              // Closing quote: terminate the copy and report its length.
              if (yyres)
                yyres[yyn] = '\0';
              return yyn;
            }
      do_not_strip_quotes: ;
      }

    // Unquoted name, or one whose quotes must be kept: copy it verbatim.
    if (! yyres)
      return yystrlen (yystr);

    return yystpcpy (yyres, yystr) - yyres;
}
static void
lSuggestBuiltinAlternates() {
std::vector<std::string> builtinTokens;
const char **token = lBuiltinTokens;
while (*token) {
builtinTokens.push_back(*token);
++token;
}
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
std::string alts = lGetAlternates(alternates);
if (alts.size() > 0)
Error(yylloc, "%s", alts.c_str());
}
static void
lSuggestParamListAlternates() {
std::vector<std::string> builtinTokens;
const char **token = lParamListTokens;
while (*token) {
builtinTokens.push_back(*token);
++token;
}
std::vector<std::string> alternates = MatchStrings(yytext, builtinTokens);
std::string alts = lGetAlternates(alternates);
if (alts.size() > 0)
Error(yylloc, "%s", alts.c_str());
}
static void static void
lAddDeclaration(DeclSpecs *ds, Declarator *decl) { lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
if (ds == NULL || decl == NULL) if (ds == NULL || decl == NULL)
@@ -1576,7 +1787,10 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
m->AddFunctionDeclaration(sym, isInline); m->AddFunctionDeclaration(sym, isInline);
} }
else { else {
sym->type = sym->type->ResolveUnboundVariability(Type::Varying); if (sym->type == NULL)
Assert(m->errorCount > 0);
else
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0; bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0;
m->AddGlobalVariable(sym, decl->initExpr, isConst); m->AddGlobalVariable(sym, decl->initExpr, isConst);
} }
@@ -1591,6 +1805,11 @@ static void
lAddFunctionParams(Declarator *decl) { lAddFunctionParams(Declarator *decl) {
m->symbolTable->PushScope(); m->symbolTable->PushScope();
if (decl == NULL) {
Assert(m->errorCount > 0);
return;
}
// walk down to the declarator for the function itself // walk down to the declarator for the function itself
while (decl->kind != DK_FUNCTION && decl->child != NULL) while (decl->kind != DK_FUNCTION && decl->child != NULL)
decl = decl->child; decl = decl->child;
@@ -1605,14 +1824,18 @@ lAddFunctionParams(Declarator *decl) {
continue; continue;
Assert(pdecl->declarators.size() == 1); Assert(pdecl->declarators.size() == 1);
Symbol *sym = pdecl->declarators[0]->GetSymbol(); Symbol *sym = pdecl->declarators[0]->GetSymbol();
sym->type = sym->type->ResolveUnboundVariability(Type::Varying); if (sym == NULL || sym->type == NULL)
#ifndef NDEBUG
bool ok = m->symbolTable->AddVariable(sym);
if (ok == false)
Assert(m->errorCount > 0); Assert(m->errorCount > 0);
else {
sym->type = sym->type->ResolveUnboundVariability(Type::Varying);
#ifndef NDEBUG
bool ok = m->symbolTable->AddVariable(sym);
if (ok == false)
Assert(m->errorCount > 0);
#else #else
m->symbolTable->AddVariable(sym); m->symbolTable->AddVariable(sym);
#endif #endif
}
} }
// The corresponding pop scope happens in function_definition rules // The corresponding pop scope happens in function_definition rules
@@ -1622,7 +1845,7 @@ lAddFunctionParams(Declarator *decl) {
/** Add a symbol for the built-in mask variable to the symbol table */ /** Add a symbol for the built-in mask variable to the symbol table */
static void lAddMaskToSymbolTable(SourcePos pos) { static void lAddMaskToSymbolTable(SourcePos pos) {
const Type *t = g->target.isa == Target::GENERIC ? const Type *t = g->target.maskBitCount == 1 ?
AtomicType::VaryingConstBool : AtomicType::VaryingConstUInt32; AtomicType::VaryingConstBool : AtomicType::VaryingConstUInt32;
Symbol *maskSymbol = new Symbol("__mask", pos, t); Symbol *maskSymbol = new Symbol("__mask", pos, t);
m->symbolTable->AddVariable(maskSymbol); m->symbolTable->AddVariable(maskSymbol);

View File

@@ -42,23 +42,19 @@ parser.add_option('-j', '--jobs', dest='num_jobs', help='Maximum number of jobs
default="1024", type="int") default="1024", type="int")
parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output', parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output',
default=False, action="store_true") default=False, action="store_true")
if not is_windows: parser.add_option('--wrap-exe', dest='wrapexe',
parser.add_option('--valgrind', dest='valgrind', help='Run tests with valgrind', help='Executable to wrap test runs with (e.g. "valgrind")',
default=False, action="store_true") default="")
(options, args) = parser.parse_args() (options, args) = parser.parse_args()
if not is_windows and options.valgrind:
valgrind_exe = "valgrind "
else:
valgrind_exe = ""
if not is_windows: if not is_windows:
ispc_exe = "./ispc" ispc_exe = "./ispc"
else: else:
ispc_exe = "../Release/ispc.exe" ispc_exe = "../Release/ispc.exe"
is_generic_target = options.target.find("generic-") != -1 is_generic_target = (options.target.find("generic-") != -1 and
options.target != "generic-1")
if is_generic_target and options.include_file == None: if is_generic_target and options.include_file == None:
if options.target == "generic-4": if options.target == "generic-4":
sys.stderr.write("No generics #include specified; using examples/intrinsics/sse4.h\n") sys.stderr.write("No generics #include specified; using examples/intrinsics/sse4.h\n")
@@ -76,14 +72,31 @@ if options.compiler_exe == None:
else: else:
options.compiler_exe = "g++" options.compiler_exe = "g++"
# if no specific test files are specified, run all of the tests in tests/ def fix_windows_paths(files):
# and failing_tests/ ret = [ ]
for fn in files:
ret += [ string.replace(fn, '\\', '/') ]
return ret
# if no specific test files are specified, run all of the tests in tests/,
# failing_tests/, and tests_errors/
if len(args) == 0: if len(args) == 0:
files = glob.glob("tests/*ispc") + glob.glob("failing_tests/*ispc") + \ files = glob.glob("tests/*ispc") + glob.glob("failing_tests/*ispc") + \
glob.glob("tests_errors/*ispc") glob.glob("tests_errors/*ispc")
files = fix_windows_paths(files)
else: else:
if is_windows:
argfiles = [ ]
for f in args:
# we have to glob ourselves if this is being run under a DOS
# shell..
argfiles += glob.glob(f)
else:
argfiles = args
files = [ ] files = [ ]
for f in args: for f in argfiles:
if os.path.splitext(string.lower(f))[1] != ".ispc": if os.path.splitext(string.lower(f))[1] != ".ispc":
sys.stdout.write("Ignoring file %s, which doesn't have an .ispc extension.\n" % f) sys.stdout.write("Ignoring file %s, which doesn't have an .ispc extension.\n" % f)
else: else:
@@ -103,6 +116,7 @@ finished_tests_counter_lock = multiprocessing.Lock()
# utility routine to print an update on the number of tests that have been # utility routine to print an update on the number of tests that have been
# finished. Should be called with the lock held.. # finished. Should be called with the lock held..
def update_progress(fn): def update_progress(fn):
global total_tests
finished_tests_counter.value = finished_tests_counter.value + 1 finished_tests_counter.value = finished_tests_counter.value + 1
progress_str = " Done %d / %d [%s]" % (finished_tests_counter.value, total_tests, fn) progress_str = " Done %d / %d [%s]" % (finished_tests_counter.value, total_tests, fn)
# spaces to clear out detritus from previous printing... # spaces to clear out detritus from previous printing...
@@ -211,7 +225,7 @@ def run_test(filename):
"in test %s\n" % filename) "in test %s\n" % filename)
return (1, 0) return (1, 0)
else: else:
is_generic_target = options.target.find("generic-") != -1 global is_generic_target
if is_generic_target: if is_generic_target:
obj_name = "%s.cpp" % filename obj_name = "%s.cpp" % filename
@@ -248,9 +262,8 @@ def run_test(filename):
ispc_cmd += " --emit-c++ --c++-include-file=%s" % options.include_file ispc_cmd += " --emit-c++ --c++-include-file=%s" % options.include_file
# compile the ispc code, make the executable, and run it... # compile the ispc code, make the executable, and run it...
global valgrind_exe
(compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd], (compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd],
valgrind_exe + " " + exe_name, \ options.wrapexe + " " + exe_name, \
filename, should_fail) filename, should_fail)
# clean up after running the test # clean up after running the test

View File

@@ -795,217 +795,6 @@ static inline uniform int64 clock() {
return __clock(); return __clock();
} }
///////////////////////////////////////////////////////////////////////////
// Atomics and memory barriers
// Thin wrapper that calls the __memory_barrier() builtin.
static inline void memory_barrier() {
__memory_barrier();
}
// Defines three overloads of atomic_<OPA>_global() for element type TA.
//   TA       - element type used in the generated signatures
//   TB       - type suffix used in the names of the __atomic_* builtins
//   OPA      - operation name used in the generated function name
//   OPB      - operation name used in the builtin name
//   MASKTYPE - type that __mask is cast to for the varying builtin
// Overloads generated:
//   (uniform pointer, varying value): one call to
//       __atomic_<OPB>_<TB>_global with the execution mask.
//   (uniform pointer, uniform value): one call to
//       __atomic_<OPB>_uniform_<TB>_global.
//   (varying pointer, varying value): loops over the program instances
//       whose lanemask() bit is set and issues one uniform atomic per
//       instance, inserting each result into the returned value.
// Every overload calls memory_barrier() before and after the atomic work.
// The last line deliberately has no line continuation, so the macro ends
// at its closing brace regardless of what follows it.
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
    memory_barrier(); \
    TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
    memory_barrier(); \
    return ret; \
} \
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
                                               uniform TA value) { \
    memory_barrier(); \
    uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
    memory_barrier(); \
    return ret; \
} \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
    uniform TA * uniform ptrArray[programCount]; \
    ptrArray[programIndex] = ptr; \
    memory_barrier(); \
    TA ret; \
    uniform int mask = lanemask(); \
    for (uniform int i = 0; i < programCount; ++i) { \
        /* Skip program instances that are not running. */ \
        if ((mask & (1 << i)) == 0) \
            continue; \
        uniform TA * uniform p = ptrArray[i]; \
        uniform TA v = extract(value, i); \
        uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
        ret = insert(ret, i, r); \
    } \
    memory_barrier(); \
    return ret; \
}
// Defines three overloads of atomic_swap_global() for element type TA,
// built on the __atomic_swap_uniform_<TB>_global builtin.
//   (uniform pointer, varying value): only the first running program
//       instance swaps with memory; the remaining running instances are
//       chained in registers (see the comments in the body).
//   (uniform pointer, uniform value): a single uniform swap.
//   (varying pointer, varying value): one uniform swap per running
//       program instance, each on that instance's own pointer.
// Every overload calls memory_barrier() before and after the swap work.
// The last line deliberately has no line continuation, so the macro ends
// at its closing brace regardless of what follows it.
#define DEFINE_ATOMIC_SWAP(TA,TB) \
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
    memory_barrier(); \
    uniform int i = 0; \
    TA ret[programCount]; \
    TA memVal; \
    /* -1 means "no program instance has swapped yet". */ \
    uniform int lastSwap = -1; \
    uniform int mask = lanemask(); \
    /* First, have the first running program instance (if any) perform \
       the swap with memory with its value of "value"; record the \
       value returned. */ \
    for (; i < programCount; ++i) { \
        if ((mask & (1 << i)) == 0) \
            continue; \
        memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \
        lastSwap = i; \
        break; \
    } \
    /* Now, for all of the remaining running program instances, set the \
       return value of the last instance that did a swap with this \
       instance's value of "value"; this gives the same effect as if the \
       current instance had executed a hardware atomic swap right before \
       the last one that did a swap. */ \
    for (; i < programCount; ++i) { \
        if ((mask & (1 << i)) == 0) \
            continue; \
        ret[lastSwap] = extract(value, i); \
        lastSwap = i; \
    } \
    /* And the last instance that wanted to swap gets the value we \
       originally got back from memory...  If no instance was running, \
       neither lastSwap nor memVal was ever set, so there is nothing to \
       store (and no valid slot to store it in). */ \
    if (lastSwap >= 0) \
        ret[lastSwap] = memVal; \
    memory_barrier(); \
    return ret[programIndex]; \
} \
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
                                            uniform TA value) { \
    memory_barrier(); \
    uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
    memory_barrier(); \
    return ret; \
} \
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
    uniform TA * uniform ptrArray[programCount]; \
    ptrArray[programIndex] = ptr; \
    memory_barrier(); \
    TA ret; \
    uniform int mask = lanemask(); \
    for (uniform int i = 0; i < programCount; ++i) { \
        /* Skip program instances that are not running. */ \
        if ((mask & (1 << i)) == 0) \
            continue; \
        uniform TA * uniform p = ptrArray[i]; \
        uniform TA v = extract(value, i); \
        uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
        ret = insert(ret, i, r); \
    } \
    memory_barrier(); \
    return ret; \
}
// DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB): expands to three
// atomic_<OPA>_global() overloads for ispc type TA, built on the
// __atomic_<OPB>_uniform_<TB>_global builtin:
//   1. uniform pointer, varying value
//   2. uniform pointer, uniform value
//   3. varying pointer, varying value
#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB) \
/* Overload 1: the varying value is first collapsed to one uniform value \
with the matching reduce function, so a single atomic is issued and its \
result is assigned to ret as a whole.  ret is left unassigned when \
lanemask() is zero. */ \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
uniform TA oneval = reduce_##OPA(value); \
TA ret; \
if (lanemask() != 0) { \
memory_barrier(); \
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
memory_barrier(); \
} \
return ret; \
} \
/* Overload 2: uniform pointer, uniform value -- a single atomic between \
two memory barriers. */ \
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
uniform TA value) { \
memory_barrier(); \
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
memory_barrier(); \
return ret; \
} \
/* Overload 3: varying pointer.  The per-lane pointers are copied into a \
uniform array, then one uniform atomic is issued for each lane whose bit \
is set in lanemask(). */ \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
TA value) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
uniform TA * uniform p = ptrArray[i]; \
uniform TA v = extract(value, i); \
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
}
DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max)
DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
DEFINE_ATOMIC_SWAP(int32,int32)
// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax)
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
DEFINE_ATOMIC_SWAP(unsigned int32,int32)
DEFINE_ATOMIC_SWAP(float,float)
DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max)
DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
DEFINE_ATOMIC_SWAP(int64,int64)
// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax)
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
DEFINE_ATOMIC_SWAP(unsigned int64,int64)
DEFINE_ATOMIC_SWAP(double,double)
#undef DEFINE_ATOMIC_OP
#undef DEFINE_ATOMIC_MINMAX_OP
#undef DEFINE_ATOMIC_SWAP
// ATOMIC_DECL_CMPXCHG(TA,TB,MASKTYPE): expands to two
// atomic_compare_exchange_global() overloads for ispc type TA that take a
// uniform pointer: one with varying oldval/newval and one with uniform
// oldval/newval.  Both bracket the builtin call with memory_barrier().
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
/* Varying operands: forwards to the masked builtin, passing the current \
execution mask cast to MASKTYPE. */ \
static inline TA atomic_compare_exchange_global( \
uniform TA * uniform ptr, TA oldval, TA newval) { \
memory_barrier(); \
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
(MASKTYPE)__mask); \
memory_barrier(); \
return ret; \
} \
/* Uniform operands: forwards to the uniform builtin. */ \
static inline uniform TA atomic_compare_exchange_global( \
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
memory_barrier(); \
uniform TA ret = \
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
memory_barrier(); \
return ret; \
}
ATOMIC_DECL_CMPXCHG(int32, int32, IntMaskType)
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, UIntMaskType)
ATOMIC_DECL_CMPXCHG(float, float, IntMaskType)
ATOMIC_DECL_CMPXCHG(int64, int64, IntMaskType)
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, UIntMaskType)
ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
#undef ATOMIC_DECL_CMPXCHG
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// Floating-Point Math // Floating-Point Math
@@ -1389,6 +1178,419 @@ static inline uniform int64 clamp(uniform int64 v, uniform int64 low,
return min(max(v, low), high); return min(max(v, low), high);
} }
///////////////////////////////////////////////////////////////////////////
// Global atomics and memory barriers
// Public wrapper that forwards to the __memory_barrier() builtin.  The
// global atomic functions below call it before and after each operation.
static inline void memory_barrier() {
__memory_barrier();
}
// DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE): expands to three
// atomic_<OPA>_global() overloads for ispc type TA:
//   1. uniform pointer, varying value  -> __atomic_<OPB>_<TB>_global
//   2. uniform pointer, uniform value  -> __atomic_<OPB>_uniform_<TB>_global
//   3. varying pointer, varying value  -> the uniform builtin, once per lane
// Every overload calls memory_barrier() before and after the atomic(s).
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
/* Overload 1: forwards to the masked builtin, passing the current \
execution mask cast to MASKTYPE. */ \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
memory_barrier(); \
TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
memory_barrier(); \
return ret; \
} \
/* Overload 2: a single uniform atomic. */ \
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
uniform TA value) { \
memory_barrier(); \
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
memory_barrier(); \
return ret; \
} \
/* Overload 3: the per-lane pointers are copied into a uniform array, \
then one uniform atomic is issued for each lane whose bit is set in \
lanemask(); each result is inserted into that lane of ret. */ \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
uniform TA * uniform p = ptrArray[i]; \
uniform TA v = extract(value, i); \
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
} \
// DEFINE_ATOMIC_SWAP(TA,TB): expands to three atomic_swap_global()
// overloads for ispc type TA, all built on the
// __atomic_swap_uniform_<TB>_global builtin:
//   1. uniform pointer, varying value
//   2. uniform pointer, uniform value
//   3. varying pointer, varying value
// Every overload calls memory_barrier() before and after its swap(s).
#define DEFINE_ATOMIC_SWAP(TA,TB) \
/* Overload 1: uniform pointer, varying value.  Only one swap with memory \
is issued; the remaining return values are passed from lane to lane. */ \
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
memory_barrier(); \
uniform int i = 0; \
TA ret[programCount]; \
TA memVal; \
uniform int lastSwap; \
uniform int mask = lanemask(); \
/* First, have the first running program instance (if any) perform \
the swap with memory with its value of "value"; record the \
value returned. */ \
for (; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \
lastSwap = i; \
break; \
} \
/* Now, for all of the remaining running program instances, set the \
return value of the last instance that did a swap with this \
instance's value of "value"; this gives the same effect as if the \
current instance had executed a hardware atomic swap right before \
the last one that did a swap. */ \
for (; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
ret[lastSwap] = extract(value, i); \
lastSwap = i; \
} \
/* And the last instance that wanted to swap gets the value we \
originally got back from memory... */ \
/* NOTE(review): if no bit of mask is set, neither lastSwap nor memVal \
has been assigned when this line runs -- confirm that is harmless. */ \
ret[lastSwap] = memVal; \
memory_barrier(); \
return ret[programIndex]; \
} \
/* Overload 2: uniform pointer, uniform value -- a single swap. */ \
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
uniform TA value) { \
memory_barrier(); \
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
memory_barrier(); \
return ret; \
} \
/* Overload 3: varying pointer, varying value.  The per-lane pointers are \
copied into a uniform array, then one uniform swap is issued for each \
lane whose bit is set in lanemask(). */ \
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
uniform TA * uniform p = ptrArray[i]; \
uniform TA v = extract(value, i); \
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
} \
// DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB): expands to three
// atomic_<OPA>_global() overloads for ispc type TA, built on the
// __atomic_<OPB>_uniform_<TB>_global builtin:
//   1. uniform pointer, varying value
//   2. uniform pointer, uniform value
//   3. varying pointer, varying value
#define DEFINE_ATOMIC_MINMAX_OP(TA,TB,OPA,OPB) \
/* Overload 1: the varying value is first collapsed to one uniform value \
with the matching reduce function, so a single atomic is issued and its \
result is assigned to ret as a whole.  ret is left unassigned when \
lanemask() is zero. */ \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
uniform TA oneval = reduce_##OPA(value); \
TA ret; \
if (lanemask() != 0) { \
memory_barrier(); \
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
memory_barrier(); \
} \
return ret; \
} \
/* Overload 2: uniform pointer, uniform value -- a single atomic between \
two memory barriers. */ \
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
uniform TA value) { \
memory_barrier(); \
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
memory_barrier(); \
return ret; \
} \
/* Overload 3: varying pointer.  The per-lane pointers are copied into a \
uniform array, then one uniform atomic is issued for each lane whose bit \
is set in lanemask(). */ \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
TA value) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
uniform TA * uniform p = ptrArray[i]; \
uniform TA v = extract(value, i); \
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
}
DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max)
DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
DEFINE_ATOMIC_SWAP(int32,int32)
// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax)
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
DEFINE_ATOMIC_SWAP(unsigned int32,int32)
DEFINE_ATOMIC_SWAP(float,float)
DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max)
DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
DEFINE_ATOMIC_SWAP(int64,int64)
// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax)
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
DEFINE_ATOMIC_SWAP(unsigned int64,int64)
DEFINE_ATOMIC_SWAP(double,double)
#undef DEFINE_ATOMIC_OP
#undef DEFINE_ATOMIC_MINMAX_OP
#undef DEFINE_ATOMIC_SWAP
// ATOMIC_DECL_CMPXCHG(TA,TB,MASKTYPE): expands to three
// atomic_compare_exchange_global() overloads for ispc type TA:
//   1. uniform pointer, uniform oldval/newval
//   2. uniform pointer, varying oldval/newval
//   3. varying pointer, varying oldval/newval
// Every overload brackets its builtin call(s) with memory_barrier().
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
/* Overload 1: forwards to the uniform builtin. */ \
static inline uniform TA atomic_compare_exchange_global( \
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
memory_barrier(); \
uniform TA ret = \
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
memory_barrier(); \
return ret; \
} \
/* Overload 2: forwards to the masked builtin, passing the current \
execution mask cast to MASKTYPE. */ \
static inline TA atomic_compare_exchange_global( \
uniform TA * uniform ptr, TA oldval, TA newval) { \
memory_barrier(); \
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
(MASKTYPE)__mask); \
memory_barrier(); \
return ret; \
} \
/* Overload 3: the per-lane pointers are copied into a uniform array, \
then the uniform builtin is called once for each lane whose bit is set \
in lanemask(), using that lane's oldval and newval. */ \
static inline TA atomic_compare_exchange_global( \
uniform TA * varying ptr, TA oldval, TA newval) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
uniform TA r = \
__atomic_compare_exchange_uniform_##TB##_global(ptrArray[i], \
extract(oldval, i), \
extract(newval, i)); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
}
ATOMIC_DECL_CMPXCHG(int32, int32, IntMaskType)
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, UIntMaskType)
ATOMIC_DECL_CMPXCHG(float, float, IntMaskType)
ATOMIC_DECL_CMPXCHG(int64, int64, IntMaskType)
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, UIntMaskType)
ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
#undef ATOMIC_DECL_CMPXCHG
///////////////////////////////////////////////////////////////////////////
// local atomics
// LOCAL_ATOMIC(TYPE,NAME,OPFUNC): expands to three atomic_<NAME>_local()
// overloads for ispc type TYPE.  Each one returns the value that was in
// memory before the update and stores OPFUNC(old, value) back.  They use
// plain loads and stores: no memory_barrier() call and no __atomic_*
// builtin appears in these bodies.
#define LOCAL_ATOMIC(TYPE,NAME,OPFUNC) \
/* Uniform pointer, uniform value: one read-modify-write. */ \
static inline uniform TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, \
uniform TYPE value) { \
uniform TYPE ret = *ptr; \
*ptr = OPFUNC(*ptr, value); \
return ret; \
} \
/* Uniform pointer, varying value: the lanes whose bit is set in \
lanemask() are applied one after another, in increasing lane order, so \
each lane sees the result left by the previous one. */ \
static inline TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, TYPE value) { \
TYPE ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
ret = insert(ret, i, *ptr); \
*ptr = OPFUNC(*ptr, extract(value, i)); \
} \
return ret; \
} \
/* Per-lane pointer: the pointers are copied into a uniform array and \
processed one lane at a time.  NOTE(review): p carries no explicit \
uniform/varying qualifier here, while LOCAL_CMPXCHG spells out \
"varying" for the same role -- presumably equivalent; confirm. */ \
static inline TYPE atomic_##NAME##_local(uniform TYPE * p, TYPE value) { \
TYPE ret; \
uniform TYPE * uniform ptrs[programCount]; \
ptrs[programIndex] = p; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
ret = insert(ret, i, *ptrs[i]); \
*ptrs[i] = OPFUNC(*ptrs[i], extract(value, i)); \
} \
return ret; \
}
// Binary helper functions passed as the OPFUNC argument of LOCAL_ATOMIC.
// Each takes the current memory value as "a" and the operand as "b" and
// returns the value to store back.  __swap ignores "a" and returns "b".

// int32 helpers
static inline uniform int32 __add(uniform int32 a, uniform int32 b) { return a+b; }
static inline uniform int32 __sub(uniform int32 a, uniform int32 b) { return a-b; }
static inline uniform int32 __and(uniform int32 a, uniform int32 b) { return a & b; }
static inline uniform int32 __or(uniform int32 a, uniform int32 b) { return a | b; }
static inline uniform int32 __xor(uniform int32 a, uniform int32 b) { return a ^ b; }
static inline uniform int32 __swap(uniform int32 a, uniform int32 b) { return b; }
// unsigned int32 helpers
static inline uniform unsigned int32 __add(uniform unsigned int32 a,
uniform unsigned int32 b) { return a+b; }
static inline uniform unsigned int32 __sub(uniform unsigned int32 a,
uniform unsigned int32 b) { return a-b; }
static inline uniform unsigned int32 __and(uniform unsigned int32 a,
uniform unsigned int32 b) { return a & b; }
static inline uniform unsigned int32 __or(uniform unsigned int32 a,
uniform unsigned int32 b) { return a | b; }
static inline uniform unsigned int32 __xor(uniform unsigned int32 a,
uniform unsigned int32 b) { return a ^ b; }
static inline uniform unsigned int32 __swap(uniform unsigned int32 a,
uniform unsigned int32 b) { return b; }
// float helpers (no bitwise operations are defined for floating point)
static inline uniform float __add(uniform float a, uniform float b) { return a+b; }
static inline uniform float __sub(uniform float a, uniform float b) { return a-b; }
static inline uniform float __swap(uniform float a, uniform float b) { return b; }
// int64 helpers
static inline uniform int64 __add(uniform int64 a, uniform int64 b) { return a+b; }
static inline uniform int64 __sub(uniform int64 a, uniform int64 b) { return a-b; }
static inline uniform int64 __and(uniform int64 a, uniform int64 b) { return a & b; }
static inline uniform int64 __or(uniform int64 a, uniform int64 b) { return a | b; }
static inline uniform int64 __xor(uniform int64 a, uniform int64 b) { return a ^ b; }
static inline uniform int64 __swap(uniform int64 a, uniform int64 b) { return b; }
// unsigned int64 helpers
static inline uniform unsigned int64 __add(uniform unsigned int64 a,
uniform unsigned int64 b) { return a+b; }
static inline uniform unsigned int64 __sub(uniform unsigned int64 a,
uniform unsigned int64 b) { return a-b; }
static inline uniform unsigned int64 __and(uniform unsigned int64 a,
uniform unsigned int64 b) { return a & b; }
static inline uniform unsigned int64 __or(uniform unsigned int64 a,
uniform unsigned int64 b) { return a | b; }
static inline uniform unsigned int64 __xor(uniform unsigned int64 a,
uniform unsigned int64 b) { return a ^ b; }
static inline uniform unsigned int64 __swap(uniform unsigned int64 a,
uniform unsigned int64 b) { return b; }
// double helpers (no bitwise operations are defined for floating point)
static inline uniform double __add(uniform double a, uniform double b) { return a+b; }
static inline uniform double __sub(uniform double a, uniform double b) { return a-b; }
// Swap helper used as the LOCAL_ATOMIC operation for double: the value to
// store is simply the new operand "b", exactly like the other __swap
// overloads.  (The previous body returned a-b, so atomic_swap_local() on
// doubles stored a difference instead of the new value.)
static inline uniform double __swap(uniform double a, uniform double b) { return b; }
LOCAL_ATOMIC(int32, add, __add)
LOCAL_ATOMIC(int32, subtract, __sub)
LOCAL_ATOMIC(int32, and, __and)
LOCAL_ATOMIC(int32, or, __or)
LOCAL_ATOMIC(int32, xor, __xor)
LOCAL_ATOMIC(int32, min, min)
LOCAL_ATOMIC(int32, max, max)
LOCAL_ATOMIC(int32, swap, __swap)
LOCAL_ATOMIC(unsigned int32, add, __add)
LOCAL_ATOMIC(unsigned int32, subtract, __sub)
LOCAL_ATOMIC(unsigned int32, and, __and)
LOCAL_ATOMIC(unsigned int32, or, __or)
LOCAL_ATOMIC(unsigned int32, xor, __xor)
LOCAL_ATOMIC(unsigned int32, min, min)
LOCAL_ATOMIC(unsigned int32, max, max)
LOCAL_ATOMIC(unsigned int32, swap, __swap)
LOCAL_ATOMIC(float, add, __add)
LOCAL_ATOMIC(float, subtract, __sub)
LOCAL_ATOMIC(float, min, min)
LOCAL_ATOMIC(float, max, max)
LOCAL_ATOMIC(float, swap, __swap)
LOCAL_ATOMIC(int64, add, __add)
LOCAL_ATOMIC(int64, subtract, __sub)
LOCAL_ATOMIC(int64, and, __and)
LOCAL_ATOMIC(int64, or, __or)
LOCAL_ATOMIC(int64, xor, __xor)
LOCAL_ATOMIC(int64, min, min)
LOCAL_ATOMIC(int64, max, max)
LOCAL_ATOMIC(int64, swap, __swap)
LOCAL_ATOMIC(unsigned int64, add, __add)
LOCAL_ATOMIC(unsigned int64, subtract, __sub)
LOCAL_ATOMIC(unsigned int64, and, __and)
LOCAL_ATOMIC(unsigned int64, or, __or)
LOCAL_ATOMIC(unsigned int64, xor, __xor)
LOCAL_ATOMIC(unsigned int64, min, min)
LOCAL_ATOMIC(unsigned int64, max, max)
LOCAL_ATOMIC(unsigned int64, swap, __swap)
LOCAL_ATOMIC(double, add, __add)
LOCAL_ATOMIC(double, subtract, __sub)
LOCAL_ATOMIC(double, min, min)
LOCAL_ATOMIC(double, max, max)
LOCAL_ATOMIC(double, swap, __swap)
// compare exchange
// LOCAL_CMPXCHG(TYPE): expands to three atomic_compare_exchange_local()
// overloads for ispc type TYPE.  Each one returns the value that was in
// memory before the call and stores "update" only if that value equals
// "cmp".  They use plain loads and stores: no memory_barrier() call and
// no __atomic_* builtin appears in these bodies.
#define LOCAL_CMPXCHG(TYPE) \
/* Uniform pointer, uniform operands: one compare-and-store. */ \
static inline uniform TYPE atomic_compare_exchange_local(uniform TYPE * uniform ptr, \
uniform TYPE cmp, \
uniform TYPE update) { \
uniform TYPE old = *ptr; \
if (old == cmp) \
*ptr = update; \
return old; \
} \
/* Uniform pointer, varying operands: lanes whose bit is set in \
lanemask() are applied one after another, in increasing lane order, so \
each lane compares against the value left by the previous one. */ \
static inline TYPE atomic_compare_exchange_local(uniform TYPE * uniform ptr, \
TYPE cmp, TYPE update) { \
TYPE ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
uniform TYPE old = *ptr; \
if (old == extract(cmp, i)) \
*ptr = extract(update, i); \
ret = insert(ret, i, old); \
} \
return ret; \
} \
/* Varying pointer: the per-lane pointers are copied into a uniform array \
and each active lane does its own compare-and-store, one at a time. */ \
static inline TYPE atomic_compare_exchange_local(uniform TYPE * varying p, \
TYPE cmp, TYPE update) { \
uniform TYPE * uniform ptrs[programCount]; \
ptrs[programIndex] = p; \
TYPE ret; \
uniform int mask = lanemask(); \
for (uniform int i = 0; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
continue; \
uniform TYPE old = *ptrs[i]; \
if (old == extract(cmp, i)) \
*ptrs[i] = extract(update, i); \
ret = insert(ret, i, old); \
} \
return ret; \
}
LOCAL_CMPXCHG(int32)
LOCAL_CMPXCHG(unsigned int32)
LOCAL_CMPXCHG(float)
LOCAL_CMPXCHG(int64)
LOCAL_CMPXCHG(unsigned int64)
LOCAL_CMPXCHG(double)
#undef LOCAL_ATOMIC
#undef LOCAL_CMPXCHG
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// Transcendentals (float precision) // Transcendentals (float precision)
@@ -3246,14 +3448,23 @@ static inline uniform unsigned int __seed4(RNGState * uniform state,
} }
static inline void seed_rng(uniform RNGState * uniform state, uniform unsigned int seed) { static inline void seed_rng(uniform RNGState * uniform state, uniform unsigned int seed) {
seed = __seed4(state, 0, seed); if (programCount == 1) {
if (programCount == 8) state->z1 = seed;
__seed4(state, 4, seed ^ 0xbeeff00d); state->z2 = seed ^ 0xbeeff00d;
if (programCount == 16) { state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
__seed4(state, 4, seed ^ 0xbeeff00d); state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16)); ((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) | }
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24)); else {
seed = __seed4(state, 0, seed);
if (programCount == 8)
__seed4(state, 4, seed ^ 0xbeeff00d);
if (programCount == 16) {
__seed4(state, 4, seed ^ 0xbeeff00d);
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16));
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24));
}
} }
} }

341
stmt.cpp
View File

@@ -119,153 +119,6 @@ DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p)
} }
/** If \c expr is a function symbol being used to initialize a variable
    whose type is a pointer to a function, resolve the (possibly
    overloaded) symbol against that function type's parameter types.

    @param expr  Initializer expression
    @param type  Type of the variable being initialized
    @return      false only if overload resolution was attempted and
                 failed; true in every other case (including when no
                 resolution was needed).
*/
static bool
lPossiblyResolveFunctionOverloads(Expr *expr, const Type *type) {
    // Only pointers are of interest...
    if (dynamic_cast<const PointerType *>(type) == NULL)
        return true;

    // ...and only pointers whose base type is a function type...
    const FunctionType *targetType =
        dynamic_cast<const FunctionType *>(type->GetBaseType());
    if (targetType == NULL)
        return true;

    // ...that are being initialized directly from a function symbol.
    FunctionSymbolExpr *funcSym = dynamic_cast<FunctionSymbolExpr *>(expr);
    if (funcSym == NULL)
        return true;

    // The symbol may name an overloaded function, so pick the overload
    // using the parameter types of the pointer being initialized.
    std::vector<const Type *> argTypes;
    for (int p = 0; p < targetType->GetNumParameters(); ++p)
        argTypes.push_back(targetType->GetParameterType(p));

    if (funcSym->ResolveOverloads(expr->pos, argTypes) == false)
        return false;
    return true;
}
/** Utility routine that emits code to initialize a symbol given an
    initializer expression.  Recurses for the elements of structs, arrays
    and vectors that are initialized with an expression list.

    @param lvalue    Memory location of storage for the symbol's data
    @param symName   Name of symbol (used in error messages)
    @param symType   Type of variable being initialized
    @param initExpr  Expression for the initializer; if NULL the storage
                     is left uninitialized
    @param ctx       FunctionEmitContext to use for generating instructions
    @param pos       Source file position of the variable being initialized
*/
static void
lInitSymbol(llvm::Value *lvalue, const char *symName, const Type *symType,
            Expr *initExpr, FunctionEmitContext *ctx, SourcePos pos) {
    if (initExpr == NULL)
        // leave it uninitialized
        return;

    // If the initializer is a straight up expression that isn't an
    // ExprList, then we'll see if we can type convert it to the type of
    // the variable.
    if (dynamic_cast<ExprList *>(initExpr) == NULL) {
        if (lPossiblyResolveFunctionOverloads(initExpr, symType) == false)
            return;
        initExpr = TypeConvertExpr(initExpr, symType, "initializer");

        if (initExpr == NULL)
            // The conversion failed.  Stop here: every path below
            // dereferences initExpr, so falling through with a NULL
            // pointer would crash.  (TypeConvertExpr is presumably the
            // one that reports the error to the user -- TODO confirm.)
            return;

        llvm::Value *initializerValue = initExpr->GetValue(ctx);
        if (initializerValue != NULL)
            // Bingo; store the value in the variable's storage
            ctx->StoreInst(initializerValue, lvalue);
        return;
    }

    // From here on, initExpr is a non-NULL ExprList.

    // Atomic types and enums can't be initialized with { ... } initializer
    // expressions, so print an error and return if that's what we've got
    // here..
    if (dynamic_cast<const AtomicType *>(symType) != NULL ||
        dynamic_cast<const EnumType *>(symType) != NULL ||
        dynamic_cast<const PointerType *>(symType) != NULL) {
        ExprList *elist = dynamic_cast<ExprList *>(initExpr);
        if (elist != NULL) {
            if (elist->exprs.size() == 1)
                // A one-element list, e.g. "int x = { 3 };", is treated
                // like its single element.
                lInitSymbol(lvalue, symName, symType, elist->exprs[0], ctx,
                            pos);
            else
                Error(initExpr->pos, "Expression list initializers can't be used for "
                      "variable \"%s\" with type \"%s\".", symName,
                      symType->GetString().c_str());
        }
        return;
    }

    const ReferenceType *rt = dynamic_cast<const ReferenceType *>(symType);
    if (rt) {
        if (!Type::Equal(initExpr->GetType(), rt)) {
            Error(initExpr->pos, "Initializer for reference type \"%s\" must have same "
                  "reference type itself. \"%s\" is incompatible.",
                  rt->GetString().c_str(), initExpr->GetType()->GetString().c_str());
            return;
        }

        llvm::Value *initializerValue = initExpr->GetValue(ctx);
        if (initializerValue)
            ctx->StoreInst(initializerValue, lvalue);
        return;
    }

    // There are two cases for initializing structs, arrays and vectors;
    // either a single initializer may be provided (float foo[3] = 0;), in
    // which case all of the elements are initialized to the given value,
    // or an initializer list may be provided (float foo[3] = { 1,2,3 }),
    // in which case the elements are initialized with the corresponding
    // values.
    const CollectionType *collectionType =
        dynamic_cast<const CollectionType *>(symType);
    if (collectionType != NULL) {
        // Human-readable kind of collection, used in the error message.
        std::string name;
        if (dynamic_cast<const StructType *>(symType) != NULL)
            name = "struct";
        else if (dynamic_cast<const ArrayType *>(symType) != NULL)
            name = "array";
        else if (dynamic_cast<const VectorType *>(symType) != NULL)
            name = "vector";
        else
            FATAL("Unexpected CollectionType in lInitSymbol()");

        ExprList *exprList = dynamic_cast<ExprList *>(initExpr);
        if (exprList != NULL) {
            // The { ... } case; make sure we have the same number of
            // expressions in the ExprList as we have struct members
            int nInits = exprList->exprs.size();
            if (nInits != collectionType->GetElementCount()) {
                Error(initExpr->pos, "Initializer for %s \"%s\" requires "
                      "%d values; %d provided.", name.c_str(), symName,
                      collectionType->GetElementCount(), nInits);
                return;
            }

            // Initialize each element with the corresponding value from
            // the ExprList
            for (int i = 0; i < nInits; ++i) {
                llvm::Value *ep;
                if (dynamic_cast<const StructType *>(symType) != NULL)
                    ep = ctx->AddElementOffset(lvalue, i, NULL, "element");
                else
                    ep = ctx->GetElementPtrInst(lvalue, LLVMInt32(0), LLVMInt32(i),
                                                PointerType::GetUniform(collectionType->GetElementType(i)),
                                                "gep");
                lInitSymbol(ep, symName, collectionType->GetElementType(i),
                            exprList->exprs[i], ctx, pos);
            }
        }
        else
            // Defensive fallback: initExpr is always an ExprList by the
            // time we get here, so this branch is not expected to run.
            Error(initExpr->pos, "Can't assign type \"%s\" to \"%s\".",
                  initExpr->GetType()->GetString().c_str(),
                  collectionType->GetString().c_str());
        return;
    }

    FATAL("Unexpected Type in lInitSymbol()");
}
static bool static bool
lHasUnsizedArrays(const Type *type) { lHasUnsizedArrays(const Type *type) {
const ArrayType *at = dynamic_cast<const ArrayType *>(type); const ArrayType *at = dynamic_cast<const ArrayType *>(type);
@@ -333,7 +186,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// zero value. // zero value.
llvm::Constant *cinit = NULL; llvm::Constant *cinit = NULL;
if (initExpr != NULL) { if (initExpr != NULL) {
if (lPossiblyResolveFunctionOverloads(initExpr, sym->type) == false) if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
continue; continue;
// FIXME: we only need this for function pointers; it was // FIXME: we only need this for function pointers; it was
// already done for atomic types and enums in // already done for atomic types and enums in
@@ -377,8 +230,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// And then get it initialized... // And then get it initialized...
sym->parentFunction = ctx->GetFunction(); sym->parentFunction = ctx->GetFunction();
lInitSymbol(sym->storagePtr, sym->name.c_str(), sym->type, InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
initExpr, ctx, sym->pos);
} }
} }
} }
@@ -575,7 +427,7 @@ IfStmt::TypeCheck() {
int int
IfStmt::EstimateCost() const { IfStmt::EstimateCost() const {
const Type *type; const Type *type;
if (test == NULL || (type = test->GetType()) != NULL) if (test == NULL || (type = test->GetType()) == NULL)
return 0; return 0;
return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF; return type->IsUniformType() ? COST_UNIFORM_IF : COST_VARYING_IF;
@@ -621,103 +473,6 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
} }
/** Given an AST node, check to see if it's safe if we happen to run the
code for that node with the execution mask all off.
This has the signature of a WalkAST() "pre" callback.
@param node  AST node to examine
@param data  Pointer to a bool; it is set to false as soon as a node
that is not safe is found, and is never set to true here
@return      false whenever the node was found to be unsafe, true
otherwise.  (Presumably WalkAST() uses the return value to decide
whether to keep descending -- TODO confirm against WalkAST().)
*/
static bool
lCheckAllOffSafety(ASTNode *node, void *data) {
bool *okPtr = (bool *)data;
if (dynamic_cast<FunctionCallExpr *>(node) != NULL) {
// FIXME: If we could somehow determine that the function being
// called was safe (and all of the args Exprs were safe, then it'd
// be nice to be able to return true here. (Consider a call to
// e.g. floatbits() in the stdlib.) Unfortunately for now we just
// have to be conservative.
*okPtr = false;
return false;
}
if (dynamic_cast<AssertStmt *>(node) != NULL) {
// While it's fine to run the assert for varying tests, it's not
// desirable to check an assert on a uniform variable if all of the
// lanes are off.
*okPtr = false;
return false;
}
// Everything below only concerns memory accesses.
if (g->target.allOffMaskIsSafe == true)
// Don't worry about memory accesses if we have a target that can
// safely run them with the mask all off
return true;
// Indexing is safe only for a compile-time constant index that is in
// bounds of a sized array/vector.
IndexExpr *ie;
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
const Type *type = ie->baseExpr->GetType();
if (type == NULL)
return true;
// Look through a reference to the type it refers to.
if (dynamic_cast<const ReferenceType *>(type) != NULL)
type = type->GetReferenceTarget();
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
if (ce == NULL) {
// indexing with a variable... -> not safe
*okPtr = false;
return false;
}
const PointerType *pointerType =
dynamic_cast<const PointerType *>(type);
if (pointerType != NULL) {
// pointer[index] -> can't be sure -> not safe
*okPtr = false;
return false;
}
const SequentialType *seqType =
dynamic_cast<const SequentialType *>(type);
Assert(seqType != NULL);
int nElements = seqType->GetElementCount();
if (nElements == 0) {
// Unsized array, so we can't be sure -> not safe
*okPtr = false;
return false;
}
// Check every value of the constant index against the bounds.
int32_t indices[ISPC_MAX_NVEC];
int count = ce->AsInt32(indices);
for (int i = 0; i < count; ++i) {
if (indices[i] < 0 || indices[i] >= nElements) {
// Index is out of bounds -> not safe
*okPtr = false;
return false;
}
}
// All indices are in-bounds
return true;
}
// Member access flagged as dereferencing -> not safe
MemberExpr *me;
if ((me = dynamic_cast<MemberExpr *>(node)) != NULL &&
me->dereferenceExpr) {
*okPtr = false;
return false;
}
// Explicit dereference of a pointer-typed expression -> not safe
DereferenceExpr *de;
if ((de = dynamic_cast<DereferenceExpr *>(node)) != NULL) {
const Type *exprType = de->expr->GetType();
if (dynamic_cast<const PointerType *>(exprType) != NULL) {
*okPtr = false;
return false;
}
}
return true;
}
/** Emit code for an if test that checks the mask and the test values and /** Emit code for an if test that checks the mask and the test values and
tries to be smart about jumping over code that doesn't need to be run. tries to be smart about jumping over code that doesn't need to be run.
*/ */
@@ -771,7 +526,7 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
// //
// Where the overhead of checking if any of the program instances wants // Where the overhead of checking if any of the program instances wants
// to run one side or the other is more than the actual computation. // to run one side or the other is more than the actual computation.
// The lSafeToRunWithAllLanesOff() checks to make sure that we don't do this // SafeToRunWithMaskAllOff() checks to make sure that we don't do this
// for potentially dangerous code like: // for potentially dangerous code like:
// //
// if (index < count) array[index] = 0; // if (index < count) array[index] = 0;
@@ -783,9 +538,8 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
bool costIsAcceptable = (trueFalseCost < bool costIsAcceptable = (trueFalseCost <
PREDICATE_SAFE_IF_STATEMENT_COST); PREDICATE_SAFE_IF_STATEMENT_COST);
bool safeToRunWithAllLanesOff = true; bool safeToRunWithAllLanesOff = (SafeToRunWithMaskAllOff(trueStmts) &&
WalkAST(trueStmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff); SafeToRunWithMaskAllOff(falseStmts));
WalkAST(falseStmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
if (safeToRunWithAllLanesOff && if (safeToRunWithAllLanesOff &&
(costIsAcceptable || g->opt.disableCoherentControlFlow)) { (costIsAcceptable || g->opt.disableCoherentControlFlow)) {
@@ -2123,9 +1877,7 @@ lCheckMask(Stmt *stmts) {
return false; return false;
int cost = EstimateCost(stmts); int cost = EstimateCost(stmts);
bool safeToRunWithAllLanesOff = SafeToRunWithMaskAllOff(stmts);
bool safeToRunWithAllLanesOff = true;
WalkAST(stmts, lCheckAllOffSafety, NULL, &safeToRunWithAllLanesOff);
// The mask should be checked if the code following the // The mask should be checked if the code following the
// 'case'/'default' is relatively complex, or if it would be unsafe to // 'case'/'default' is relatively complex, or if it would be unsafe to
@@ -2880,3 +2632,82 @@ AssertStmt::EstimateCost() const {
return COST_ASSERT; return COST_ASSERT;
} }
///////////////////////////////////////////////////////////////////////////
// DeleteStmt
/** Construct a delete statement.

    @param e  Expression giving the pointer value to delete
    @param p  Source position of the statement
*/
DeleteStmt::DeleteStmt(Expr *e, SourcePos p)
    : Stmt(p), expr(e) {
}
/** Emit the call that frees the memory the statement's pointer refers to:
    __delete_uniform() for a uniform pointer, __delete_varying() for a
    varying one.  Returns without emitting anything if the expression, its
    type or its value is unavailable; in that case an error is asserted to
    have been recorded already.
*/
void
DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
    const Type *ptrType = (expr != NULL) ? expr->GetType() : NULL;
    if (ptrType == NULL) {
        Assert(m->errorCount > 0);
        return;
    }

    llvm::Value *ptrValue = expr->GetValue(ctx);
    if (ptrValue == NULL) {
        Assert(m->errorCount > 0);
        return;
    }

    // Typechecking should catch this
    Assert(dynamic_cast<const PointerType *>(ptrType) != NULL);

    if (!ptrType->IsUniformType()) {
        // Varying pointers are arrays of ints, and __delete_varying()
        // takes a vector of i64s (even for 32-bit targets).  Therefore, we
        // only need to extend to 64-bit values on 32-bit targets before
        // calling it.
        llvm::Function *deleteFunc = m->module->getFunction("__delete_varying");
        Assert(deleteFunc != NULL);
        if (g->target.is32Bit)
            ptrValue = ctx->ZExtInst(ptrValue, LLVMTypes::Int64VectorType,
                                     "ptr_to_64");
        ctx->CallInst(deleteFunc, NULL, ptrValue, "");
        return;
    }

    // For deletion of a uniform pointer, we just need to cast the
    // pointer type to a void pointer type, to match what
    // __delete_uniform() from the builtins expects.
    ptrValue = ctx->BitCastInst(ptrValue, LLVMTypes::VoidPointerType,
                                "ptr_to_void");
    llvm::Function *deleteFunc = m->module->getFunction("__delete_uniform");
    Assert(deleteFunc != NULL);
    ctx->CallInst(deleteFunc, NULL, ptrValue, "");
}
// Debug printer: writes "Delete Stmt" preceded by `indent` columns of
// padding.  No trailing newline is written.
void
DeleteStmt::Print(int indent) const {
    const char padChar = ' ';
    printf("%*cDelete Stmt", indent, padChar);
}
// Checks that the operand of the delete statement has pointer type.
// Returns `this` when it does; returns NULL when the operand or its type
// is missing, or (after reporting an error) when it is not a pointer.
Stmt *
DeleteStmt::TypeCheck() {
    if (expr == NULL)
        return NULL;

    const Type *operandType = expr->GetType();
    if (operandType == NULL)
        return NULL;

    const bool isPointer =
        (dynamic_cast<const PointerType *>(operandType) != NULL);
    if (isPointer)
        return this;

    Error(pos, "Illegal to delete non-pointer type \"%s\".",
          operandType->GetString().c_str());
    return NULL;
}
// A delete statement is charged the fixed COST_DELETE, regardless of its
// operand.
int
DeleteStmt::EstimateCost() const {
    const int fixedCost = COST_DELETE;
    return fixedCost;
}

17
stmt.h
View File

@@ -442,4 +442,21 @@ public:
Expr *expr; Expr *expr;
}; };
/** Representation of a delete statement in the program.  The statement
holds a single expression that yields the pointer to be freed.
*/
class DeleteStmt : public Stmt {
public:
/** Creates a delete statement for expression e at source position p. */
DeleteStmt(Expr *e, SourcePos p);
/** Emits a call to __delete_uniform() or __delete_varying(), depending
on whether the pointer type is uniform. */
void EmitCode(FunctionEmitContext *ctx) const;
/** Prints "Delete Stmt" at the given indentation (debugging aid). */
void Print(int indent) const;
/** Returns this if the expression has pointer type; otherwise returns
NULL, reporting an error when the type is known but not a pointer. */
Stmt *TypeCheck();
/** Returns the fixed cost COST_DELETE. */
int EstimateCost() const;
/** Expression that gives the pointer value to be deleted. */
Expr *expr;
};
#endif // ISPC_STMT_H #endif // ISPC_STMT_H

39
sym.cpp
View File

@@ -354,3 +354,42 @@ SymbolTable::Print() {
depth += 4; depth += 4;
} }
} }
// Platform wrapper around the C library random number generator: rand()
// when ISPC_IS_WINDOWS is defined, lrand48() otherwise.
inline int ispcRand() {
#if defined(ISPC_IS_WINDOWS)
    const int value = rand();
#else
    const int value = lrand48();
#endif
    return value;
}
/** Picks a pseudo-random symbol: first a random entry of `variables`, then
    a random element of the map that entry points to.  The choice is not
    uniform over all symbols.

    @return the chosen symbol, or NULL when `variables` is empty or the
            chosen map holds no symbols.
 */
Symbol *
SymbolTable::RandomSymbol() {
    // Guard the modulo below: with an empty `variables`, size() is zero
    // and "% 0" is undefined behavior.
    if (variables.size() == 0)
        return NULL;

    int v = ispcRand() % variables.size();
    if (variables[v]->size() == 0)
        return NULL;

    // Step forward from begin() to the randomly chosen element.
    int count = ispcRand() % variables[v]->size();
    SymbolMapType::iterator iter = variables[v]->begin();
    while (count-- > 0) {
        ++iter;
        Assert(iter != variables[v]->end());
    }
    return iter->second;
}
/** Picks a pseudo-random type: first a random entry of `types`, then a
    random element of the map that entry points to.

    @return the chosen type, or NULL when `types` is empty or the chosen
            map holds no types.
 */
const Type *
SymbolTable::RandomType() {
    // Guard the modulo below: with an empty `types`, size() is zero and
    // "% 0" is undefined behavior.
    if (types.size() == 0)
        return NULL;

    int v = ispcRand() % types.size();
    if (types[v]->size() == 0)
        return NULL;

    // Step forward from begin() to the randomly chosen element.
    int count = ispcRand() % types[v]->size();
    TypeMapType::iterator iter = types[v]->begin();
    while (count-- > 0) {
        ++iter;
        Assert(iter != types[v]->end());
    }
    return iter->second;
}

7
sym.h
View File

@@ -244,6 +244,13 @@ public:
(Debugging method). */ (Debugging method). */
void Print(); void Print();
/** Returns a random symbol from the symbol table. (It is not
guaranteed that it is equally likely to return all symbols). */
Symbol *RandomSymbol();
/** Returns a random type from the symbol table. */
const Type *RandomType();
private: private:
std::vector<std::string> closestTypeMatch(const char *str, std::vector<std::string> closestTypeMatch(const char *str,
bool structsVsEnums) const; bool structsVsEnums) const;

View File

@@ -15,7 +15,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
if (programCount == 4) if (programCount == 1)
RET[programIndex] = 1;
else if (programCount == 4)
RET[programIndex] = 5.; RET[programIndex] = 5.;
else else
RET[programIndex] = 10.; RET[programIndex] = 10.;

View File

@@ -3,13 +3,13 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float a[programCount]; uniform float a[programCount+4];
for (unsigned int i = 0; i < programCount; ++i) for (unsigned int i = 0; i < programCount+4; ++i)
a[i] = aFOO[i]; a[i] = aFOO[min((int)i, programCount)];
RET[programIndex] = *(a + 2); RET[programIndex] = *(a + 2);
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 3; RET[programIndex] = (programCount == 1) ? 2 : 3;
} }

View File

@@ -14,4 +14,4 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
} }
export void result(uniform float RET[]) { RET[programIndex] = 5; } export void result(uniform float RET[]) { RET[programIndex] = programCount == 1 ? 0 : 5; }

View File

@@ -14,7 +14,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
if (programCount == 4) if (programCount == 1)
RET[programIndex] = 0;
else if (programCount == 4)
RET[programIndex] = 2; RET[programIndex] = 2;
else else
RET[programIndex] = 4; RET[programIndex] = 4;

View File

@@ -13,5 +13,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 2; RET[programIndex] = programCount == 1 ? 1 : 2;
} }

View File

@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = (programCount/2) - 1; RET[programIndex] = programCount == 1 ? 0 : ((programCount/2) - 1);
} }

View File

@@ -5,11 +5,11 @@ uniform int32 s = 0xff;
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
int32 bits = 0xfffffff0; int32 bits = 0xfff0;
float b = atomic_xor_global(&s, bits); float b = atomic_xor_global(&s, bits);
RET[programIndex] = s; RET[programIndex] = s;
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 0xff; RET[programIndex] = (programCount & 1) ? 0xff0f : 0xff;
} }

View File

@@ -10,5 +10,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 3000; RET[programIndex] = (programCount == 1) ? 2 : 3000;
} }

View File

@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = programCount; RET[programIndex] = (programCount == 1) ? 0 : programCount;
} }

View File

@@ -13,5 +13,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 1; RET[programIndex] = (programCount == 1) ? 0 : 1;
} }

View File

@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int a = aFOO[programIndex]; int a = aFOO[programIndex];
int br = broadcast(a, (uniform int)b-2); int br = (programCount == 1) ? 4 : broadcast(a, (uniform int)b-2);
RET[programIndex] = br; RET[programIndex] = br;
} }

View File

@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
int16 a = aFOO[programIndex]; int16 a = aFOO[programIndex];
int16 b = broadcast(a, 2); int16 b = (programCount == 1) ? 3 : broadcast(a, 2);
RET[programIndex] = b; RET[programIndex] = b;
} }

View File

@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
int8 a = aFOO[programIndex]; int8 a = aFOO[programIndex];
int8 br = broadcast(a, (uniform int)b-2); int8 br = (programCount == 1) ? 4 : broadcast(a, (uniform int)b-2);
RET[programIndex] = br; RET[programIndex] = br;
} }

View File

@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex]; float a = aFOO[programIndex];
float b = broadcast(a, 2); float b = (programCount == 1) ? 3 : broadcast(a, 2);
RET[programIndex] = b; RET[programIndex] = b;
} }

View File

@@ -0,0 +1,10 @@
// Test: f_f() stores the result of selecting on (programIndex >= 0);
// result() expects 1 in every lane.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = (programIndex >= 0) ? 1 : 0;
}
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

View File

@@ -0,0 +1,10 @@
// Test: f_f() stores the result of selecting on (programCount < 10000);
// result() expects 1 in every lane.
export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = (programCount < 10000) ? 1 : 0;
}
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

View File

@@ -3,9 +3,9 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
double a = programIndex; double a = programIndex;
RET[programIndex] = extract(a, 3); RET[programIndex] = extract(a, min(programCount-1, 3));
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 3; RET[programIndex] = (programCount == 1) ? 0 : 3;
} }

View File

@@ -0,0 +1,30 @@
// Test: copy() writes one * src[i] into dst[i] inside a foreach over
// [0, count), where the global `one` is 1.  f_f() allocates two double
// arrays of 200 + aFOO[1] elements, fills src with 0..count-1, copies, and
// counts the elements where dst differs from src.  result() expects zero
// mismatches.
export uniform int width() { return programCount; }
uniform double one = 1;
void copy(uniform double dst[], uniform double src[], uniform int count) {
foreach (i = 0 ... count)
dst[i] = one * src[i];
}
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform int count = 200 + aFOO[1];
uniform double * uniform src = uniform new uniform double[count];
for (uniform int i = 0; i < count; ++i)
src[i] = i;
uniform double * uniform dst = uniform new uniform double[count];
copy(dst, src, count);
uniform int errors = 0;
for (uniform int i = 0; i < count; ++i)
if (dst[i] != src[i])
++errors;
RET[programIndex] = errors;
}
export void result(uniform float RET[]) {
RET[programIndex] = 0;
}

View File

@@ -0,0 +1,15 @@
// Test: every lane calls atomic_add_local() on the uniform unsigned int32
// `s` with a varying delta of 1.  f_f() stores reduce_add() of the values
// the call returned; result() expects reduce_add(programIndex).
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float delta = 1;
float b = atomic_add_local(&s, delta);
RET[programIndex] = reduce_add(b);
}
export void result(uniform float RET[]) {
RET[programIndex] = reduce_add(programIndex);
}

View File

@@ -0,0 +1,17 @@
// Test: only lanes with programIndex < 2 call atomic_add_local(&s, 1);
// every lane then stores the final value of `s`.  result() expects 2, or 1
// when programCount == 1.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
float delta = 1;
if (programIndex < 2)
b = atomic_add_local(&s, delta);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = programCount == 1 ? 1 : 2;
}

View File

@@ -0,0 +1,20 @@
// Test: lanes with an odd programIndex add their programIndex to `s` via
// atomic_add_local(); every lane stores the final `s`.  result() recomputes
// the expected value as the sum of the odd indices below programCount.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1)
b = atomic_add_local(&s, programIndex);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
uniform int sum = 0;
for (uniform int i = 0; i < programCount; ++i)
if (i & 1)
sum += i;
RET[programIndex] = sum;
}

View File

@@ -0,0 +1,20 @@
// Test: lanes with an odd programIndex OR (1 << programIndex) into `s` via
// atomic_or_local(); every lane stores the final `s`.  result() recomputes
// the expected value by summing (1 << i) over the odd i below programCount.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1)
b = atomic_or_local(&s, (1 << programIndex));
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
uniform int sum = 0;
for (uniform int i = 0; i < programCount; ++i)
if (i & 1)
sum += (1 << i);
RET[programIndex] = sum;
}

View File

@@ -0,0 +1,16 @@
// Test: lanes with an odd programIndex OR (1 << programIndex) into `s` via
// atomic_or_local() and keep the value the call returned in `b`.  f_f()
// stores popcnt() of reduce_max() over `b` cast to int32.  result() expects
// (programCount/2) - 1, or 0 when programCount == 1.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1)
b = atomic_or_local(&s, (1 << programIndex));
RET[programIndex] = popcnt(reduce_max((int32)b));
}
export void result(uniform float RET[]) {
RET[programIndex] = programCount == 1 ? 0 : ((programCount/2) - 1);
}

View File

@@ -0,0 +1,20 @@
// Test: 64-bit variant of the masked atomic_or_local() test.  `s` starts at
// 0xffffffffff000000; lanes with an odd programIndex OR (1 << programIndex)
// into it and every lane stores s >> 20.  result() rebuilds the expected
// value from the same constant and the sum of (1 << i) for odd i.
export uniform int width() { return programCount; }
uniform unsigned int64 s = 0xffffffffff000000;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1)
b = atomic_or_local(&s, (1 << programIndex));
RET[programIndex] = (s>>20);
}
export void result(uniform float RET[]) {
uniform int sum = 0;
for (uniform int i = 0; i < programCount; ++i)
if (i & 1)
sum += (1 << i);
RET[programIndex] = ((unsigned int64)(0xffffffffff000000 | sum)) >> 20;
}

View File

@@ -0,0 +1,15 @@
// Test: every lane calls atomic_add_local() on the uniform int64 `s` with a
// varying delta of 1.  f_f() stores reduce_add() of the values the call
// returned; result() expects reduce_add(programIndex).
export uniform int width() { return programCount; }
uniform int64 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float delta = 1;
float b = atomic_add_local(&s, delta);
RET[programIndex] = reduce_add(b);
}
export void result(uniform float RET[]) {
RET[programIndex] = reduce_add(programIndex);
}

View File

@@ -0,0 +1,15 @@
// Test: every lane XORs 0xfff0 into `s` (initially 0xff) via
// atomic_xor_local(); every lane stores the final `s`.  result() expects
// 0xff0f when programCount is odd and 0xff otherwise.
export uniform int width() { return programCount; }
uniform int32 s = 0xff;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
int32 bits = 0xfff0;
float b = atomic_xor_local(&s, bits);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = (programCount & 1) ? 0xff0f : 0xff;
}

View File

@@ -0,0 +1,14 @@
// Test: every lane ORs (1 << programIndex) into `s` via atomic_or_local();
// every lane stores the final `s`.  result() expects (1 << programCount) - 1.
export uniform int width() { return programCount; }
uniform int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = atomic_or_local(&s, (1<<programIndex));
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = (1<<programCount)-1;
}

View File

@@ -0,0 +1,14 @@
// Test: every lane swaps its programIndex into `s` (initially 0xbeef) via
// atomic_swap_local().  f_f() stores reduce_max() of the values the call
// returned; result() expects 0xbeef.
export uniform int width() { return programCount; }
uniform int32 s = 0xbeef;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = atomic_swap_local(&s, programIndex);
RET[programIndex] = reduce_max(b);
}
export void result(uniform float RET[]) {
RET[programIndex] = 0xbeef;
}

View File

@@ -0,0 +1,14 @@
// Test: every lane calls atomic_compare_exchange_local() on `s` (initially
// 2) with its programIndex as the compare value and a*1000 as the new
// value; every lane stores the final `s`.  result() expects 3000, or 2
// when programCount == 1.
export uniform int width() { return programCount; }
uniform int32 s = 2;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = atomic_compare_exchange_local(&s, programIndex, a*1000);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = (programCount == 1) ? 2 : 3000;
}

View File

@@ -0,0 +1,14 @@
// Test: every lane calls atomic_min_local() on `s` (initially 0) with its
// value from aFOO.  f_f() stores reduce_min() of the values the call
// returned; result() expects reduce_min(programIndex).
export uniform int width() { return programCount; }
uniform int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 a = aFOO[programIndex];
float b = atomic_min_local(&s, a);
RET[programIndex] = reduce_min(b);
}
export void result(uniform float RET[]) {
RET[programIndex] = reduce_min(programIndex);
}

View File

@@ -0,0 +1,16 @@
// Test: lanes with an odd programIndex call atomic_max_local() on `s`
// (initially 0) with their value from aFOO; every lane stores the final
// `s`.  result() expects programCount, or 0 when programCount == 1.
export uniform int width() { return programCount; }
uniform int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 a = aFOO[programIndex];
int32 b = 0;
if (programIndex & 1)
b = atomic_max_local(&s, a);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = (programCount == 1) ? 0 : programCount;
}

View File

@@ -0,0 +1,17 @@
// Test: lanes with programIndex < 2 call atomic_add_local(&s, 1) with an
// int32 delta and keep the returned value in `b` (0 in the other lanes).
// f_f() stores reduce_add(b); result() expects 1, or 0 when
// programCount == 1.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
int32 delta = 1;
if (programIndex < 2)
b = atomic_add_local(&s, delta);
RET[programIndex] = reduce_add(b);
}
export void result(uniform float RET[]) {
RET[programIndex] = (programCount == 1) ? 0 : 1;
}

View File

@@ -0,0 +1,17 @@
// Test: lanes with an odd programIndex swap their programIndex into `s`
// (initially 1234) via atomic_swap_local() and keep the returned value in
// `b`.  f_f() stores reduce_add(b) + s; result() expects 1234 plus the sum
// of the odd programIndex values.
export uniform int width() { return programCount; }
uniform int32 s = 1234;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1) {
b = atomic_swap_local(&s, programIndex);
}
RET[programIndex] = reduce_add(b) + s;
}
export void result(uniform float RET[]) {
RET[programIndex] = 1234 + reduce_add(programIndex & 1 ? programIndex : 0);
}

View File

@@ -0,0 +1,14 @@
// Test: uniform form of atomic_add_local(): adds 1 to `s` (initially 10)
// and stores the final `s`.  result() expects 11.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 10;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_add_local(&s, 1);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = 11;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform form of atomic_or_local(): ORs 1 into `s` (initially
// 0b1010) and stores the final `s`.  result() expects 0b1011.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0b1010;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_or_local(&s, 1);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = 0b1011;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform form of atomic_or_local(): ORs 1 into `s` (initially
// 0b1010) and stores the value the call returned.  result() expects
// 0b1010.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0b1010;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_or_local(&s, 1);
RET[programIndex] = b;
}
export void result(uniform float RET[]) {
RET[programIndex] = 0b1010;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform form of atomic_min_local(): applies min with 1 to `s`
// (initially 0xffff) and stores the value the call returned.  result()
// expects 0xffff.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0xffff;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_min_local(&s, 1);
RET[programIndex] = b;
}
export void result(uniform float RET[]) {
RET[programIndex] = 0xffff;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform form of atomic_min_local(): applies min with 1 to `s`
// (initially 0xffff) and stores the final `s`.  result() expects 1.
export uniform int width() { return programCount; }
uniform unsigned int32 s = 0xffff;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform unsigned int32 b = atomic_min_local(&s, 1);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = 1;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform float form of atomic_swap_local(): swaps 1. into `s`
// (initially 100.) and stores the final `s`.  result() expects 1.
export uniform int width() { return programCount; }
uniform float s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform float b = atomic_swap_local(&s, 1.);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = 1.;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform float form of atomic_swap_local(): swaps 1. into `s`
// (initially 100.) and stores the value the call returned.  result()
// expects 100.
export uniform int width() { return programCount; }
uniform float s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform float b = atomic_swap_local(&s, 1.);
RET[programIndex] = b;
}
export void result(uniform float RET[]) {
RET[programIndex] = 100.;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform float form of atomic_compare_exchange_local(): called on
// `s` (initially 100.) with compare value 1. and new value -100.; f_f()
// stores the value the call returned.  result() expects 100.
export uniform int width() { return programCount; }
uniform float s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform float b = atomic_compare_exchange_local(&s, 1., -100.);
RET[programIndex] = b;
}
export void result(uniform float RET[]) {
RET[programIndex] = 100.;
}

View File

@@ -0,0 +1,14 @@
// Test: uniform int64 form of atomic_compare_exchange_local(): called on
// `s` (initially 100) with compare value 100 and new value -100; f_f()
// stores the final `s`.  result() expects -100.
export uniform int width() { return programCount; }
uniform int64 s = 100.;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
uniform int64 b = atomic_compare_exchange_local(&s, 100, -100);
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = -100.;
}

View File

@@ -0,0 +1,18 @@
// Test: atomic_add_local() on per-lane array elements.  Lanes with
// programIndex < 2 add 1 to s[programIndex]; every lane stores its own
// s[programIndex].  result() expects 0 everywhere except RET[0] and RET[1],
// which are set to 1.
export uniform int width() { return programCount; }
uniform unsigned int32 s[programCount];
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
float delta = 1;
if (programIndex < 2)
atomic_add_local(&s[programIndex], delta);
RET[programIndex] = s[programIndex];
}
export void result(uniform float RET[]) {
RET[programIndex] = 0;
RET[0] = RET[1] = 1;
}

View File

@@ -0,0 +1,16 @@
// Test: atomic_add_local() on array elements addressed in reverse lane
// order.  Each lane adds its programIndex to
// s[programCount-1-programIndex], then stores s[programIndex].  result()
// expects programCount-1-programIndex.
export uniform int width() { return programCount; }
uniform unsigned int32 s[programCount];
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
float delta = 1;
atomic_add_local(&s[programCount-1-programIndex], programIndex);
RET[programIndex] = s[programIndex];
}
export void result(uniform float RET[]) {
RET[programIndex] = programCount-1-programIndex;
}

View File

@@ -0,0 +1,18 @@
// Test: atomic_max_local() on per-lane array elements.  Every element of
// `s` is first set to 1234; each lane then calls atomic_max_local() on
// s[programIndex] with its programIndex and stores the value the call
// returned.  result() expects 1234.
export uniform int width() { return programCount; }
uniform unsigned int32 s[programCount];
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
s[i] = 1234;
float a = aFOO[programIndex];
float b = 0;
float delta = 1;
a = atomic_max_local(&s[programIndex], programIndex);
RET[programIndex] = a;
}
export void result(uniform float RET[]) {
RET[programIndex] = 1234;
}

View File

@@ -0,0 +1,15 @@
// Test: atomic_max_local() on per-lane signed array elements.  Every
// element of `s` is first set to -1234; each lane then calls
// atomic_max_local() on s[programIndex] with its programIndex and stores
// the resulting s[programIndex].  result() expects programIndex.
export uniform int width() { return programCount; }
uniform int32 s[programCount];
export void f_f(uniform float RET[], uniform float aFOO[]) {
for (uniform int i = 0; i < programCount; ++i)
s[i] = -1234;
atomic_max_local(&s[programIndex], programIndex);
RET[programIndex] = s[programIndex];
}
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
}

15
tests/new-delete-1.ispc Normal file
View File

@@ -0,0 +1,15 @@
// Test: `uniform new` of an array of programCount varying floats.  The
// array is filled with buf[i] = i, read back at index a-1, and released
// with `delete`.  result() expects programIndex.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
float * uniform buf = uniform new float[programCount];
for (uniform int i = 0; i < programCount; ++i)
buf[i] = i;
RET[programIndex] = buf[a-1];
delete buf;
}
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
}

15
tests/new-delete-2.ispc Normal file
View File

@@ -0,0 +1,15 @@
// Test: `uniform new` of an array of programCount uniform floats.  The
// array is filled with buf[i] = i, read back at the varying index a-1, and
// released with `delete`.  result() expects programIndex.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float * uniform buf = uniform new uniform float[programCount];
for (uniform int i = 0; i < programCount; ++i)
buf[i] = i;
RET[programIndex] = buf[a-1];
delete buf;
}
export void result(uniform float RET[]) {
RET[programIndex] = programIndex;
}

17
tests/new-delete-3.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: `uniform new` of an array of programCount+1 varying floats.  Each
// element is set to i+a, the array is read back at index a, and it is
// released with `delete`.  result() expects 2 + 2*programIndex.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
float * uniform buf = uniform new float[programCount+1];
for (uniform int i = 0; i < programCount+1; ++i) {
buf[i] = i+a;
}
RET[programIndex] = buf[a];
delete buf;
}
export void result(uniform float RET[]) {
RET[programIndex] = 2 + 2*programIndex;
}

14
tests/new-delete-4.ispc Normal file
View File

@@ -0,0 +1,14 @@
// Test: `uniform new float(2*b)`, a single allocation with an initializer.
// The value is read back through buf[0] and released with the `delete[]`
// spelling.  result() expects 10.
export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
float * uniform buf = uniform new float(2*b);
RET[programIndex] = buf[0];
delete[] buf;
}
export void result(uniform float RET[]) {
RET[programIndex] = 10;
}

17
tests/new-delete-5.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: `uniform new` of a varying Point, a struct whose members are
// declared uniform float, initialized with (a, b, 1234.).  The y member is
// read back through the pointer, which is then released with `delete`.
// result() expects 5.
export uniform int width() { return programCount; }
struct Point {
uniform float x, y, z;
};
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
varying Point * uniform buf = uniform new varying Point(a, b, 1234.);
RET[programIndex] = buf->y;
delete buf;
}
export void result(uniform float RET[]) {
RET[programIndex] = 5;
}

17
tests/new-delete-6.ispc Normal file
View File

@@ -0,0 +1,17 @@
// Test: `new` into a varying pointer.  A Point is allocated and initialized
// with (0., b, a); the z member is read back through the pointer, which is
// then released with `delete`.  result() expects 1+programIndex.
export uniform int width() { return programCount; }
struct Point {
float x, y, z;
};
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
Point * varying buf = new Point(0., b, a);
RET[programIndex] = buf->z;
delete buf;
}
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
}

View File

@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 2; RET[programIndex] = (programCount == 1) ? 1 : 2;
} }

View File

@@ -15,8 +15,12 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
for (uniform int i = 0; i < programCount/2; ++i) { if (programCount == 1)
RET[2*i+1] = 10+i; RET[0] = 10;
RET[2*i] = 10+programCount/2+i; else {
for (uniform int i = 0; i < programCount/2; ++i) {
RET[2*i+1] = 10+i;
RET[2*i] = 10+programCount/2+i;
}
} }
} }

View File

@@ -13,5 +13,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
} }
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = programCount/2; RET[programIndex] = (programCount == 1) ? 1 : programCount/2;
} }

View File

@@ -8,4 +8,4 @@ export void f_f(uniform float RET[4], uniform float aFOO[]) {
RET[programIndex] = popcnt(a < 3); RET[programIndex] = popcnt(a < 3);
} }
export void result(uniform float RET[]) { RET[programIndex] = 2; } export void result(uniform float RET[]) { RET[programIndex] = programCount == 1 ? 1 : 2; }

View File

@@ -2,8 +2,9 @@
export uniform int width() { return programCount; } export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) { export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float a[programCount]; uniform float a[programCount+1];
a[programIndex] = aFOO[programIndex]; a[programIndex] = aFOO[programIndex];
a[programCount] = 1;
uniform float * uniform ptr = a; uniform float * uniform ptr = a;
*(ptr+1) = 0; *(ptr+1) = 0;
@@ -12,5 +13,6 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex; RET[programIndex] = 1+programIndex;
RET[1] = 0; if (programCount > 0)
RET[1] = 0;
} }

View File

@@ -5,7 +5,7 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) { export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
double v = aFOO[programIndex]; double v = aFOO[programIndex];
uniform float m; uniform float m = 42;
int iv = (int)v; int iv = (int)v;
if (iv & 1) if (iv & 1)
m = reduce_add((double)iv); m = reduce_add((double)iv);
@@ -14,7 +14,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
uniform int x = -1234; uniform int x = -1234;
if (programCount == 4) x = 4; if (programCount == 1) x = 1;
else if (programCount == 4) x = 4;
else if (programCount == 8) x = 16; else if (programCount == 8) x = 16;
else if (programCount == 16) x = 64; else if (programCount == 16) x = 64;
RET[programIndex] = x; RET[programIndex] = x;

View File

@@ -13,7 +13,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
uniform int x = -1234; uniform int x = -1234;
if (programCount == 4) x = 10; if (programCount == 1) x = 1;
else if (programCount == 4) x = 10;
else if (programCount == 8) x = 36; else if (programCount == 8) x = 36;
else if (programCount == 16) x = 136; else if (programCount == 16) x = 136;
RET[programIndex] = x; RET[programIndex] = x;

View File

@@ -11,4 +11,4 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
RET[programIndex] = m; RET[programIndex] = m;
} }
export void result(uniform float RET[]) { RET[programIndex] = -3; } export void result(uniform float RET[]) { RET[programIndex] = (programCount == 1) ? -1 : -3; }

View File

@@ -14,7 +14,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
uniform int x = -1234; uniform int x = -1234;
if (programCount == 4) x = 4; if (programCount == 1) x = 1;
else if (programCount == 4) x = 4;
else if (programCount == 8) x = 16; else if (programCount == 8) x = 16;
else if (programCount == 16) x = 64; else if (programCount == 16) x = 64;
RET[programIndex] = x; RET[programIndex] = x;

View File

@@ -13,7 +13,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
uniform int x = -1234; uniform int x = -1234;
if (programCount == 4) x = 10; if (programCount == 1) x = 1;
else if (programCount == 4) x = 10;
else if (programCount == 8) x = 36; else if (programCount == 8) x = 36;
else if (programCount == 16) x = 136; else if (programCount == 16) x = 136;
RET[programIndex] = x; RET[programIndex] = x;

View File

@@ -11,4 +11,4 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
RET[programIndex] = m; RET[programIndex] = m;
} }
export void result(uniform float RET[]) { RET[programIndex] = -3; } export void result(uniform float RET[]) { RET[programIndex] = (programCount == 1) ? -1 : -3; }

View File

@@ -14,7 +14,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
export void result(uniform float RET[]) { export void result(uniform float RET[]) {
uniform int x = -1234; uniform int x = -1234;
if (programCount == 4) x = 4; if (programCount == 1) x = 1;
else if (programCount == 4) x = 4;
else if (programCount == 8) x = 16; else if (programCount == 8) x = 16;
else if (programCount == 16) x = 64; else if (programCount == 16) x = 64;
RET[programIndex] = x; RET[programIndex] = x;

Some files were not shown because too many files have changed in this diff Show More