From 78a05777bc6a6d41bfb553cdc66cdf16fc4fc507 Mon Sep 17 00:00:00 2001 From: egaburov Date: Tue, 22 Oct 2013 16:18:40 +0200 Subject: [PATCH 01/13] added taskIndex_x,y,z and taskCount_x,y,z --- func.cpp | 39 +++++++++++++++++++++++++++++++++++++++ func.h | 5 ++++- parse.yy | 17 ++++++++++++++++- type.cpp | 6 ++++++ 4 files changed, 65 insertions(+), 2 deletions(-) diff --git a/func.cpp b/func.cpp index b975049b..dea45afc 100644 --- a/func.cpp +++ b/func.cpp @@ -132,9 +132,28 @@ Function::Function(Symbol *s, Stmt *c) { Assert(taskIndexSym); taskCountSym = m->symbolTable->LookupVariable("taskCount"); Assert(taskCountSym); + + taskIndexSym_x = m->symbolTable->LookupVariable("taskIndex_x"); + Assert(taskIndexSym_x); + taskIndexSym_y = m->symbolTable->LookupVariable("taskIndex_y"); + Assert(taskIndexSym_y); + taskIndexSym_z = m->symbolTable->LookupVariable("taskIndex_z"); + Assert(taskIndexSym_z); + + + taskCountSym_x = m->symbolTable->LookupVariable("taskCount_x"); + Assert(taskCountSym_x); + taskCountSym_y = m->symbolTable->LookupVariable("taskCount_y"); + Assert(taskCountSym_y); + taskCountSym_z = m->symbolTable->LookupVariable("taskCount_z"); + Assert(taskCountSym_z); } else + { threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; + taskIndexSym_x = taskIndexSym_y = taskIndexSym_z = NULL; + taskCountSym_x = taskCountSym_y = taskCountSym_z = NULL; + } } @@ -225,6 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; + llvm::Value *taskIndex_x = argIter++; + llvm::Value *taskIndex_y = argIter++; + llvm::Value *taskIndex_z = argIter++; + llvm::Value *taskCount_x = argIter++; + llvm::Value *taskCount_y = argIter++; + llvm::Value *taskCount_z = argIter++; // Copy the function parameter values from the structure into local // storage @@ -256,6 +281,20 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); + + taskIndexSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_x"); + ctx->StoreInst(taskIndex_x, taskIndexSym_x->storagePtr); + taskIndexSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_y"); + ctx->StoreInst(taskIndex_y, taskIndexSym_y->storagePtr); + taskIndexSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_z"); + ctx->StoreInst(taskIndex_z, taskIndexSym_z->storagePtr); + + taskCountSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_x"); + ctx->StoreInst(taskCount_x, taskCountSym_x->storagePtr); + taskCountSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_y"); + ctx->StoreInst(taskCount_y, taskCountSym_y->storagePtr); + taskCountSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_z"); + ctx->StoreInst(taskCount_z, taskCountSym_z->storagePtr); } else { // Regular, non-task function diff --git a/func.h b/func.h index ac3e1447..ee44a6c5 100644 --- a/func.h +++ b/func.h @@ -60,7 +60,10 @@ private: Stmt *code; Symbol *maskSymbol; Symbol *threadIndexSym, *threadCountSym; - Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym_x, *taskCountSym_x; + Symbol *taskIndexSym_y, *taskCountSym_y; + Symbol *taskIndexSym_z, *taskCountSym_z; }; #endif // ISPC_FUNC_H diff --git a/parse.yy b/parse.yy index 38c5ba77..1de4644f 100644 --- a/parse.yy +++ b/parse.yy @@ -2214,9 +2214,24 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) { Symbol *taskIndexSym = new Symbol("taskIndex", pos, type); m->symbolTable->AddVariable(taskIndexSym); - + Symbol *taskCountSym = new Symbol("taskCount", pos, type); m->symbolTable->AddVariable(taskCountSym); + + Symbol *taskIndexSym_x = new Symbol("taskIndex_x", pos, type); + m->symbolTable->AddVariable(taskIndexSym_x); + Symbol *taskIndexSym_y = new Symbol("taskIndex_y", pos, type); + m->symbolTable->AddVariable(taskIndexSym_y); + Symbol *taskIndexSym_z = new Symbol("taskIndex_z", pos, type); + m->symbolTable->AddVariable(taskIndexSym_z); + + + Symbol *taskCountSym_x = new Symbol("taskCount_x", pos, type); + m->symbolTable->AddVariable(taskCountSym_x); + Symbol *taskCountSym_y = new Symbol("taskCount_y", pos, type); + m->symbolTable->AddVariable(taskCountSym_y); + Symbol *taskCountSym_z = new Symbol("taskCount_z", pos, type); + m->symbolTable->AddVariable(taskCountSym_z); } diff --git a/type.cpp b/type.cpp index 5fa1845b..d36c63c2 100644 --- a/type.cpp +++ b/type.cpp @@ -2961,6 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskCount + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_x + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_y + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_z + callTypes.push_back(LLVMTypes::Int32Type); // taskCount_x + callTypes.push_back(LLVMTypes::Int32Type); // taskCount_y + callTypes.push_back(LLVMTypes::Int32Type); // taskCount_z } else // Otherwise we already have the types of the arguments From ade8751442d1dbd427b13eec3a59016cdd01807d Mon Sep 17 00:00:00 2001 From: egaburov Date: Wed, 23 Oct 2013 08:39:17 +0200 Subject: [PATCH 02/13] taskIndex_x,y,z are passed to the task --- examples/common.mk | 8 ++++---- examples/tasksys.cpp | 2 ++ 2 files changed, 6 insertions(+), 4 deletions(-) diff --git a/examples/common.mk b/examples/common.mk index db7b8eee..252c1196 100644 --- a/examples/common.mk +++ b/examples/common.mk @@ -1,11 +1,11 @@ -TASK_CXX=../tasksys.cpp +TASK_CXX=../tasksys3d.cpp TASK_LIB=-lpthread -TASK_OBJ=objs/tasksys.o +TASK_OBJ=objs/tasksys3d.o -CXX=clang++ +CXX=g++ -fopenmp CXXFLAGS+=-Iobjs/ -O2 -CC=clang +CC=gcc -fopenmp CCFLAGS+=-Iobjs/ -O2 LIBS=-lm $(TASK_LIB) -lstdc++ diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index c9c2fa7b..d7b524a8 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -59,7 +59,9 @@ #define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_CILK +*/ #define ISPC_USE_OMP +/* #define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_PARALLEL_FOR From f89bad1e945bfeb3f19c3190d87dafcd2e75e405 Mon Sep 17 00:00:00 2001 From: egaburov Date: Wed, 23 Oct 2013 12:51:06 +0200 Subject: [PATCH 03/13] launch now passes the right info into tasking --- ast.cpp | 3 ++- builtins/util.m4 | 2 +- ctx.cpp | 6 ++++-- ctx.h | 2 +- expr.cpp | 31 +++++++++++++++++---------- expr.h | 5 +++-- lex.ll | 7 ++++++ parse.yy | 56 ++++++++++++++++++++++++++++++++++++++++++++---- 8 files changed, 90 insertions(+), 22 deletions(-) diff --git a/ast.cpp b/ast.cpp index 83ee207d..60b20a80 100644 --- a/ast.cpp +++ b/ast.cpp @@ -223,7 +223,8 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc, else if ((fce = dynamic_cast(node)) != NULL) { fce->func = (Expr *)WalkAST(fce->func, preFunc, postFunc, data); fce->args = (ExprList *)WalkAST(fce->args, preFunc, postFunc, data); - fce->launchCountExpr = (Expr *)WalkAST(fce->launchCountExpr, preFunc, + for (int k = 0; k < 3; k++) + fce->launchCountExpr[0] = (Expr *)WalkAST(fce->launchCountExpr[0], preFunc, postFunc, data); } else if ((ie = dynamic_cast(node)) != NULL) { diff --git a/builtins/util.m4 b/builtins/util.m4 index 11501780..c90e8adc 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1813,7 +1813,7 @@ define(`stdlib_core', ` declare i32 @__fast_masked_vload() declare i8* @ISPCAlloc(i8**, i64, i32) nounwind -declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind +declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind declare void @ISPCSync(i8*) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind diff --git a/ctx.cpp b/ctx.cpp index c50d22f9..3aee776a 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -3502,7 +3502,7 @@ FunctionEmitContext::ReturnInst() { llvm::Value * FunctionEmitContext::LaunchInst(llvm::Value *callee, std::vector &argVals, - llvm::Value *launchCount) { + llvm::Value *launchCount[3]){ if (callee == NULL) { AssertPos(currentPos, m->errorCount > 0); return NULL; @@ -3563,7 +3563,9 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee, args.push_back(launchGroupHandlePtr); args.push_back(fptr); args.push_back(voidmem); - args.push_back(launchCount); + args.push_back(launchCount[0]); + args.push_back(launchCount[1]); + args.push_back(launchCount[2]); return CallInst(flaunch, NULL, args, ""); } diff --git a/ctx.h b/ctx.h index 58f9aae3..4dd30053 100644 --- a/ctx.h +++ b/ctx.h @@ -542,7 +542,7 @@ public: he given argument values. */ llvm::Value *LaunchInst(llvm::Value *callee, std::vector &argVals, - llvm::Value *launchCount); + llvm::Value *launchCount[3]); void SyncInst(); diff --git a/expr.cpp b/expr.cpp index c92503e0..60d9ce66 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3540,11 +3540,13 @@ SelectExpr::Print() const { // FunctionCallExpr FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p, - bool il, Expr *lce) + bool il, Expr *lce[3]) : Expr(p), isLaunch(il) { func = f; args = a; - launchCountExpr = lce; + launchCountExpr[0] = lce[0]; + launchCountExpr[1] = lce[1]; + launchCountExpr[2] = lce[2]; } @@ -3662,9 +3664,13 @@ FunctionCallExpr::GetValue(FunctionEmitContext *ctx) const { llvm::Value *retVal = NULL; ctx->SetDebugPos(pos); if (ft->isTask) { - AssertPos(pos, launchCountExpr != NULL); - llvm::Value *launchCount = launchCountExpr->GetValue(ctx); - if (launchCount != NULL) + AssertPos(pos, launchCountExpr[0] != NULL); + llvm::Value *launchCount[3] = + { launchCountExpr[0]->GetValue(ctx), + launchCountExpr[1]->GetValue(ctx), + launchCountExpr[2]->GetValue(ctx) }; + + if (launchCount[0] != NULL) ctx->LaunchInst(callee, argVals, launchCount); } else @@ -3787,14 +3793,17 @@ FunctionCallExpr::TypeCheck() { if (!isLaunch) Error(pos, "\"launch\" expression needed to call function " "with \"task\" qualifier."); - if (!launchCountExpr) + for (int k = 0; k < 3; k++) + { + if (!launchCountExpr[k]) return NULL; - launchCountExpr = - TypeConvertExpr(launchCountExpr, AtomicType::UniformInt32, - "task launch count"); - if (launchCountExpr == NULL) + launchCountExpr[k] = + TypeConvertExpr(launchCountExpr[k], AtomicType::UniformInt32, + "task launch count"); + if (launchCountExpr[k] == NULL) return NULL; + } } else { if (isLaunch) { @@ -3802,7 +3811,7 @@ FunctionCallExpr::TypeCheck() { "qualified function."); return NULL; } - AssertPos(pos, launchCountExpr == NULL); + AssertPos(pos, launchCountExpr[0] == NULL); } } else { diff --git a/expr.h b/expr.h index f8b96abd..0d46191b 100644 --- a/expr.h +++ b/expr.h @@ -246,7 +246,8 @@ public: class FunctionCallExpr : public Expr { public: FunctionCallExpr(Expr *func, ExprList *args, SourcePos p, - bool isLaunch = false, Expr *launchCountExpr = NULL); + bool isLaunch = false, + Expr *launchCountExpr[3] = (Expr*[3]){NULL, NULL, NULL}); llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const; @@ -261,7 +262,7 @@ public: Expr *func; ExprList *args; bool isLaunch; - Expr *launchCountExpr; + Expr *launchCountExpr[3]; }; diff --git a/lex.ll b/lex.ll index 87a80145..b5db747d 100644 --- a/lex.ll +++ b/lex.ll @@ -76,6 +76,7 @@ static int allTokens[] = { TOKEN_TASK, TOKEN_TRUE, TOKEN_TYPEDEF, TOKEN_UNIFORM, TOKEN_UNMASKED, TOKEN_UNSIGNED, TOKEN_VARYING, TOKEN_VOID, TOKEN_WHILE, TOKEN_STRING_C_LITERAL, TOKEN_DOTDOTDOT, + TOKEN_TRIPLECHEVRON_OPEN, TOKEN_TRIPLECHEVRON_CLOSE, TOKEN_FLOAT_CONSTANT, TOKEN_DOUBLE_CONSTANT, TOKEN_INT8_CONSTANT, TOKEN_UINT8_CONSTANT, TOKEN_INT16_CONSTANT, TOKEN_UINT16_CONSTANT, @@ -151,6 +152,8 @@ void ParserInit() { tokenToName[TOKEN_WHILE] = "while"; tokenToName[TOKEN_STRING_C_LITERAL] = "\"C\""; tokenToName[TOKEN_DOTDOTDOT] = "..."; + tokenToName[TOKEN_TRIPLECHEVRON_OPEN] = "<<<"; + tokenToName[TOKEN_TRIPLECHEVRON_CLOSE] = ">>>"; tokenToName[TOKEN_FLOAT_CONSTANT] = "TOKEN_FLOAT_CONSTANT"; tokenToName[TOKEN_DOUBLE_CONSTANT] = "TOKEN_DOUBLE_CONSTANT"; tokenToName[TOKEN_INT8_CONSTANT] = "TOKEN_INT8_CONSTANT"; @@ -266,6 +269,8 @@ void ParserInit() { tokenNameRemap["TOKEN_WHILE"] = "\'while\'"; tokenNameRemap["TOKEN_STRING_C_LITERAL"] = "\"C\""; tokenNameRemap["TOKEN_DOTDOTDOT"] = "\'...\'"; + tokenNameRemap["TOKEN_TRIPLECHEVRON_OPEN"] = "\'<<<\'"; + tokenNameRemap["TOKEN_TRIPLECHEVRON_CLOSE"] = "\'>>>\'"; tokenNameRemap["TOKEN_FLOAT_CONSTANT"] = "float constant"; tokenNameRemap["TOKEN_DOUBLE_CONSTANT"] = "double constant"; tokenNameRemap["TOKEN_INT8_CONSTANT"] = "int8 constant"; @@ -418,6 +423,8 @@ void { RT; return TOKEN_VOID; } while { RT; return TOKEN_WHILE; } \"C\" { RT; return TOKEN_STRING_C_LITERAL; } \.\.\. { RT; return TOKEN_DOTDOTDOT; } +\<\<\< { RT; return TOKEN_TRIPLECHEVRON_OPEN; } +\>\>\> { RT; return TOKEN_TRIPLECHEVRON_CLOSE; } "operator*" { return TOKEN_IDENTIFIER; } "operator+" { return TOKEN_IDENTIFIER; } diff --git a/parse.yy b/parse.yy index 1de4644f..dfb50134 100644 --- a/parse.yy +++ b/parse.yy @@ -204,6 +204,7 @@ struct ForeachDimension { %token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH %token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED %token TOKEN_FOREACH_UNIQUE TOKEN_FOREACH_ACTIVE TOKEN_DOTDOTDOT +%token TOKEN_TRIPLECHEVRON_OPEN TOKEN_TRIPLECHEVRON_CLOSE %token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN %token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE %token TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT @@ -353,17 +354,64 @@ launch_expression : TOKEN_LAUNCH postfix_expression '(' argument_expression_list ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); - $$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, oneExpr); + Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($2, $4, Union(@2, @5), true, launchCount); } | TOKEN_LAUNCH postfix_expression '(' ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @2); - $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, oneExpr); + Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount); } | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')' - { $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, $3); } + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); + } | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')' - { $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, $3); } + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); + Expr *launchCount[3] = {$3, oneExpr, oneExpr}; + $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); + } + + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); + Expr *launchCount[3] = {$3, $5, oneExpr}; + $$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); + Expr *launchCount[3] = {$3, $5, oneExpr}; + $$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount); + } + + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + { + Expr *launchCount[3] = {$3, $5, $7}; + $$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount); + } + | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + { + Expr *launchCount[3] = {$3, $5, $7}; + $$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount); + } + | TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>' { From e6c8765891af519b3b10607d90dde27ae16a53f4 Mon Sep 17 00:00:00 2001 From: egaburov Date: Wed, 23 Oct 2013 13:18:22 +0200 Subject: [PATCH 04/13] fixed tasksys.cpp for 3d tasking --- examples/common.mk | 8 +- examples/mandelbrot_tasks3d/.gitignore | 2 + examples/mandelbrot_tasks3d/Makefile | 8 + .../mandelbrot_tasks.vcxproj | 180 ++++++++++++++++++ .../mandelbrot_tasks3d/mandelbrot_tasks3d.cpp | 146 ++++++++++++++ .../mandelbrot_tasks3d.ispc | 100 ++++++++++ .../mandelbrot_tasks_serial.cpp | 68 +++++++ examples/tasksys.cpp | 62 ++++-- 8 files changed, 557 insertions(+), 17 deletions(-) create mode 100644 examples/mandelbrot_tasks3d/.gitignore create mode 100644 examples/mandelbrot_tasks3d/Makefile create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc create mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp diff --git a/examples/common.mk b/examples/common.mk index 252c1196..db7b8eee 100644 --- a/examples/common.mk +++ b/examples/common.mk @@ -1,11 +1,11 @@ -TASK_CXX=../tasksys3d.cpp +TASK_CXX=../tasksys.cpp TASK_LIB=-lpthread -TASK_OBJ=objs/tasksys3d.o +TASK_OBJ=objs/tasksys.o -CXX=g++ -fopenmp +CXX=clang++ CXXFLAGS+=-Iobjs/ -O2 -CC=gcc -fopenmp +CC=clang CCFLAGS+=-Iobjs/ -O2 LIBS=-lm $(TASK_LIB) -lstdc++ diff --git a/examples/mandelbrot_tasks3d/.gitignore b/examples/mandelbrot_tasks3d/.gitignore new file mode 100644 index 00000000..c2471c27 --- /dev/null +++ b/examples/mandelbrot_tasks3d/.gitignore @@ -0,0 +1,2 @@ +mandelbrot +*.ppm diff --git a/examples/mandelbrot_tasks3d/Makefile b/examples/mandelbrot_tasks3d/Makefile new file mode 100644 index 00000000..3dd44d65 --- /dev/null +++ b/examples/mandelbrot_tasks3d/Makefile @@ -0,0 +1,8 @@ + +EXAMPLE=mandelbrot_tasks3d +CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp +ISPC_SRC=mandelbrot_tasks3d.ispc +ISPC_IA_TARGETS=avx,sse2,sse4 +ISPC_ARM_TARGETS=neon + +include ../common.mk diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj new file mode 100644 index 00000000..3a8fca79 --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj @@ -0,0 +1,180 @@ + + + + + Debug + Win32 + + + Debug + x64 + + + Release + Win32 + + + Release + x64 + + + + {E80DA7D4-AB22-4648-A068-327307156BE6} + Win32Proj + mandelbrot_tasks + + + + Application + true + Unicode + + + Application + true + Unicode + + + Application + false + true + Unicode + + + Application + false + true + Unicode + + + + + + + + + + + + + + + + + + + true + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + true + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + false + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + false + $(ProjectDir)..\..;$(ExecutablePath) + mandelbrot_tasks + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + + + Level3 + Disabled + WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + true + Fast + + + Console + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + Level3 + + + MaxSpeed + true + true + WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) + $(TargetDir) + Fast + + + Console + true + true + true + + + + + + + + + + Document + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 + + ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 + + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h + + + + + + diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp new file mode 100644 index 00000000..9cbb966a --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp @@ -0,0 +1,146 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#define NOMINMAX +#pragma warning (disable: 4244) +#pragma warning (disable: 4305) +#endif + +#include +#include +#include +#include "../timing.h" +#include "mandelbrot_tasks3d_ispc.h" +using namespace ispc; + +extern void mandelbrot_serial(float x0, float y0, float x1, float y1, + int width, int height, int maxIterations, + int output[]); + +/* Write a PPM image file with the image of the Mandelbrot set */ +static void +writePPM(int *buf, int width, int height, const char *fn) { + FILE *fp = fopen(fn, "wb"); + fprintf(fp, "P6\n"); + fprintf(fp, "%d %d\n", width, height); + fprintf(fp, "255\n"); + for (int i = 0; i < width*height; ++i) { + // Map the iteration count to colors by just alternating between + // two greys. + char c = (buf[i] & 0x1) ? 240 : 20; + for (int j = 0; j < 3; ++j) + fputc(c, fp); + } + fclose(fp); + printf("Wrote image file %s\n", fn); +} + + +static void usage() { + fprintf(stderr, "usage: mandelbrot [--scale=]\n"); + exit(1); +} + +int main(int argc, char *argv[]) { + unsigned int width = 1536; + unsigned int height = 1024; + float x0 = -2; + float x1 = 1; + float y0 = -1; + float y1 = 1; + + if (argc == 1) + ; + else if (argc == 2) { + if (strncmp(argv[1], "--scale=", 8) == 0) { + float scale = atof(argv[1] + 8); + if (scale == 0.f) + usage(); + width *= scale; + height *= scale; + // round up to multiples of 16 + width = (width + 0xf) & ~0xf; + height = (height + 0xf) & ~0xf; + } + else + usage(); + } + else + usage(); + + int maxIterations = 512; + int *buf = new int[width*height]; + + // + // Compute the image using the ispc implementation; report the minimum + // time of three runs. + // + double minISPC = 1e30; + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) + buf[i] = 0; + reset_and_start_timer(); + mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf); + double dt = get_elapsed_mcycles(); + minISPC = std::min(minISPC, dt); + } + + printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC); + writePPM(buf, width, height, "mandelbrot-ispc.ppm"); + + + // + // And run the serial implementation 3 times, again reporting the + // minimum time. + // + double minSerial = 1e30; + for (int i = 0; i < 3; ++i) { + // Clear out the buffer + for (unsigned int i = 0; i < width * height; ++i) + buf[i] = 0; + reset_and_start_timer(); + mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); + double dt = get_elapsed_mcycles(); + minSerial = std::min(minSerial, dt); + } + + printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); + writePPM(buf, width, height, "mandelbrot-serial.ppm"); + + printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); + + return 0; +} diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc new file mode 100644 index 00000000..60473a7f --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc @@ -0,0 +1,100 @@ +/* + Copyright (c) 2010-2012, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + +static inline int +mandel(float c_re, float c_im, int count) { + float z_re = c_re, z_im = c_im; + int i; + for (i = 0; i < count; ++i) { + if (z_re * z_re + z_im * z_im > 4.) + break; + + float new_re = z_re*z_re - z_im*z_im; + float new_im = 2.f * z_re * z_im; + unmasked { + z_re = c_re + new_re; + z_im = c_im + new_im; + } + } + + return i; +} + + +/* Task to compute the Mandelbrot iterations for a single scanline. + */ +task void +mandelbrot_scanline(uniform float x0, uniform float dx, + uniform float y0, uniform float dy, + uniform int width, uniform int height, + uniform int xspan, uniform int yspan, + uniform int maxIterations, uniform int output[]) { +#if 0 + print("taskIndex = % : % \n", taskIndex); + print("taskIndex_x= % : % \n", taskIndex_x); + print("taskIndex_y= % : % \n", taskIndex_y); + print(" --- \n"); +#endif + const uniform int xstart = taskIndex_x * xspan; + const uniform int xend = min(xstart + xspan, width); + + const uniform int ystart = taskIndex_y * yspan; + const uniform int yend = min(ystart + yspan, height); + + + foreach (yi = ystart ... yend, xi = xstart ... xend) { + float x = x0 + xi * dx; + float y = y0 + yi * dy; + + int index = yi * width + xi; + output[index] = mandel(x, y, maxIterations); + } + +} + +#if 1 +export void +mandelbrot_ispc(uniform float x0, uniform float y0, + uniform float x1, uniform float y1, + uniform int width, uniform int height, + uniform int maxIterations, uniform int output[]) { + uniform float dx = (x1 - x0) / width; + uniform float dy = (y1 - y0) / height; + const uniform int xspan = 16; + const uniform int yspan = 16; + + launch <<>> + mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, + maxIterations, output); +} +#endif diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp new file mode 100644 index 00000000..a76fb5ca --- /dev/null +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp @@ -0,0 +1,68 @@ +/* + Copyright (c) 2010-2011, Intel Corporation + All rights reserved. + + Redistribution and use in source and binary forms, with or without + modification, are permitted provided that the following conditions are + met: + + * Redistributions of source code must retain the above copyright + notice, this list of conditions and the following disclaimer. + + * Redistributions in binary form must reproduce the above copyright + notice, this list of conditions and the following disclaimer in the + documentation and/or other materials provided with the distribution. + + * Neither the name of Intel Corporation nor the names of its + contributors may be used to endorse or promote products derived from + this software without specific prior written permission. + + + THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS + IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED + TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A + PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER + OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR + PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF + LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING + NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS + SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +*/ + + +static int mandel(float c_re, float c_im, int count) { + float z_re = c_re, z_im = c_im; + int i; + for (i = 0; i < count; ++i) { + if (z_re * z_re + z_im * z_im > 4.f) + break; + + float new_re = z_re*z_re - z_im*z_im; + float new_im = 2.f * z_re * z_im; + z_re = c_re + new_re; + z_im = c_im + new_im; + } + + return i; +} + +void mandelbrot_serial(float x0, float y0, float x1, float y1, + int width, int height, int maxIterations, + int output[]) +{ + float dx = (x1 - x0) / width; + float dy = (y1 - y0) / height; + + for (int j = 0; j < height; j++) { + for (int i = 0; i < width; ++i) { + float x = x0 + i * dx; + float y = y0 + j * dy; + + int index = (j * width + i); + output[index] = mandel(x, y, maxIterations); + } + } +} + diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index d7b524a8..fed368dc 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -59,9 +59,7 @@ #define ISPC_USE_PTHREADS #define ISPC_USE_PTHREADS_FULLY_SUBSCRIBED #define ISPC_USE_CILK -*/ #define ISPC_USE_OMP -/* #define ISPC_USE_TBB_TASK_GROUP #define ISPC_USE_TBB_PARALLEL_FOR @@ -172,21 +170,39 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, - int taskIndex, int taskCount); + int taskIndex, int taskCount, + int taskIndex_x, int taskIndex_y, int taskIndex_z, + int taskCount_x, int taskCount_y, int taskCount_z); // Small structure used to hold the data for each task struct TaskInfo { TaskFuncType func; void *data; int taskIndex, taskCount; + int taskCount3d[3]; #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif + int taskIndex_x() const + { + return taskIndex % taskCount3d[0]; + } + int taskIndex_y() const + { + return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; + } + int taskIndex_z() const + { + return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); + } + int taskCount_x() const { return taskCount3d[0]; } + int taskCount_y() const { return taskCount3d[1]; } + int taskCount_z() const { return taskCount3d[2]; } }; // ispc expects these functions to have C linkage / not be mangled extern "C" { - void ISPCLaunch(void **handlePtr, void *f, void *data, int count); + void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void ISPCSync(void *handle); } @@ -520,7 +536,9 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, - taskInfo->taskIndex, taskInfo->taskCount); + taskInfo->taskIndex, taskInfo->taskCount, + taskInfo->taskIndex_x(), taskInfo->taskIndex_y(), taskInfo->taskIndex_z(), + taskInfo->taskCount_x(), taskInfo->taskCount_y(), taskInfo->taskCount_z()); } @@ -561,7 +579,9 @@ lRunTask(LPVOID param) { // will cause bugs in code that uses those. int threadIndex = 0; int threadCount = 1; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); // Signal the event that this task is done ti->taskEvent.set(); @@ -662,7 +682,9 @@ lTaskEntry(void *arg) { DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, - myTask->taskCount); + myTask->taskCount, + myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), + myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); // // Decrement the "number of unfinished tasks" counter in the task @@ -863,7 +885,9 @@ TaskGroup::Sync() { // Do work for _myTask_ // // FIXME: bogus values for thread index/thread count here as well.. - myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount); + myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount, + myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), + myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); // // Decrement the number of unfinished tasks counter @@ -893,7 +917,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 - ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); } } @@ -922,7 +948,9 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); } } @@ -953,7 +981,9 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); }); } @@ -980,7 +1010,9 @@ TaskGroup::Launch(int baseIndex, int count) { // TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount); + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), + ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); }); } } @@ -1033,7 +1065,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, int countz) { + const int count = countx*county*countz; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1050,6 +1083,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count) { ti->data = data; ti->taskIndex = i; ti->taskCount = count; + ti->taskCount3d[0] = countx; + ti->taskCount3d[1] = county; + ti->taskCount3d[2] = countz; } taskGroup->Launch(baseIndex, count); } From 43761173ec8f653531d73b346ff2d190f9206dba Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 24 Oct 2013 13:16:23 +0200 Subject: [PATCH 05/13] changed notation, task[Index,Count]_[x,y,z] -> task[Index,Count][1,2,3]. Change launch <<< nx,ny,nz >>> into launch [nx,ny,nz] or equivalent launch [nz][ny][nx]. Programmer can pick the one the is liked the most --- .../mandelbrot_tasks3d.ispc | 19 +++-- examples/tasksys.cpp | 58 +++++++------- func.cpp | 64 ++++++++-------- func.h | 6 +- lex.ll | 7 -- parse.yy | 76 +++++++++++-------- type.cpp | 12 +-- 7 files changed, 122 insertions(+), 120 deletions(-) diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc index 60473a7f..8bdf6f7a 100644 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc @@ -59,16 +59,10 @@ mandelbrot_scanline(uniform float x0, uniform float dx, uniform int width, uniform int height, uniform int xspan, uniform int yspan, uniform int maxIterations, uniform int output[]) { -#if 0 - print("taskIndex = % : % \n", taskIndex); - print("taskIndex_x= % : % \n", taskIndex_x); - print("taskIndex_y= % : % \n", taskIndex_y); - print(" --- \n"); -#endif - const uniform int xstart = taskIndex_x * xspan; + const uniform int xstart = taskIndex1 * xspan; const uniform int xend = min(xstart + xspan, width); - const uniform int ystart = taskIndex_y * yspan; + const uniform int ystart = taskIndex2 * yspan; const uniform int yend = min(ystart + yspan, height); @@ -90,10 +84,15 @@ mandelbrot_ispc(uniform float x0, uniform float y0, uniform int maxIterations, uniform int output[]) { uniform float dx = (x1 - x0) / width; uniform float dy = (y1 - y0) / height; - const uniform int xspan = 16; + const uniform int xspan = 16; /* make sure it is big enough to avoid false-sharing */ const uniform int yspan = 16; - launch <<>> + +#if 1 + launch [width/xspan, height/yspan] +#else + launch [height/yspan][width/xspan] +#endif mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, maxIterations, output); } diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index fed368dc..5ef72ed9 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -171,8 +171,8 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, int taskIndex, int taskCount, - int taskIndex_x, int taskIndex_y, int taskIndex_z, - int taskCount_x, int taskCount_y, int taskCount_z); + int taskIndex1, int taskIndex2, int taskIndex3, + int taskCount1, int taskCount2, int taskCount3); // Small structure used to hold the data for each task struct TaskInfo { @@ -183,21 +183,21 @@ struct TaskInfo { #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif - int taskIndex_x() const + int taskIndex1() const { return taskIndex % taskCount3d[0]; } - int taskIndex_y() const + int taskIndex2() const { return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; } - int taskIndex_z() const + int taskIndex3() const { return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); } - int taskCount_x() const { return taskCount3d[0]; } - int taskCount_y() const { return taskCount3d[1]; } - int taskCount_z() const { return taskCount3d[2]; } + int taskCount1() const { return taskCount3d[0]; } + int taskCount2() const { return taskCount3d[1]; } + int taskCount3() const { return taskCount3d[2]; } }; // ispc expects these functions to have C linkage / not be mangled @@ -537,8 +537,8 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, taskInfo->taskIndex, taskInfo->taskCount, - taskInfo->taskIndex_x(), taskInfo->taskIndex_y(), taskInfo->taskIndex_z(), - taskInfo->taskCount_x(), taskInfo->taskCount_y(), taskInfo->taskCount_z()); + taskInfo->taskIndex1(), taskInfo->taskIndex2(), taskInfo->taskIndex3(), + taskInfo->taskCount1(), taskInfo->taskCount2(), taskInfo->taskCount3()); } @@ -580,8 +580,8 @@ lRunTask(LPVOID param) { int threadIndex = 0; int threadCount = 1; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); // Signal the event that this task is done ti->taskEvent.set(); @@ -683,8 +683,8 @@ lTaskEntry(void *arg) { TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, myTask->taskCount, - myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), - myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); + myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), + myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); // // Decrement the "number of unfinished tasks" counter in the task @@ -886,8 +886,8 @@ TaskGroup::Sync() { // // FIXME: bogus values for thread index/thread count here as well.. myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount, - myTask->taskIndex_x(), myTask->taskIndex_y(), myTask->taskIndex_z(), - myTask->taskCount_x(), myTask->taskCount_y(), myTask->taskCount_z()); + myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), + myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); // // Decrement the number of unfinished tasks counter @@ -918,8 +918,8 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } } @@ -949,8 +949,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } } @@ -982,8 +982,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); } @@ -1011,8 +1011,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, - ti->taskIndex_x(), ti->taskIndex_y(), ti->taskIndex_z(), - ti->taskCount_x(), ti->taskCount_y(), ti->taskCount_z()); + ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), + ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); } } @@ -1065,8 +1065,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, int countz) { - const int count = countx*county*countz; +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, int count3) { + const int count = count1*count2*count3; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1083,9 +1083,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int countx, int county, ti->data = data; ti->taskIndex = i; ti->taskCount = count; - ti->taskCount3d[0] = countx; - ti->taskCount3d[1] = county; - ti->taskCount3d[2] = countz; + ti->taskCount3d[0] = count1; + ti->taskCount3d[1] = count2; + ti->taskCount3d[2] = count3; } taskGroup->Launch(baseIndex, count); } diff --git a/func.cpp b/func.cpp index dea45afc..086be6fe 100644 --- a/func.cpp +++ b/func.cpp @@ -133,26 +133,26 @@ Function::Function(Symbol *s, Stmt *c) { taskCountSym = m->symbolTable->LookupVariable("taskCount"); Assert(taskCountSym); - taskIndexSym_x = m->symbolTable->LookupVariable("taskIndex_x"); - Assert(taskIndexSym_x); - taskIndexSym_y = m->symbolTable->LookupVariable("taskIndex_y"); - Assert(taskIndexSym_y); - taskIndexSym_z = m->symbolTable->LookupVariable("taskIndex_z"); - Assert(taskIndexSym_z); + taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1"); + Assert(taskIndexSym1); + taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2"); + Assert(taskIndexSym2); + taskIndexSym3 = m->symbolTable->LookupVariable("taskIndex3"); + Assert(taskIndexSym3); - taskCountSym_x = m->symbolTable->LookupVariable("taskCount_x"); - Assert(taskCountSym_x); - taskCountSym_y = m->symbolTable->LookupVariable("taskCount_y"); - Assert(taskCountSym_y); - taskCountSym_z = m->symbolTable->LookupVariable("taskCount_z"); - Assert(taskCountSym_z); + taskCountSym1 = m->symbolTable->LookupVariable("taskCount1"); + Assert(taskCountSym1); + taskCountSym2 = m->symbolTable->LookupVariable("taskCount2"); + Assert(taskCountSym2); + taskCountSym3 = m->symbolTable->LookupVariable("taskCount3"); + Assert(taskCountSym3); } else { threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; - taskIndexSym_x = taskIndexSym_y = taskIndexSym_z = NULL; - taskCountSym_x = taskCountSym_y = taskCountSym_z = NULL; + taskIndexSym1 = taskIndexSym2 = taskIndexSym3 = NULL; + taskCountSym1 = taskCountSym2 = taskCountSym3 = NULL; } } @@ -244,12 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; - llvm::Value *taskIndex_x = argIter++; - llvm::Value *taskIndex_y = argIter++; - llvm::Value *taskIndex_z = argIter++; - llvm::Value *taskCount_x = argIter++; - llvm::Value *taskCount_y = argIter++; - llvm::Value *taskCount_z = argIter++; + llvm::Value *taskIndex1 = argIter++; + llvm::Value *taskIndex2 = argIter++; + llvm::Value *taskIndex3 = argIter++; + llvm::Value *taskCount1 = argIter++; + llvm::Value *taskCount2 = argIter++; + llvm::Value *taskCount3 = argIter++; // Copy the function parameter values from the structure into local // storage @@ -282,19 +282,19 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); - taskIndexSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_x"); - ctx->StoreInst(taskIndex_x, taskIndexSym_x->storagePtr); - taskIndexSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_y"); - ctx->StoreInst(taskIndex_y, taskIndexSym_y->storagePtr); - taskIndexSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex_z"); - ctx->StoreInst(taskIndex_z, taskIndexSym_z->storagePtr); + taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1"); + ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr); + taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2"); + ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr); + taskIndexSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex3"); + ctx->StoreInst(taskIndex3, taskIndexSym3->storagePtr); - taskCountSym_x->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_x"); - ctx->StoreInst(taskCount_x, taskCountSym_x->storagePtr); - taskCountSym_y->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_y"); - ctx->StoreInst(taskCount_y, taskCountSym_y->storagePtr); - taskCountSym_z->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount_z"); - ctx->StoreInst(taskCount_z, taskCountSym_z->storagePtr); + taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1"); + ctx->StoreInst(taskCount1, taskCountSym1->storagePtr); + taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2"); + ctx->StoreInst(taskCount2, taskCountSym2->storagePtr); + taskCountSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount3"); + ctx->StoreInst(taskCount3, taskCountSym3->storagePtr); } else { // Regular, non-task function diff --git a/func.h b/func.h index ee44a6c5..4181bba5 100644 --- a/func.h +++ b/func.h @@ -61,9 +61,9 @@ private: Symbol *maskSymbol; Symbol *threadIndexSym, *threadCountSym; Symbol *taskIndexSym, *taskCountSym; - Symbol *taskIndexSym_x, *taskCountSym_x; - Symbol *taskIndexSym_y, *taskCountSym_y; - Symbol *taskIndexSym_z, *taskCountSym_z; + Symbol *taskIndexSym1, *taskCountSym1; + Symbol *taskIndexSym2, *taskCountSym2; + Symbol *taskIndexSym3, *taskCountSym3; }; #endif // ISPC_FUNC_H diff --git a/lex.ll b/lex.ll index b5db747d..87a80145 100644 --- a/lex.ll +++ b/lex.ll @@ -76,7 +76,6 @@ static int allTokens[] = { TOKEN_TASK, TOKEN_TRUE, TOKEN_TYPEDEF, TOKEN_UNIFORM, TOKEN_UNMASKED, TOKEN_UNSIGNED, TOKEN_VARYING, TOKEN_VOID, TOKEN_WHILE, TOKEN_STRING_C_LITERAL, TOKEN_DOTDOTDOT, - TOKEN_TRIPLECHEVRON_OPEN, TOKEN_TRIPLECHEVRON_CLOSE, TOKEN_FLOAT_CONSTANT, TOKEN_DOUBLE_CONSTANT, TOKEN_INT8_CONSTANT, TOKEN_UINT8_CONSTANT, TOKEN_INT16_CONSTANT, TOKEN_UINT16_CONSTANT, @@ -152,8 +151,6 @@ void ParserInit() { tokenToName[TOKEN_WHILE] = "while"; tokenToName[TOKEN_STRING_C_LITERAL] = "\"C\""; tokenToName[TOKEN_DOTDOTDOT] = "..."; - tokenToName[TOKEN_TRIPLECHEVRON_OPEN] = "<<<"; - tokenToName[TOKEN_TRIPLECHEVRON_CLOSE] = ">>>"; tokenToName[TOKEN_FLOAT_CONSTANT] = "TOKEN_FLOAT_CONSTANT"; tokenToName[TOKEN_DOUBLE_CONSTANT] = "TOKEN_DOUBLE_CONSTANT"; tokenToName[TOKEN_INT8_CONSTANT] = "TOKEN_INT8_CONSTANT"; @@ -269,8 +266,6 @@ void ParserInit() { tokenNameRemap["TOKEN_WHILE"] = "\'while\'"; tokenNameRemap["TOKEN_STRING_C_LITERAL"] = "\"C\""; tokenNameRemap["TOKEN_DOTDOTDOT"] = "\'...\'"; - tokenNameRemap["TOKEN_TRIPLECHEVRON_OPEN"] = "\'<<<\'"; - tokenNameRemap["TOKEN_TRIPLECHEVRON_CLOSE"] = "\'>>>\'"; tokenNameRemap["TOKEN_FLOAT_CONSTANT"] = "float constant"; tokenNameRemap["TOKEN_DOUBLE_CONSTANT"] = "double constant"; tokenNameRemap["TOKEN_INT8_CONSTANT"] = "int8 constant"; @@ -423,8 +418,6 @@ void { RT; return TOKEN_VOID; } while { RT; return TOKEN_WHILE; } \"C\" { RT; return TOKEN_STRING_C_LITERAL; } \.\.\. { RT; return TOKEN_DOTDOTDOT; } -\<\<\< { RT; return TOKEN_TRIPLECHEVRON_OPEN; } -\>\>\> { RT; return TOKEN_TRIPLECHEVRON_CLOSE; } "operator*" { return TOKEN_IDENTIFIER; } "operator+" { return TOKEN_IDENTIFIER; } diff --git a/parse.yy b/parse.yy index dfb50134..653bba62 100644 --- a/parse.yy +++ b/parse.yy @@ -204,7 +204,6 @@ struct ForeachDimension { %token TOKEN_CASE TOKEN_DEFAULT TOKEN_IF TOKEN_ELSE TOKEN_SWITCH %token TOKEN_WHILE TOKEN_DO TOKEN_LAUNCH TOKEN_FOREACH TOKEN_FOREACH_TILED %token TOKEN_FOREACH_UNIQUE TOKEN_FOREACH_ACTIVE TOKEN_DOTDOTDOT -%token TOKEN_TRIPLECHEVRON_OPEN TOKEN_TRIPLECHEVRON_CLOSE %token TOKEN_FOR TOKEN_GOTO TOKEN_CONTINUE TOKEN_BREAK TOKEN_RETURN %token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE %token TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT @@ -363,54 +362,65 @@ launch_expression Expr *launchCount[3] = {oneExpr, oneExpr, oneExpr}; $$ = new FunctionCallExpr($2, new ExprList(Union(@3,@4)), Union(@2, @4), true, launchCount); } - | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' argument_expression_list ')' + + | TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); Expr *launchCount[3] = {$3, oneExpr, oneExpr}; $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' - { - ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); - Expr *launchCount[3] = {$3, oneExpr, oneExpr}; - $$ = new FunctionCallExpr($5, $7, Union(@5,@8), true, launchCount); - } - | TOKEN_LAUNCH '[' expression ']' postfix_expression '(' ')' - { - ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); - Expr *launchCount[3] = {$3, oneExpr, oneExpr}; - $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); - } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + | TOKEN_LAUNCH '[' assignment_expression ']' postfix_expression '(' ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @5); Expr *launchCount[3] = {$3, oneExpr, oneExpr}; $$ = new FunctionCallExpr($5, new ExprList(Union(@5,@6)), Union(@5,@7), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); Expr *launchCount[3] = {$3, $5, oneExpr}; $$ = new FunctionCallExpr($7, $9, Union(@7,@10), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')' { ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @7); Expr *launchCount[3] = {$3, $5, oneExpr}; $$ = new FunctionCallExpr($7, new ExprList(Union(@7,@8)), Union(@7,@9), true, launchCount); } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8); + Expr *launchCount[3] = {$6, $3, oneExpr}; + $$ = new FunctionCallExpr($8, $10, Union(@8,@11), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')' + { + ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, (int32_t)1, @8); + Expr *launchCount[3] = {$6, $3, oneExpr}; + $$ = new FunctionCallExpr($8, new ExprList(Union(@8,@9)), Union(@8,@10), true, launchCount); + } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' argument_expression_list ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' argument_expression_list ')' { Expr *launchCount[3] = {$3, $5, $7}; $$ = new FunctionCallExpr($9, $11, Union(@9,@12), true, launchCount); } - | TOKEN_LAUNCH TOKEN_TRIPLECHEVRON_OPEN assignment_expression ',' assignment_expression ',' assignment_expression TOKEN_TRIPLECHEVRON_CLOSE postfix_expression '(' ')' + | TOKEN_LAUNCH '[' assignment_expression ',' assignment_expression ',' assignment_expression ']' postfix_expression '(' ')' { Expr *launchCount[3] = {$3, $5, $7}; $$ = new FunctionCallExpr($9, new ExprList(Union(@9,@10)), Union(@9,@11), true, launchCount); } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' argument_expression_list ')' + { + Expr *launchCount[3] = {$9, $6, $3}; + $$ = new FunctionCallExpr($11, $13, Union(@11,@14), true, launchCount); + } + | TOKEN_LAUNCH '[' assignment_expression ']' '[' assignment_expression ']' '[' assignment_expression ']' postfix_expression '(' ')' + { + Expr *launchCount[3] = {$9, $6, $3}; + $$ = new FunctionCallExpr($11, new ExprList(Union(@11,@12)), Union(@11,@13), true, launchCount); + } | TOKEN_LAUNCH '<' postfix_expression '(' argument_expression_list ')' '>' @@ -425,13 +435,13 @@ launch_expression "around function call expression."); $$ = NULL; } - | TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' argument_expression_list ')' '>' + | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' argument_expression_list ')' '>' { Error(Union(@5, @10), "\"launch\" expressions no longer take '<' '>' " "around function call expression."); $$ = NULL; } - | TOKEN_LAUNCH '[' expression ']' '<' postfix_expression '(' ')' '>' + | TOKEN_LAUNCH '[' assignment_expression ']' '<' postfix_expression '(' ')' '>' { Error(Union(@5, @9), "\"launch\" expressions no longer take '<' '>' " "around function call expression."); @@ -2266,20 +2276,20 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) { Symbol *taskCountSym = new Symbol("taskCount", pos, type); m->symbolTable->AddVariable(taskCountSym); - Symbol *taskIndexSym_x = new Symbol("taskIndex_x", pos, type); - m->symbolTable->AddVariable(taskIndexSym_x); - Symbol *taskIndexSym_y = new Symbol("taskIndex_y", pos, type); - m->symbolTable->AddVariable(taskIndexSym_y); - Symbol *taskIndexSym_z = new Symbol("taskIndex_z", pos, type); - m->symbolTable->AddVariable(taskIndexSym_z); + Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type); + m->symbolTable->AddVariable(taskIndexSym1); + Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type); + m->symbolTable->AddVariable(taskIndexSym2); + Symbol *taskIndexSym3 = new Symbol("taskIndex3", pos, type); + m->symbolTable->AddVariable(taskIndexSym3); - Symbol *taskCountSym_x = new Symbol("taskCount_x", pos, type); - m->symbolTable->AddVariable(taskCountSym_x); - Symbol *taskCountSym_y = new Symbol("taskCount_y", pos, type); - m->symbolTable->AddVariable(taskCountSym_y); - Symbol *taskCountSym_z = new Symbol("taskCount_z", pos, type); - m->symbolTable->AddVariable(taskCountSym_z); + Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type); + m->symbolTable->AddVariable(taskCountSym1); + Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type); + m->symbolTable->AddVariable(taskCountSym2); + Symbol *taskCountSym3 = new Symbol("taskCount3", pos, type); + m->symbolTable->AddVariable(taskCountSym3); } diff --git a/type.cpp b/type.cpp index d36c63c2..3ae0cab4 100644 --- a/type.cpp +++ b/type.cpp @@ -2961,12 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskCount - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_x - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_y - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex_z - callTypes.push_back(LLVMTypes::Int32Type); // taskCount_x - callTypes.push_back(LLVMTypes::Int32Type); // taskCount_y - callTypes.push_back(LLVMTypes::Int32Type); // taskCount_z + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1 + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2 + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex3 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount1 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount2 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount3 } else // Otherwise we already have the types of the arguments From c5fc47cc1959cf8fb4b170a6b378614c811591a7 Mon Sep 17 00:00:00 2001 From: egaburov Date: Thu, 24 Oct 2013 14:09:46 +0200 Subject: [PATCH 06/13] tasksys cleaning --- examples/tasksys.cpp | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 5ef72ed9..4c85e119 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -178,11 +178,12 @@ typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, struct TaskInfo { TaskFuncType func; void *data; - int taskIndex, taskCount; + int taskIndex; int taskCount3d[3]; #if defined(ISPC_IS_WINDOWS) event taskEvent; #endif + int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; } int taskIndex1() const { return taskIndex % taskCount3d[0]; @@ -198,7 +199,8 @@ struct TaskInfo { int taskCount1() const { return taskCount3d[0]; } int taskCount2() const { return taskCount3d[1]; } int taskCount3() const { return taskCount3d[2]; } -}; + TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); } +} __attribute__((aligned(32))); // ispc expects these functions to have C linkage / not be mangled extern "C" { @@ -536,7 +538,7 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, - taskInfo->taskIndex, taskInfo->taskCount, + taskInfo->taskIndex, taskInfo->taskCount(), taskInfo->taskIndex1(), taskInfo->taskIndex2(), taskInfo->taskIndex3(), taskInfo->taskCount1(), taskInfo->taskCount2(), taskInfo->taskCount3()); } @@ -579,7 +581,7 @@ lRunTask(LPVOID param) { // will cause bugs in code that uses those. int threadIndex = 0; int threadCount = 1; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); @@ -682,7 +684,7 @@ lTaskEntry(void *arg) { DBG(fprintf(stderr, "running task %d from group %p\n", taskNumber, tg)); TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, - myTask->taskCount, + myTask->taskCount(), myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); @@ -885,7 +887,7 @@ TaskGroup::Sync() { // Do work for _myTask_ // // FIXME: bogus values for thread index/thread count here as well.. - myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount, + myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(), myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); @@ -917,7 +919,7 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 - ti->func(ti->data, ti->taskIndex, ti->taskCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } @@ -948,7 +950,7 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); } @@ -981,7 +983,7 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); @@ -1010,7 +1012,7 @@ TaskGroup::Launch(int baseIndex, int count) { // TBB does not expose the task -> thread mapping so we pretend it's 1:1 int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; - ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount, + ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); }); @@ -1082,7 +1084,6 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, ti->func = (TaskFuncType)func; ti->data = data; ti->taskIndex = i; - ti->taskCount = count; ti->taskCount3d[0] = count1; ti->taskCount3d[1] = count2; ti->taskCount3d[2] = count3; From 383e804ec1e591370899babf1c1aa549995e8bae Mon Sep 17 00:00:00 2001 From: Evghenii Date: Thu, 24 Oct 2013 17:20:56 +0200 Subject: [PATCH 07/13] changed notation form taskIndex1,2,3 -> taskIndex0,1,2 --- .../mandelbrot_tasks3d.ispc | 4 +- examples/tasksys.cpp | 58 +++++++++---------- func.cpp | 24 ++++---- func.h | 2 +- parse.yy | 8 +-- type.cpp | 4 +- 6 files changed, 50 insertions(+), 50 deletions(-) diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc index 8bdf6f7a..395bdca4 100644 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc +++ b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc @@ -59,10 +59,10 @@ mandelbrot_scanline(uniform float x0, uniform float dx, uniform int width, uniform int height, uniform int xspan, uniform int yspan, uniform int maxIterations, uniform int output[]) { - const uniform int xstart = taskIndex1 * xspan; + const uniform int xstart = taskIndex0 * xspan; const uniform int xend = min(xstart + xspan, width); - const uniform int ystart = taskIndex2 * yspan; + const uniform int ystart = taskIndex1 * yspan; const uniform int yend = min(ystart + yspan, height); diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 4c85e119..6bc60129 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -171,8 +171,8 @@ // Signature of ispc-generated 'task' functions typedef void (*TaskFuncType)(void *data, int threadIndex, int threadCount, int taskIndex, int taskCount, - int taskIndex1, int taskIndex2, int taskIndex3, - int taskCount1, int taskCount2, int taskCount3); + int taskIndex0, int taskIndex1, int taskIndex2, + int taskCount0, int taskCount1, int taskCount2); // Small structure used to hold the data for each task struct TaskInfo { @@ -184,21 +184,21 @@ struct TaskInfo { event taskEvent; #endif int taskCount() const { return taskCount3d[0]*taskCount3d[1]*taskCount3d[2]; } - int taskIndex1() const + int taskIndex0() const { return taskIndex % taskCount3d[0]; } - int taskIndex2() const + int taskIndex1() const { return ( taskIndex / taskCount3d[0] ) % taskCount3d[1]; } - int taskIndex3() const + int taskIndex2() const { return taskIndex / ( taskCount3d[0]*taskCount3d[1] ); } - int taskCount1() const { return taskCount3d[0]; } - int taskCount2() const { return taskCount3d[1]; } - int taskCount3() const { return taskCount3d[2]; } + int taskCount0() const { return taskCount3d[0]; } + int taskCount1() const { return taskCount3d[1]; } + int taskCount2() const { return taskCount3d[2]; } TaskInfo() { assert(sizeof(TaskInfo) % 32 == 0); } } __attribute__((aligned(32))); @@ -539,8 +539,8 @@ lRunTask(void *ti) { // Actually run the task taskInfo->func(taskInfo->data, threadIndex, threadCount, taskInfo->taskIndex, taskInfo->taskCount(), - taskInfo->taskIndex1(), taskInfo->taskIndex2(), taskInfo->taskIndex3(), - taskInfo->taskCount1(), taskInfo->taskCount2(), taskInfo->taskCount3()); + taskInfo->taskIndex0(), taskInfo->taskIndex1(), taskInfo->taskIndex2(), + taskInfo->taskCount0(), taskInfo->taskCount1(), taskInfo->taskCount2()); } @@ -582,8 +582,8 @@ lRunTask(LPVOID param) { int threadIndex = 0; int threadCount = 1; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); // Signal the event that this task is done ti->taskEvent.set(); @@ -685,8 +685,8 @@ lTaskEntry(void *arg) { TaskInfo *myTask = tg->GetTaskInfo(taskNumber); myTask->func(myTask->data, threadIndex, threadCount, myTask->taskIndex, myTask->taskCount(), - myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), - myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); + myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(), + myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2()); // // Decrement the "number of unfinished tasks" counter in the task @@ -888,8 +888,8 @@ TaskGroup::Sync() { // // FIXME: bogus values for thread index/thread count here as well.. myTask->func(myTask->data, 0, 1, myTask->taskIndex, myTask->taskCount(), - myTask->taskIndex1(), myTask->taskIndex2(), myTask->taskIndex3(), - myTask->taskCount1(), myTask->taskCount2(), myTask->taskCount3()); + myTask->taskIndex0(), myTask->taskIndex1(), myTask->taskIndex2(), + myTask->taskCount0(), myTask->taskCount1(), myTask->taskCount2()); // // Decrement the number of unfinished tasks counter @@ -920,8 +920,8 @@ TaskGroup::Launch(int baseIndex, int count) { // Actually run the task. // Cilk does not expose the task -> thread mapping so we pretend it's 1:1 ti->func(ti->data, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); } } @@ -951,8 +951,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = omp_get_thread_num(); int threadCount = omp_get_num_threads(); ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); } } @@ -984,8 +984,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); }); } @@ -1013,8 +1013,8 @@ TaskGroup::Launch(int baseIndex, int count) { int threadIndex = ti->taskIndex; int threadCount = ti->taskCount; ti->func(ti->data, threadIndex, threadCount, ti->taskIndex, ti->taskCount(), - ti->taskIndex1(), ti->taskIndex2(), ti->taskIndex3(), - ti->taskCount1(), ti->taskCount2(), ti->taskCount3()); + ti->taskIndex0(), ti->taskIndex1(), ti->taskIndex2(), + ti->taskCount0(), ti->taskCount1(), ti->taskCount2()); }); } } @@ -1067,8 +1067,8 @@ FreeTaskGroup(TaskGroup *tg) { /////////////////////////////////////////////////////////////////////////// void -ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, int count3) { - const int count = count1*count2*count3; +ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count0, int count1, int count2) { + const int count = count0*count1*count2; TaskGroup *taskGroup; if (*taskGroupPtr == NULL) { InitTaskSystem(); @@ -1084,9 +1084,9 @@ ISPCLaunch(void **taskGroupPtr, void *func, void *data, int count1, int count2, ti->func = (TaskFuncType)func; ti->data = data; ti->taskIndex = i; - ti->taskCount3d[0] = count1; - ti->taskCount3d[1] = count2; - ti->taskCount3d[2] = count3; + ti->taskCount3d[0] = count0; + ti->taskCount3d[1] = count1; + ti->taskCount3d[2] = count2; } taskGroup->Launch(baseIndex, count); } diff --git a/func.cpp b/func.cpp index 086be6fe..af2cc05a 100644 --- a/func.cpp +++ b/func.cpp @@ -133,26 +133,26 @@ Function::Function(Symbol *s, Stmt *c) { taskCountSym = m->symbolTable->LookupVariable("taskCount"); Assert(taskCountSym); + taskIndexSym0 = m->symbolTable->LookupVariable("taskIndex0"); + Assert(taskIndexSym0); taskIndexSym1 = m->symbolTable->LookupVariable("taskIndex1"); Assert(taskIndexSym1); taskIndexSym2 = m->symbolTable->LookupVariable("taskIndex2"); Assert(taskIndexSym2); - taskIndexSym3 = m->symbolTable->LookupVariable("taskIndex3"); - Assert(taskIndexSym3); + taskCountSym0 = m->symbolTable->LookupVariable("taskCount0"); + Assert(taskCountSym0); taskCountSym1 = m->symbolTable->LookupVariable("taskCount1"); Assert(taskCountSym1); taskCountSym2 = m->symbolTable->LookupVariable("taskCount2"); Assert(taskCountSym2); - taskCountSym3 = m->symbolTable->LookupVariable("taskCount3"); - Assert(taskCountSym3); } else { threadIndexSym = threadCountSym = taskIndexSym = taskCountSym = NULL; - taskIndexSym1 = taskIndexSym2 = taskIndexSym3 = NULL; - taskCountSym1 = taskCountSym2 = taskCountSym3 = NULL; + taskIndexSym0 = taskIndexSym1 = taskIndexSym2 = NULL; + taskCountSym0 = taskCountSym1 = taskCountSym2 = NULL; } } @@ -244,12 +244,12 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, llvm::Value *threadCount = argIter++; llvm::Value *taskIndex = argIter++; llvm::Value *taskCount = argIter++; + llvm::Value *taskIndex0 = argIter++; llvm::Value *taskIndex1 = argIter++; llvm::Value *taskIndex2 = argIter++; - llvm::Value *taskIndex3 = argIter++; + llvm::Value *taskCount0 = argIter++; llvm::Value *taskCount1 = argIter++; llvm::Value *taskCount2 = argIter++; - llvm::Value *taskCount3 = argIter++; // Copy the function parameter values from the structure into local // storage @@ -282,19 +282,19 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function, taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount"); ctx->StoreInst(taskCount, taskCountSym->storagePtr); + taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0"); + ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr); taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1"); ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr); taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2"); ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr); - taskIndexSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex3"); - ctx->StoreInst(taskIndex3, taskIndexSym3->storagePtr); + taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0"); + ctx->StoreInst(taskCount0, taskCountSym0->storagePtr); taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1"); ctx->StoreInst(taskCount1, taskCountSym1->storagePtr); taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2"); ctx->StoreInst(taskCount2, taskCountSym2->storagePtr); - taskCountSym3->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount3"); - ctx->StoreInst(taskCount3, taskCountSym3->storagePtr); } else { // Regular, non-task function diff --git a/func.h b/func.h index 4181bba5..88a96dbc 100644 --- a/func.h +++ b/func.h @@ -61,9 +61,9 @@ private: Symbol *maskSymbol; Symbol *threadIndexSym, *threadCountSym; Symbol *taskIndexSym, *taskCountSym; + Symbol *taskIndexSym0, *taskCountSym0; Symbol *taskIndexSym1, *taskCountSym1; Symbol *taskIndexSym2, *taskCountSym2; - Symbol *taskIndexSym3, *taskCountSym3; }; #endif // ISPC_FUNC_H diff --git a/parse.yy b/parse.yy index 653bba62..9a0377c5 100644 --- a/parse.yy +++ b/parse.yy @@ -2276,20 +2276,20 @@ static void lAddThreadIndexCountToSymbolTable(SourcePos pos) { Symbol *taskCountSym = new Symbol("taskCount", pos, type); m->symbolTable->AddVariable(taskCountSym); + Symbol *taskIndexSym0 = new Symbol("taskIndex0", pos, type); + m->symbolTable->AddVariable(taskIndexSym0); Symbol *taskIndexSym1 = new Symbol("taskIndex1", pos, type); m->symbolTable->AddVariable(taskIndexSym1); Symbol *taskIndexSym2 = new Symbol("taskIndex2", pos, type); m->symbolTable->AddVariable(taskIndexSym2); - Symbol *taskIndexSym3 = new Symbol("taskIndex3", pos, type); - m->symbolTable->AddVariable(taskIndexSym3); + Symbol *taskCountSym0 = new Symbol("taskCount0", pos, type); + m->symbolTable->AddVariable(taskCountSym0); Symbol *taskCountSym1 = new Symbol("taskCount1", pos, type); m->symbolTable->AddVariable(taskCountSym1); Symbol *taskCountSym2 = new Symbol("taskCount2", pos, type); m->symbolTable->AddVariable(taskCountSym2); - Symbol *taskCountSym3 = new Symbol("taskCount3", pos, type); - m->symbolTable->AddVariable(taskCountSym3); } diff --git a/type.cpp b/type.cpp index 3ae0cab4..516276f0 100644 --- a/type.cpp +++ b/type.cpp @@ -2961,12 +2961,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { callTypes.push_back(LLVMTypes::Int32Type); // threadCount callTypes.push_back(LLVMTypes::Int32Type); // taskIndex callTypes.push_back(LLVMTypes::Int32Type); // taskCount + callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1 callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2 - callTypes.push_back(LLVMTypes::Int32Type); // taskIndex3 + callTypes.push_back(LLVMTypes::Int32Type); // taskCount0 callTypes.push_back(LLVMTypes::Int32Type); // taskCount1 callTypes.push_back(LLVMTypes::Int32Type); // taskCount2 - callTypes.push_back(LLVMTypes::Int32Type); // taskCount3 } else // Otherwise we already have the types of the arguments From 84a7a5d1cbb85bb29c210828056326b83cea2d1b Mon Sep 17 00:00:00 2001 From: Evghenii Date: Sat, 26 Oct 2013 16:16:28 +0200 Subject: [PATCH 08/13] added tests for 3d launch --- test_static.cpp | 15 +++++++++------ tests/launch-8.ispc | 42 ++++++++++++++++++++++++++++++++++++++++++ tests/launch-9.ispc | 42 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 93 insertions(+), 6 deletions(-) create mode 100644 tests/launch-8.ispc create mode 100644 tests/launch-9.ispc diff --git a/test_static.cpp b/test_static.cpp index 8985fdb3..fceeb64e 100644 --- a/test_static.cpp +++ b/test_static.cpp @@ -62,17 +62,20 @@ extern "C" { extern void f_di(float *result, double *a, int *b); extern void result(float *val); - void ISPCLaunch(void **handlePtr, void *f, void *d, int); + void ISPCLaunch(void **handlePtr, void *f, void *d, int,int,int); void ISPCSync(void *handle); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); } - -void ISPCLaunch(void **handle, void *f, void *d, int count) { + +void ISPCLaunch(void **handle, void *f, void *d, int count0, int count1, int count2) { *handle = (void *)0xdeadbeef; - typedef void (*TaskFuncType)(void *, int, int, int, int); + typedef void (*TaskFuncType)(void *, int, int, int, int, int,int,int, int,int,int); TaskFuncType func = (TaskFuncType)f; - for (int i = 0; i < count; ++i) - func(d, 0, 1, i, count); + int count = count0*count1*count2, idx = 0; + for (int k = 0; k < count2; ++k) + for (int j = 0; j < count1; ++j) + for (int i = 0; i < count0; ++i) + func(d, 0, 1, idx++, count, i,j,k,count0,count1,count2); } void ISPCSync(void *) { diff --git a/tests/launch-8.ispc b/tests/launch-8.ispc new file mode 100644 index 00000000..a0b976e4 --- /dev/null +++ b/tests/launch-8.ispc @@ -0,0 +1,42 @@ + +export uniform int width() { return programCount; } + + +#define N0 10 +#define N1 20 +#define N2 50 +static uniform float array[N2][N1][N0]; + +task void x(const float f) { + uniform int j; + + assert(taskCount == N0*N1*N2); + assert(taskCount0 == N0); + assert(taskCount1 == N1); + assert(taskCount2 == N2); + assert(taskIndex == taskIndex0 + N0*(taskIndex1 + N1*taskIndex2)); + assert(taskIndex0 < N0); + assert(taskIndex1 < N1); + assert(taskIndex2 < N2); + + const uniform int i0 = taskIndex0; + const uniform int i1 = taskIndex1; + const uniform int i2 = taskIndex2; + const uniform int i = taskIndex; + array[i2][i1][i0] = i / 10000.; + cfor (j = 0; j < 10000; ++j) + array[i2][i1][i0] = sin(array[i2][i1][i0]); + if (array[i2][i1][i0] < .02) + array[i2][i1][i0] = i; +} +export void f_f(uniform float RET[], uniform float fFOO[]) { + float f = fFOO[programIndex]; + launch[N2][N1][N0] x(f); + sync; + RET[programIndex] = array[N2-1][N1-1][N0-1]; +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 9999.000000; +} diff --git a/tests/launch-9.ispc b/tests/launch-9.ispc new file mode 100644 index 00000000..761b070c --- /dev/null +++ b/tests/launch-9.ispc @@ -0,0 +1,42 @@ + +export uniform int width() { return programCount; } + + +#define N0 10 +#define N1 20 +#define N2 50 +static uniform float array[N2][N1][N0]; + +task void x(const float f) { + uniform int j; + + assert(taskCount == N0*N1*N2); + assert(taskCount0 == N0); + assert(taskCount1 == N1); + assert(taskCount2 == N2); + assert(taskIndex == taskIndex0 + N0*(taskIndex1 + N1*taskIndex2)); + assert(taskIndex0 < N0); + assert(taskIndex1 < N1); + assert(taskIndex2 < N2); + + const uniform int i0 = taskIndex0; + const uniform int i1 = taskIndex1; + const uniform int i2 = taskIndex2; + const uniform int i = taskIndex; + array[i2][i1][i0] = i / 10000.; + cfor (j = 0; j < 10000; ++j) + array[i2][i1][i0] = sin(array[i2][i1][i0]); + if (array[i2][i1][i0] < .02) + array[i2][i1][i0] = i; +} +export void f_f(uniform float RET[], uniform float fFOO[]) { + float f = fFOO[programIndex]; + launch[N0,N1,N2] x(f); + sync; + RET[programIndex] = array[N2-1][N1-1][N0-1]; +} + + +export void result(uniform float RET[]) { + RET[programIndex] = 9999.000000; +} From 2951cad365f7dbaf4bc3f73b0a822ddd4a40357b Mon Sep 17 00:00:00 2001 From: evghenii Date: Mon, 9 Dec 2013 13:10:26 +0100 Subject: [PATCH 09/13] added description for multi-dimensional tasking --- docs/ispc.rst | 63 +++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 12 deletions(-) diff --git a/docs/ispc.rst b/docs/ispc.rst index eac9b24e..3aab730b 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3012,8 +3012,8 @@ Intel® Cilk(tm), Intel® Thread Building Blocks or another task system), and for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as appropriate. Alternatively, ``ispc`` also has support for launching tasks from ``ispc`` code. The approach is similar to Intel® Cilk's task launch -feature. (See the ``examples/mandelbrot_tasks`` example to see it used in -a small example.) +feature. (See the ``examples/mandelbrot_tasks`` and +``examples/mandelbrot_tasks3d`` examples to see it used in a small example.) Any function that is launched as a task must be declared with the ``task`` qualifier: @@ -3108,6 +3108,38 @@ executing the current task. The ``threadIndex`` can be used for accessing data that is private to the current thread and thus doesn't require synchronization to access under parallel execution. +The tasking system also supports multi-dimensional partitioning (currently up +to three dimensions). To launch a 3D grid of tasks, for example with ``N0``, +``N1`` and ``N2`` tasks in x-, y- and z-dimension respectively + +:: + + float data[N2][N1][N0] + task void foo_task() + { + data[taskIndex2][taskIndex1][threadIndex0] = taskIndex; + } + +we use the following ``launch`` expressions: + +:: + + launch [N2][N1][N0] foo_task() + +or + +:: + + launch [N0,N1,N2] foo_task() + +Value of ``taskIndex`` is equal to ``taskIndex0 + taskCount0*(taskIndex1 + +taskCount1*taskIndex2)`` and it ranges from ``0`` to ``taskCount-1``, where +``taskCount = taskCount0*taskCount1*taskCount2``. If ``N1`` or/and ``N2`` are +not specified in the ``launch`` expression, a value of ``1`` is assumed. +Finally, for an one-dimensional grid of tasks, ``taskIndex`` is equivalent to +``taskIndex0`` and ``taskCount`` is equivalent to ``taskCount0``. + + Task Parallelism: Runtime Requirements -------------------------------------- @@ -3138,7 +3170,7 @@ manage tasks in ``ispc``: :: void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); - void ISPCLaunch(void **handlePtr, void *f, void *data, int count); + void ISPCLaunch(void **handlePtr, void *f, void *data, int count0, int count1, int count2); void ISPCSync(void *handle); All three of these functions take an opaque handle (or a pointer to an @@ -3175,16 +3207,20 @@ tasks. Each ``launch`` statement in ``ispc`` code causes a call to after the handle pointer to the function are relatively straightforward; the ``void *f`` parameter holds a pointer to a function to call to run the work for this task, ``data`` holds a pointer to data to pass to this -function, and ``count`` is the number of instances of this function to -enqueue for asynchronous execution. (In other words, ``count`` corresponds -to the value ``n`` in a multiple-task launch statement like ``launch[n]``.) +function, and ``count0``, ``count1`` and ``count2`` are the number of instances +of this function to enqueue for asynchronous execution. (In other words, +``count0``, ``count1`` and ``count2`` correspond to the value ``n0``, ``n1`` +and ``n2`` in a multiple-task launch statement like ``launch[n2][n1][n0]`` or +``launch [n0,n1,n2]`` respectively.) The signature of the provided function pointer ``f`` is :: void (*TaskFuncPtr)(void *data, int threadIndex, int threadCount, - int taskIndex, int taskCount) + int taskIndex, int taskCount, + int taskIndex0, int taskIndex1, int taskIndex2, + int taskCount0, int taskCount1, int taskCount2); When this function pointer is called by one of the hardware threads managed by the task system, the ``data`` pointer passed to ``ISPCLaunch()`` should @@ -3194,11 +3230,14 @@ number of hardware threads that have been spawned to run tasks and uniquely identifying the hardware thread that is running the task. (These values can be used to index into thread-local storage.) -The value of ``taskCount`` should be the number of tasks launched in the -``launch`` statement that caused the call to ``ISPCLaunch()`` and each of -the calls to this function should be given a unique value of ``taskIndex`` -between zero and ``taskCount``, to distinguish which of the instances -of the set of launched tasks is running. +The value of ``taskCount`` should be the total number of tasks launched in the +``launch`` statement (it must be equal to ``taskCount0*taskCount1*taskCount2``) +that caused the call to ``ISPCLaunch()`` and each of the calls to this function +should be given a unique value of ``taskIndex``, ``taskIndex0``, ``taskIndex1`` +and ``taskIndex2`` between zero and ``taskCount``, ``taskCount0``, +``taskCount1`` and ``taskCount2`` respectively, with ``taskIndex = taskIndex0 ++ taskCount0*(taskIndex1 + taskCount1*taskIndex2)``, to distinguish which of +the instances of the set of launched tasks is running. From c06ec92d0d79acaa398a9c109baa52525b22cd1d Mon Sep 17 00:00:00 2001 From: evghenii Date: Fri, 13 Dec 2013 11:49:11 +0100 Subject: [PATCH 10/13] added commas, added multi-dimensional tasking to mandelbrot_tasks & removed mandelbrot_task3d. Also adjusted documentaiton a bit --- builtins/util.m4 | 2 +- docs/ispc.rst | 3 +- examples/mandelbrot_tasks/Makefile | 2 +- .../mandelbrot_tasks/mandelbrot_tasks.cpp | 3 +- .../mandelbrot_tasks/mandelbrot_tasks.ispc | 29 ++- examples/mandelbrot_tasks3d/.gitignore | 2 - examples/mandelbrot_tasks3d/Makefile | 8 - .../mandelbrot_tasks.vcxproj | 180 ------------------ .../mandelbrot_tasks3d/mandelbrot_tasks3d.cpp | 146 -------------- .../mandelbrot_tasks3d.ispc | 99 ---------- .../mandelbrot_tasks_serial.cpp | 68 ------- examples/tasksys.cpp | 2 +- test_static.cpp | 2 +- 13 files changed, 28 insertions(+), 518 deletions(-) delete mode 100644 examples/mandelbrot_tasks3d/.gitignore delete mode 100644 examples/mandelbrot_tasks3d/Makefile delete mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj delete mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp delete mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc delete mode 100644 examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp diff --git a/builtins/util.m4 b/builtins/util.m4 index c90e8adc..1580dc08 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1813,7 +1813,7 @@ define(`stdlib_core', ` declare i32 @__fast_masked_vload() declare i8* @ISPCAlloc(i8**, i64, i32) nounwind -declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind +declare void @ISPCLaunch(i8**, i8*, i8*, i32, i32, i32) nounwind declare void @ISPCSync(i8*) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind diff --git a/docs/ispc.rst b/docs/ispc.rst index 3aab730b..04f478dc 100644 --- a/docs/ispc.rst +++ b/docs/ispc.rst @@ -3012,8 +3012,7 @@ Intel® Cilk(tm), Intel® Thread Building Blocks or another task system), and for tasks to use ``ispc`` for SPMD parallelism across the vector lanes as appropriate. Alternatively, ``ispc`` also has support for launching tasks from ``ispc`` code. The approach is similar to Intel® Cilk's task launch -feature. (See the ``examples/mandelbrot_tasks`` and -``examples/mandelbrot_tasks3d`` examples to see it used in a small example.) +feature. (Check the ``examples/mandelbrot_tasks`` example to see how it is used.) Any function that is launched as a task must be declared with the ``task`` qualifier: diff --git a/examples/mandelbrot_tasks/Makefile b/examples/mandelbrot_tasks/Makefile index 1a565ffd..a50631ab 100644 --- a/examples/mandelbrot_tasks/Makefile +++ b/examples/mandelbrot_tasks/Makefile @@ -2,7 +2,7 @@ EXAMPLE=mandelbrot_tasks CPP_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp ISPC_SRC=mandelbrot_tasks.ispc -ISPC_IA_TARGETS=sse2,sse4-x2,avx-x2 +ISPC_IA_TARGETS=sse2,sse4-x2,avx ISPC_ARM_TARGETS=neon include ../common.mk diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp index 698daf0f..802afde0 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.cpp +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.cpp @@ -38,7 +38,8 @@ #pragma warning (disable: 4305) #endif -#include +#include +#include #include #include #include "../timing.h" diff --git a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc index 84d4ccd4..f9b0be4c 100644 --- a/examples/mandelbrot_tasks/mandelbrot_tasks.ispc +++ b/examples/mandelbrot_tasks/mandelbrot_tasks.ispc @@ -57,21 +57,26 @@ task void mandelbrot_scanline(uniform float x0, uniform float dx, uniform float y0, uniform float dy, uniform int width, uniform int height, - uniform int span, + uniform int xspan, uniform int yspan, uniform int maxIterations, uniform int output[]) { - uniform int ystart = taskIndex * span; - uniform int yend = min((taskIndex+1) * span, (unsigned int)height); + const uniform int xstart = taskIndex0 * xspan; + const uniform int xend = min(xstart + xspan, width); - foreach (yi = ystart ... yend, xi = 0 ... width) { + const uniform int ystart = taskIndex1 * yspan; + const uniform int yend = min(ystart + yspan, height); + + + foreach (yi = ystart ... yend, xi = xstart ... xend) { float x = x0 + xi * dx; float y = y0 + yi * dy; int index = yi * width + xi; output[index] = mandel(x, y, maxIterations); } + } - +#if 1 export void mandelbrot_ispc(uniform float x0, uniform float y0, uniform float x1, uniform float y1, @@ -79,8 +84,16 @@ mandelbrot_ispc(uniform float x0, uniform float y0, uniform int maxIterations, uniform int output[]) { uniform float dx = (x1 - x0) / width; uniform float dy = (y1 - y0) / height; - uniform int span = 4; + const uniform int xspan = max(32, programCount*2); /* make sure it is big enough to avoid false-sharing */ + const uniform int yspan = 16; - launch[height/span] mandelbrot_scanline(x0, dx, y0, dy, width, height, span, - maxIterations, output); + +#if 1 + launch [width/xspan, height/yspan] +#else + launch [height/yspan][width/xspan] +#endif + mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, + maxIterations, output); } +#endif diff --git a/examples/mandelbrot_tasks3d/.gitignore b/examples/mandelbrot_tasks3d/.gitignore deleted file mode 100644 index c2471c27..00000000 --- a/examples/mandelbrot_tasks3d/.gitignore +++ /dev/null @@ -1,2 +0,0 @@ -mandelbrot -*.ppm diff --git a/examples/mandelbrot_tasks3d/Makefile b/examples/mandelbrot_tasks3d/Makefile deleted file mode 100644 index 3dd44d65..00000000 --- a/examples/mandelbrot_tasks3d/Makefile +++ /dev/null @@ -1,8 +0,0 @@ - -EXAMPLE=mandelbrot_tasks3d -CPP_SRC=mandelbrot_tasks3d.cpp mandelbrot_tasks_serial.cpp -ISPC_SRC=mandelbrot_tasks3d.ispc -ISPC_IA_TARGETS=avx,sse2,sse4 -ISPC_ARM_TARGETS=neon - -include ../common.mk diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj b/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj deleted file mode 100644 index 3a8fca79..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks.vcxproj +++ /dev/null @@ -1,180 +0,0 @@ - - - - - Debug - Win32 - - - Debug - x64 - - - Release - Win32 - - - Release - x64 - - - - {E80DA7D4-AB22-4648-A068-327307156BE6} - Win32Proj - mandelbrot_tasks - - - - Application - true - Unicode - - - Application - true - Unicode - - - Application - false - true - Unicode - - - Application - false - true - Unicode - - - - - - - - - - - - - - - - - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - true - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - false - $(ProjectDir)..\..;$(ExecutablePath) - mandelbrot_tasks - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - - - Level3 - Disabled - WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - true - Fast - - - Console - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - Level3 - - - MaxSpeed - true - true - WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions) - $(TargetDir) - Fast - - - Console - true - true - true - - - - - - - - - - Document - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 - - ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --target=sse2,sse4-x2,avx-x2 - - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - $(TargetDir)%(Filename).obj;$(TargetDir)%(Filename)_sse2.obj;$(TargetDir)%(Filename)_sse4.obj;$(TargetDir)%(Filename)_avx.obj;$(TargetDir)%(Filename)_ispc.h - - - - - - diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp deleted file mode 100644 index 9cbb966a..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.cpp +++ /dev/null @@ -1,146 +0,0 @@ -/* - Copyright (c) 2010-2011, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -#ifdef _MSC_VER -#define _CRT_SECURE_NO_WARNINGS -#define NOMINMAX -#pragma warning (disable: 4244) -#pragma warning (disable: 4305) -#endif - -#include -#include -#include -#include "../timing.h" -#include "mandelbrot_tasks3d_ispc.h" -using namespace ispc; - -extern void mandelbrot_serial(float x0, float y0, float x1, float y1, - int width, int height, int maxIterations, - int output[]); - -/* Write a PPM image file with the image of the Mandelbrot set */ -static void -writePPM(int *buf, int width, int height, const char *fn) { - FILE *fp = fopen(fn, "wb"); - fprintf(fp, "P6\n"); - fprintf(fp, "%d %d\n", width, height); - fprintf(fp, "255\n"); - for (int i = 0; i < width*height; ++i) { - // Map the iteration count to colors by just alternating between - // two greys. - char c = (buf[i] & 0x1) ? 240 : 20; - for (int j = 0; j < 3; ++j) - fputc(c, fp); - } - fclose(fp); - printf("Wrote image file %s\n", fn); -} - - -static void usage() { - fprintf(stderr, "usage: mandelbrot [--scale=]\n"); - exit(1); -} - -int main(int argc, char *argv[]) { - unsigned int width = 1536; - unsigned int height = 1024; - float x0 = -2; - float x1 = 1; - float y0 = -1; - float y1 = 1; - - if (argc == 1) - ; - else if (argc == 2) { - if (strncmp(argv[1], "--scale=", 8) == 0) { - float scale = atof(argv[1] + 8); - if (scale == 0.f) - usage(); - width *= scale; - height *= scale; - // round up to multiples of 16 - width = (width + 0xf) & ~0xf; - height = (height + 0xf) & ~0xf; - } - else - usage(); - } - else - usage(); - - int maxIterations = 512; - int *buf = new int[width*height]; - - // - // Compute the image using the ispc implementation; report the minimum - // time of three runs. - // - double minISPC = 1e30; - for (int i = 0; i < 3; ++i) { - // Clear out the buffer - for (unsigned int i = 0; i < width * height; ++i) - buf[i] = 0; - reset_and_start_timer(); - mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf); - double dt = get_elapsed_mcycles(); - minISPC = std::min(minISPC, dt); - } - - printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC); - writePPM(buf, width, height, "mandelbrot-ispc.ppm"); - - - // - // And run the serial implementation 3 times, again reporting the - // minimum time. - // - double minSerial = 1e30; - for (int i = 0; i < 3; ++i) { - // Clear out the buffer - for (unsigned int i = 0; i < width * height; ++i) - buf[i] = 0; - reset_and_start_timer(); - mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf); - double dt = get_elapsed_mcycles(); - minSerial = std::min(minSerial, dt); - } - - printf("[mandelbrot serial]:\t\t[%.3f] million cycles\n", minSerial); - writePPM(buf, width, height, "mandelbrot-serial.ppm"); - - printf("\t\t\t\t(%.2fx speedup from ISPC + tasks)\n", minSerial/minISPC); - - return 0; -} diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc b/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc deleted file mode 100644 index 395bdca4..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks3d.ispc +++ /dev/null @@ -1,99 +0,0 @@ -/* - Copyright (c) 2010-2012, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - -static inline int -mandel(float c_re, float c_im, int count) { - float z_re = c_re, z_im = c_im; - int i; - for (i = 0; i < count; ++i) { - if (z_re * z_re + z_im * z_im > 4.) - break; - - float new_re = z_re*z_re - z_im*z_im; - float new_im = 2.f * z_re * z_im; - unmasked { - z_re = c_re + new_re; - z_im = c_im + new_im; - } - } - - return i; -} - - -/* Task to compute the Mandelbrot iterations for a single scanline. - */ -task void -mandelbrot_scanline(uniform float x0, uniform float dx, - uniform float y0, uniform float dy, - uniform int width, uniform int height, - uniform int xspan, uniform int yspan, - uniform int maxIterations, uniform int output[]) { - const uniform int xstart = taskIndex0 * xspan; - const uniform int xend = min(xstart + xspan, width); - - const uniform int ystart = taskIndex1 * yspan; - const uniform int yend = min(ystart + yspan, height); - - - foreach (yi = ystart ... yend, xi = xstart ... xend) { - float x = x0 + xi * dx; - float y = y0 + yi * dy; - - int index = yi * width + xi; - output[index] = mandel(x, y, maxIterations); - } - -} - -#if 1 -export void -mandelbrot_ispc(uniform float x0, uniform float y0, - uniform float x1, uniform float y1, - uniform int width, uniform int height, - uniform int maxIterations, uniform int output[]) { - uniform float dx = (x1 - x0) / width; - uniform float dy = (y1 - y0) / height; - const uniform int xspan = 16; /* make sure it is big enough to avoid false-sharing */ - const uniform int yspan = 16; - - -#if 1 - launch [width/xspan, height/yspan] -#else - launch [height/yspan][width/xspan] -#endif - mandelbrot_scanline(x0, dx, y0, dy, width, height, xspan, yspan, - maxIterations, output); -} -#endif diff --git a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp b/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp deleted file mode 100644 index a76fb5ca..00000000 --- a/examples/mandelbrot_tasks3d/mandelbrot_tasks_serial.cpp +++ /dev/null @@ -1,68 +0,0 @@ -/* - Copyright (c) 2010-2011, Intel Corporation - All rights reserved. - - Redistribution and use in source and binary forms, with or without - modification, are permitted provided that the following conditions are - met: - - * Redistributions of source code must retain the above copyright - notice, this list of conditions and the following disclaimer. - - * Redistributions in binary form must reproduce the above copyright - notice, this list of conditions and the following disclaimer in the - documentation and/or other materials provided with the distribution. - - * Neither the name of Intel Corporation nor the names of its - contributors may be used to endorse or promote products derived from - this software without specific prior written permission. - - - THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS - IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED - TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A - PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER - OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR - PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF - LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING - NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS - SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -*/ - - -static int mandel(float c_re, float c_im, int count) { - float z_re = c_re, z_im = c_im; - int i; - for (i = 0; i < count; ++i) { - if (z_re * z_re + z_im * z_im > 4.f) - break; - - float new_re = z_re*z_re - z_im*z_im; - float new_im = 2.f * z_re * z_im; - z_re = c_re + new_re; - z_im = c_im + new_im; - } - - return i; -} - -void mandelbrot_serial(float x0, float y0, float x1, float y1, - int width, int height, int maxIterations, - int output[]) -{ - float dx = (x1 - x0) / width; - float dy = (y1 - y0) / height; - - for (int j = 0; j < height; j++) { - for (int i = 0; i < width; ++i) { - float x = x0 + i * dx; - float y = y0 + j * dy; - - int index = (j * width + i); - output[index] = mandel(x, y, maxIterations); - } - } -} - diff --git a/examples/tasksys.cpp b/examples/tasksys.cpp index 6bc60129..b914068e 100644 --- a/examples/tasksys.cpp +++ b/examples/tasksys.cpp @@ -204,7 +204,7 @@ struct TaskInfo { // ispc expects these functions to have C linkage / not be mangled extern "C" { - void ISPCLaunch(void **handlePtr, void *f, void *data, int countx,int county, int countz); + void ISPCLaunch(void **handlePtr, void *f, void *data, int countx, int county, int countz); void *ISPCAlloc(void **handlePtr, int64_t size, int32_t alignment); void ISPCSync(void *handle); } diff --git a/test_static.cpp b/test_static.cpp index fceeb64e..27a5b136 100644 --- a/test_static.cpp +++ b/test_static.cpp @@ -69,7 +69,7 @@ extern "C" { void ISPCLaunch(void **handle, void *f, void *d, int count0, int count1, int count2) { *handle = (void *)0xdeadbeef; - typedef void (*TaskFuncType)(void *, int, int, int, int, int,int,int, int,int,int); + typedef void (*TaskFuncType)(void *, int, int, int, int, int, int, int, int, int, int); TaskFuncType func = (TaskFuncType)f; int count = count0*count1*count2, idx = 0; for (int k = 0; k < count2; ++k) From b506c92d21e68fd859f6835a255be43edcf43fd9 Mon Sep 17 00:00:00 2001 From: evghenii Date: Fri, 13 Dec 2013 13:55:58 +0100 Subject: [PATCH 11/13] restored-x2 --- examples/mandelbrot_tasks/Makefile | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/mandelbrot_tasks/Makefile b/examples/mandelbrot_tasks/Makefile index a50631ab..1a565ffd 100644 --- a/examples/mandelbrot_tasks/Makefile +++ b/examples/mandelbrot_tasks/Makefile @@ -2,7 +2,7 @@ EXAMPLE=mandelbrot_tasks CPP_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp ISPC_SRC=mandelbrot_tasks.ispc -ISPC_IA_TARGETS=sse2,sse4-x2,avx +ISPC_IA_TARGETS=sse2,sse4-x2,avx-x2 ISPC_ARM_TARGETS=neon include ../common.mk From 63ecf009ecbe6864511a64c6ddb8215aacae50a3 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Tue, 17 Dec 2013 15:06:29 +0100 Subject: [PATCH 12/13] fix compilation for Visual Studio --- expr.cpp | 11 ++++++++--- expr.h | 2 +- 2 files changed, 9 insertions(+), 4 deletions(-) diff --git a/expr.cpp b/expr.cpp index 60d9ce66..9f75ab08 100644 --- a/expr.cpp +++ b/expr.cpp @@ -3544,9 +3544,14 @@ FunctionCallExpr::FunctionCallExpr(Expr *f, ExprList *a, SourcePos p, : Expr(p), isLaunch(il) { func = f; args = a; - launchCountExpr[0] = lce[0]; - launchCountExpr[1] = lce[1]; - launchCountExpr[2] = lce[2]; + if (lce != NULL) + { + launchCountExpr[0] = lce[0]; + launchCountExpr[1] = lce[1]; + launchCountExpr[2] = lce[2]; + } + else + launchCountExpr[0] = launchCountExpr[1] = launchCountExpr[2] = NULL; } diff --git a/expr.h b/expr.h index 0d46191b..e4d7e07b 100644 --- a/expr.h +++ b/expr.h @@ -247,7 +247,7 @@ class FunctionCallExpr : public Expr { public: FunctionCallExpr(Expr *func, ExprList *args, SourcePos p, bool isLaunch = false, - Expr *launchCountExpr[3] = (Expr*[3]){NULL, NULL, NULL}); + Expr *launchCountExpr[3] = NULL); llvm::Value *GetValue(FunctionEmitContext *ctx) const; llvm::Value *GetLValue(FunctionEmitContext *ctx) const; From 59b989d243eb47c1135d464da2badabf5eee7ec2 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Tue, 17 Dec 2013 16:06:20 +0100 Subject: [PATCH 13/13] fix for --target=sse4-i18x16 --- tests/launch-8.ispc | 16 ++++++++-------- tests/launch-9.ispc | 16 ++++++++-------- 2 files changed, 16 insertions(+), 16 deletions(-) diff --git a/tests/launch-8.ispc b/tests/launch-8.ispc index a0b976e4..eacba673 100644 --- a/tests/launch-8.ispc +++ b/tests/launch-8.ispc @@ -10,14 +10,14 @@ static uniform float array[N2][N1][N0]; task void x(const float f) { uniform int j; - assert(taskCount == N0*N1*N2); - assert(taskCount0 == N0); - assert(taskCount1 == N1); - assert(taskCount2 == N2); - assert(taskIndex == taskIndex0 + N0*(taskIndex1 + N1*taskIndex2)); - assert(taskIndex0 < N0); - assert(taskIndex1 < N1); - assert(taskIndex2 < N2); + assert(taskCount == (int32)N0*N1*N2); + assert(taskCount0 == (int32)N0); + assert(taskCount1 == (int32)N1); + assert(taskCount2 == (int32)N2); + assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2)); + assert(taskIndex0 < (int32)N0); + assert(taskIndex1 < (int32)N1); + assert(taskIndex2 < (int32)N2); const uniform int i0 = taskIndex0; const uniform int i1 = taskIndex1; diff --git a/tests/launch-9.ispc b/tests/launch-9.ispc index 761b070c..1952e8e7 100644 --- a/tests/launch-9.ispc +++ b/tests/launch-9.ispc @@ -10,14 +10,14 @@ static uniform float array[N2][N1][N0]; task void x(const float f) { uniform int j; - assert(taskCount == N0*N1*N2); - assert(taskCount0 == N0); - assert(taskCount1 == N1); - assert(taskCount2 == N2); - assert(taskIndex == taskIndex0 + N0*(taskIndex1 + N1*taskIndex2)); - assert(taskIndex0 < N0); - assert(taskIndex1 < N1); - assert(taskIndex2 < N2); + assert(taskCount == (int32)N0*N1*N2); + assert(taskCount0 == (int32)N0); + assert(taskCount1 == (int32)N1); + assert(taskCount2 == (int32)N2); + assert(taskIndex == (int32)taskIndex0 + (int32)N0*(taskIndex1 +(int32) N1*taskIndex2)); + assert(taskIndex0 < (int32)N0); + assert(taskIndex1 < (int32)N1); + assert(taskIndex2 < (int32)N2); const uniform int i0 = taskIndex0; const uniform int i1 = taskIndex1;