From 268be7f0b5012031e5fa0c435b3dd5bd44f3b230 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 13 Nov 2013 11:19:10 +0100 Subject: [PATCH] fixed ISPCSync functionality --- builtins/target-nvptx64.ll | 3 ++- ctx.cpp | 48 ++++++++++++++++++++++------------ examples_cuda/aobench/ao1.ispc | 1 - 3 files changed, 33 insertions(+), 19 deletions(-) diff --git a/builtins/target-nvptx64.ll b/builtins/target-nvptx64.ll index 2f2f004a..2484eb1e 100644 --- a/builtins/target-nvptx64.ll +++ b/builtins/target-nvptx64.ll @@ -102,7 +102,8 @@ define i32 @__lanemask_lt() nounwind readnone alwaysinline define i8* @ISPCAlloc(i8**, i64, i32) nounwind alwaysinline { - ret i8* null + %ptr = inttoptr i64 1 to i8* + ret i8* %ptr } ;; this call allocate parameter buffer for kernel launch diff --git a/ctx.cpp b/ctx.cpp index f33044f8..1ce6588e 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -3673,29 +3673,43 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee, void FunctionEmitContext::SyncInst() { - llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr); - llvm::Value *nullPtrValue = + if (!g->target->isPTX()) + { + llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr); + llvm::Value *nullPtrValue = llvm::Constant::getNullValue(LLVMTypes::VoidPointerType); - llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, - llvm::CmpInst::ICMP_NE, - launchGroupHandle, nullPtrValue); - llvm::BasicBlock *bSync = CreateBasicBlock("call_sync"); - llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync"); - BranchInst(bSync, bPostSync, nonNull); + llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp, + llvm::CmpInst::ICMP_NE, + launchGroupHandle, nullPtrValue); + llvm::BasicBlock *bSync = CreateBasicBlock("call_sync"); + llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync"); + BranchInst(bSync, bPostSync, nonNull); - SetCurrentBasicBlock(bSync); - llvm::Function *fsync = m->module->getFunction("ISPCSync"); - if (fsync == NULL) + SetCurrentBasicBlock(bSync); + llvm::Function *fsync = m->module->getFunction("ISPCSync"); + if (fsync == NULL) FATAL("Couldn't find ISPCSync declaration?!"); - CallInst(fsync, NULL, launchGroupHandle, ""); + CallInst(fsync, NULL, launchGroupHandle, ""); - // zero out the handle so that if ISPCLaunch is called again in this - // function, it knows it's starting out from scratch - StoreInst(nullPtrValue, launchGroupHandlePtr); + // zero out the handle so that if ISPCLaunch is called again in this + // function, it knows it's starting out from scratch + StoreInst(nullPtrValue, launchGroupHandlePtr); - BranchInst(bPostSync); + BranchInst(bPostSync); - SetCurrentBasicBlock(bPostSync); + SetCurrentBasicBlock(bPostSync); + } + else + { + llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr); + llvm::Value *nullPtrValue = + llvm::Constant::getNullValue(LLVMTypes::VoidPointerType); + llvm::Function *fsync = m->module->getFunction("ISPCSync"); + if (fsync == NULL) + FATAL("Couldn't find ISPCSync declaration?!"); + CallInst(fsync, NULL, launchGroupHandle, ""); + StoreInst(nullPtrValue, launchGroupHandlePtr); + } } diff --git a/examples_cuda/aobench/ao1.ispc b/examples_cuda/aobench/ao1.ispc index a6ed8c85..cccb3d7b 100644 --- a/examples_cuda/aobench/ao1.ispc +++ b/examples_cuda/aobench/ao1.ispc @@ -313,5 +313,4 @@ export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples, const uniform int ntilex = (w+TILEX-1)/TILEX; const uniform int ntiley = (h+TILEY-1)/TILEY; launch[ntilex,ntiley] ao_task(w, h, nsubsamples, image); - sync; }