fixed ISPCSync functionality

This commit is contained in:
Evghenii
2013-11-13 11:19:10 +01:00
parent 55bf0d23c2
commit 268be7f0b5
3 changed files with 33 additions and 19 deletions

View File

@@ -102,7 +102,8 @@ define i32 @__lanemask_lt() nounwind readnone alwaysinline
define i8* @ISPCAlloc(i8**, i64, i32) nounwind alwaysinline define i8* @ISPCAlloc(i8**, i64, i32) nounwind alwaysinline
{ {
ret i8* null %ptr = inttoptr i64 1 to i8*
ret i8* %ptr
} }
;; this call allocates the parameter buffer for a kernel launch ;; this call allocates the parameter buffer for a kernel launch

14
ctx.cpp
View File

@@ -3673,6 +3673,8 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
void void
FunctionEmitContext::SyncInst() { FunctionEmitContext::SyncInst() {
if (!g->target->isPTX())
{
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr); llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue = llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType); llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
@@ -3697,6 +3699,18 @@ FunctionEmitContext::SyncInst() {
SetCurrentBasicBlock(bPostSync); SetCurrentBasicBlock(bPostSync);
} }
else
{
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Function *fsync = m->module->getFunction("ISPCSync");
if (fsync == NULL)
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");
StoreInst(nullPtrValue, launchGroupHandlePtr);
}
}
/** When we are gathering from or scattering to a varying atomic type, we need /** When we are gathering from or scattering to a varying atomic type, we need

View File

@@ -313,5 +313,4 @@ export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
const uniform int ntilex = (w+TILEX-1)/TILEX; const uniform int ntilex = (w+TILEX-1)/TILEX;
const uniform int ntiley = (h+TILEY-1)/TILEY; const uniform int ntiley = (h+TILEY-1)/TILEY;
launch[ntilex,ntiley] ao_task(w, h, nsubsamples, image); launch[ntilex,ntiley] ao_task(w, h, nsubsamples, image);
sync;
} }