fixed ISPCSync functionality
This commit is contained in:
@@ -102,7 +102,8 @@ define i32 @__lanemask_lt() nounwind readnone alwaysinline
|
|||||||
|
|
||||||
define i8* @ISPCAlloc(i8**, i64, i32) nounwind alwaysinline
|
define i8* @ISPCAlloc(i8**, i64, i32) nounwind alwaysinline
|
||||||
{
|
{
|
||||||
ret i8* null
|
%ptr = inttoptr i64 1 to i8*
|
||||||
|
ret i8* %ptr
|
||||||
}
|
}
|
||||||
|
|
||||||
;; this call allocates the parameter buffer for a kernel launch
|
;; this call allocates the parameter buffer for a kernel launch
|
||||||
|
|||||||
14
ctx.cpp
14
ctx.cpp
@@ -3673,6 +3673,8 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
|||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::SyncInst() {
|
FunctionEmitContext::SyncInst() {
|
||||||
|
if (!g->target->isPTX())
|
||||||
|
{
|
||||||
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
||||||
llvm::Value *nullPtrValue =
|
llvm::Value *nullPtrValue =
|
||||||
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
|
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
|
||||||
@@ -3697,6 +3699,18 @@ FunctionEmitContext::SyncInst() {
|
|||||||
|
|
||||||
SetCurrentBasicBlock(bPostSync);
|
SetCurrentBasicBlock(bPostSync);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
||||||
|
llvm::Value *nullPtrValue =
|
||||||
|
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
|
||||||
|
llvm::Function *fsync = m->module->getFunction("ISPCSync");
|
||||||
|
if (fsync == NULL)
|
||||||
|
FATAL("Couldn't find ISPCSync declaration?!");
|
||||||
|
CallInst(fsync, NULL, launchGroupHandle, "");
|
||||||
|
StoreInst(nullPtrValue, launchGroupHandlePtr);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
/** When we are gathering from or scattering to a varying atomic type, we need
|
/** When we are gathering from or scattering to a varying atomic type, we need
|
||||||
|
|||||||
@@ -313,5 +313,4 @@ export void ao_ispc_tasks(uniform int w, uniform int h, uniform int nsubsamples,
|
|||||||
const uniform int ntilex = (w+TILEX-1)/TILEX;
|
const uniform int ntilex = (w+TILEX-1)/TILEX;
|
||||||
const uniform int ntiley = (h+TILEY-1)/TILEY;
|
const uniform int ntiley = (h+TILEY-1)/TILEY;
|
||||||
launch[ntilex,ntiley] ao_task(w, h, nsubsamples, image);
|
launch[ntilex,ntiley] ao_task(w, h, nsubsamples, image);
|
||||||
sync;
|
|
||||||
}
|
}
|
||||||
|
|||||||
Reference in New Issue
Block a user