From 5f859e4885e26178d6d06b3615b8c55dc188adc6 Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 8 Jan 2014 19:31:28 +0100 Subject: [PATCH] added addrspace(3,4)->addrspace(0) conversion to ctx->GetElementPtrInst. Appears to work now. --- ctx.cpp | 25 +++++++++++++++-------- examples_ptx/deferred/kernels.ispc | 2 +- examples_ptx/volume_rendering/volume.ispc | 2 +- stmt.cpp | 2 +- 4 files changed, 19 insertions(+), 12 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 0d0a09a1..1dc1c65b 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1854,7 +1854,7 @@ static llvm::Value* lConvertGepToGenericPtr(FunctionEmitContext *ctx, llvm::Valu return value; llvm::PointerType *pt = llvm::dyn_cast(value->getType()); const int addressSpace = pt->getAddressSpace(); - if (addressSpace != 3) // && addressSpace != 4) + if (addressSpace != 3 && addressSpace != 4) return value; llvm::Type *elTy = pt->getElementType(); @@ -1867,8 +1867,10 @@ static llvm::Value* lConvertGepToGenericPtr(FunctionEmitContext *ctx, llvm::Valu llvm::ArrayType *arrTy = llvm::dyn_cast(pt->getArrayElementType()); assert(arrTy != NULL); llvm::Type *arrElTy = arrTy->getElementType(); +#if 0 if (arrElTy->isArrayTy()) Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array"); +#endif /* convert elTy addrspace(3)* to i64* addrspace(3)* */ llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace); @@ -1886,13 +1888,16 @@ static llvm::Value* lConvertGepToGenericPtr(FunctionEmitContext *ctx, llvm::Valu value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2"); /* compute offset */ - llvm::Function *funcTid = m->module->getFunction("__tid_x"); - llvm::Function *funcWarpSz = m->module->getFunction("__warpsize"); - llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector(), "gep2gen_tid"); - llvm::Value *warpSz = ctx->CallInst(funcWarpSz, NULL, std::vector(), "gep2gen_warpSz"); - llvm::Value *warpId = ctx->BinaryOperator(llvm::Instruction::SDiv, tid, warpSz, "gep2gen_warpId"); 
- llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset"); - value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock()); + if (addressSpace == 3) + { + llvm::Function *funcTid = m->module->getFunction("__tid_x"); + llvm::Function *funcWarpSz = m->module->getFunction("__warpsize"); + llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector(), "gep2gen_tid"); + llvm::Value *warpSz = ctx->CallInst(funcWarpSz, NULL, std::vector(), "gep2gen_warpSz"); + llvm::Value *warpId = ctx->BinaryOperator(llvm::Instruction::SDiv, tid, warpSz, "gep2gen_warpId"); + llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset"); + value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock()); + } /* convert arrElTy* to elTy* */ llvm::PointerType *elTyPt0 = llvm::PointerType::get(elTy, 0); @@ -3461,19 +3466,21 @@ lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) { llvm::Value * FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, - const std::vector &args_in, + const std::vector &args, const char *name) { if (func == NULL) { AssertPos(currentPos, m->errorCount > 0); return NULL; } +#if 0 std::vector args = args_in; /* NVPTX: * Convert all pointers to addrspace(0) */ for (unsigned int i = 0; i < args.size(); i++) args[i] = lConvertToGenericPtr(this, args[i], currentPos); +#endif std::vector argVals = args; // Most of the time, the mask is passed as the last argument. 
this // isn't the case for things like intrinsics, builtins, and extern "C" diff --git a/examples_ptx/deferred/kernels.ispc b/examples_ptx/deferred/kernels.ispc index 92108f38..5bc6be41 100644 --- a/examples_ptx/deferred/kernels.ispc +++ b/examples_ptx/deferred/kernels.ispc @@ -506,7 +506,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y, uniform float cameraProj_32 = inputHeader.cameraProj[3][2]; // Light intersection: figure out which lights illuminate this tile. -#if 1 //def __NVPTX__ +#if 1 uniform int * uniform tileLightIndices = uniform new uniform int [MAX_LIGHTS]; #define MALLOC #else /* shared memory doesn't full work... why? */ diff --git a/examples_ptx/volume_rendering/volume.ispc b/examples_ptx/volume_rendering/volume.ispc index fd90af9c..6d0bd8f5 100644 --- a/examples_ptx/volume_rendering/volume.ispc +++ b/examples_ptx/volume_rendering/volume.ispc @@ -328,7 +328,7 @@ volume_task(uniform float density[], uniform int _nVoxels[3], { if (taskIndex >= taskCount) return; -#if 0 /* cannot pass shared memory pointers to functions, need to find a way to solve this one :S */ +#if 1 /* cannot pass shared memory pointers to functions, need to find a way to solve this one :S */ uniform int nVoxels[3]; nVoxels[0] = _nVoxels[0]; nVoxels[1] = _nVoxels[1]; diff --git a/stmt.cpp b/stmt.cpp index 4695ffec..37b53336 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -278,7 +278,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { int addressSpace; if (sym->type->IsConstType()) { -#if 0 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */ +#if 1 addressSpace = 4; /* constant */ #else addressSpace = 0; /* use global for now */