From 69c5e0aae765a88ec297a03a4aaf33f19c69563d Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 8 Jan 2014 15:12:32 +0100 Subject: [PATCH] convert pointers in function arguments to addrspace(3). still there is problem with shared memory. need to figure out which one .. --- ctx.cpp | 19 ++++++++++++++++--- examples_ptx/deferred/kernels.ispc | 7 ++++--- stmt.cpp | 4 ++-- 3 files changed, 22 insertions(+), 8 deletions(-) diff --git a/ctx.cpp b/ctx.cpp index 096feeb0..e55a7acb 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1890,14 +1890,21 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value * addressSpace == 3 ? "__cvt_loc2gen" : "__cvt_const2gen"); std::vector __cvt2gen_args; __cvt2gen_args.push_back(value); +#if 0 value = ctx->CallInst(__cvt2gen, NULL, __cvt2gen_args, "cvt2gen_call"); +#else + value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, "cvt2gen_call", ctx->GetCurrentBasicBlock()); +#endif /* convert i64* to elTy* */ llvm::PointerType *typeElPtr = llvm::PointerType::get(typeEl, 0); value = ctx->BitCastInst(value, typeElPtr, "cvtLoc2Gen_i642ptr"); - /* add warp offset to the pointer */ - return lAddWarpOffset(ctx, value); + /* add warp offset to the pointer for local memory */ + if (addressSpace == 3) + value = lAddWarpOffset(ctx, value); + + return value; } llvm::Value * @@ -3394,13 +3401,19 @@ lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) { llvm::Value * FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType, - const std::vector &args, + const std::vector &args_in, const char *name) { if (func == NULL) { AssertPos(currentPos, m->errorCount > 0); return NULL; } + std::vector args = args_in; + /* NVPTX: + * Convert all pointers to addrspace(0) + */ + for (unsigned int i = 0; i < args.size(); i++) + args[i] = lConvertToGenericPtr(this, args[i], currentPos); std::vector argVals = args; // Most of the time, the mask is passed as the last argument. 
this // isn't the case for things like intrinsics, builtins, and extern "C" diff --git a/examples_ptx/deferred/kernels.ispc b/examples_ptx/deferred/kernels.ispc index aa54d455..92108f38 100644 --- a/examples_ptx/deferred/kernels.ispc +++ b/examples_ptx/deferred/kernels.ispc @@ -506,9 +506,10 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y, uniform float cameraProj_32 = inputHeader.cameraProj[3][2]; // Light intersection: figure out which lights illuminate this tile. -#ifdef __NVPTX__ +#if 1 //def __NVPTX__ uniform int * uniform tileLightIndices = uniform new uniform int [MAX_LIGHTS]; -#else +#define MALLOC +#else /* shared memory doesn't full work... why? */ uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile #endif uniform int numTileLights = @@ -532,7 +533,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y, cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32, tileLightIndices, numTileLights, visualizeLightCount, framebuffer_r, framebuffer_g, framebuffer_b); -#ifdef __NVPTX__ +#ifdef MALLOC delete tileLightIndices; #endif } diff --git a/stmt.cpp b/stmt.cpp index fc0148c1..6baa6fdd 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -278,7 +278,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { int addressSpace; if (sym->type->IsConstType()) { -#if 0 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */ +#if 1 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */ addressSpace = 4; /* constant */ #else addressSpace = 0; /* use global for now */ @@ -333,7 +333,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { sym->storagePtr = new llvm::GlobalVariable(*m->module, llvmTypeUn, sym->type->IsConstType(), - llvm::GlobalValue::PrivateLinkage, + llvm::GlobalValue::InternalLinkage, cinit, llvm::Twine("local_") + llvm::Twine(sym->pos.first_line) +