Convert pointers in function arguments to addrspace(3). There is still a problem with shared memory; need to figure out which one.
This commit is contained in:
19
ctx.cpp
19
ctx.cpp
@@ -1890,14 +1890,21 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
|
||||
addressSpace == 3 ? "__cvt_loc2gen" : "__cvt_const2gen");
|
||||
std::vector<llvm::Value *> __cvt2gen_args;
|
||||
__cvt2gen_args.push_back(value);
|
||||
#if 0
|
||||
value = ctx->CallInst(__cvt2gen, NULL, __cvt2gen_args, "cvt2gen_call");
|
||||
#else
|
||||
value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, "cvt2gen_call", ctx->GetCurrentBasicBlock());
|
||||
#endif
|
||||
|
||||
/* convert i64* to elTy* */
|
||||
llvm::PointerType *typeElPtr = llvm::PointerType::get(typeEl, 0);
|
||||
value = ctx->BitCastInst(value, typeElPtr, "cvtLoc2Gen_i642ptr");
|
||||
|
||||
/* add warp offset to the pointer */
|
||||
return lAddWarpOffset(ctx, value);
|
||||
/* add warp offset to the pointer for local memory */
|
||||
if (addressSpace == 3)
|
||||
value = lAddWarpOffset(ctx, value);
|
||||
|
||||
return value;
|
||||
}
|
||||
|
||||
llvm::Value *
|
||||
@@ -3394,13 +3401,19 @@ lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
||||
const std::vector<llvm::Value *> &args,
|
||||
const std::vector<llvm::Value *> &args_in,
|
||||
const char *name) {
|
||||
if (func == NULL) {
|
||||
AssertPos(currentPos, m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
std::vector<llvm::Value *> args = args_in;
|
||||
/* NVPTX:
|
||||
* Convert all pointers to addrspace(0)
|
||||
*/
|
||||
for (unsigned int i = 0; i < args.size(); i++)
|
||||
args[i] = lConvertToGenericPtr(this, args[i], currentPos);
|
||||
std::vector<llvm::Value *> argVals = args;
|
||||
// Most of the time, the mask is passed as the last argument. this
|
||||
// isn't the case for things like intrinsics, builtins, and extern "C"
|
||||
|
||||
@@ -506,9 +506,10 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
||||
uniform float cameraProj_32 = inputHeader.cameraProj[3][2];
|
||||
|
||||
// Light intersection: figure out which lights illuminate this tile.
|
||||
#ifdef __NVPTX__
|
||||
#if 1 //def __NVPTX__
|
||||
uniform int * uniform tileLightIndices = uniform new uniform int [MAX_LIGHTS];
|
||||
#else
|
||||
#define MALLOC
|
||||
#else /* shared memory doesn't full work... why? */
|
||||
uniform int tileLightIndices[MAX_LIGHTS]; // Light list for the tile
|
||||
#endif
|
||||
uniform int numTileLights =
|
||||
@@ -532,7 +533,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
||||
cameraProj_00, cameraProj_11, cameraProj_22, cameraProj_32,
|
||||
tileLightIndices, numTileLights, visualizeLightCount,
|
||||
framebuffer_r, framebuffer_g, framebuffer_b);
|
||||
#ifdef __NVPTX__
|
||||
#ifdef MALLOC
|
||||
delete tileLightIndices;
|
||||
#endif
|
||||
}
|
||||
|
||||
4
stmt.cpp
4
stmt.cpp
@@ -278,7 +278,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
int addressSpace;
|
||||
if (sym->type->IsConstType())
|
||||
{
|
||||
#if 0 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */
|
||||
#if 1 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */
|
||||
addressSpace = 4; /* constant */
|
||||
#else
|
||||
addressSpace = 0; /* use global for now */
|
||||
@@ -333,7 +333,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
sym->storagePtr =
|
||||
new llvm::GlobalVariable(*m->module, llvmTypeUn,
|
||||
sym->type->IsConstType(),
|
||||
llvm::GlobalValue::PrivateLinkage,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
cinit,
|
||||
llvm::Twine("local_") +
|
||||
llvm::Twine(sym->pos.first_line) +
|
||||
|
||||
Reference in New Issue
Block a user