Added addrspace(3,4)->addrspace(0) conversion to ctx->GetElementPtrInst. Appears to work now.
This commit is contained in:
25
ctx.cpp
25
ctx.cpp
@@ -1854,7 +1854,7 @@ static llvm::Value* lConvertGepToGenericPtr(FunctionEmitContext *ctx, llvm::Valu
|
||||
return value;
|
||||
llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
|
||||
const int addressSpace = pt->getAddressSpace();
|
||||
if (addressSpace != 3) // && addressSpace != 4)
|
||||
if (addressSpace != 3 && addressSpace != 4)
|
||||
return value;
|
||||
|
||||
llvm::Type *elTy = pt->getElementType();
|
||||
@@ -1867,8 +1867,10 @@ static llvm::Value* lConvertGepToGenericPtr(FunctionEmitContext *ctx, llvm::Valu
|
||||
llvm::ArrayType *arrTy = llvm::dyn_cast<llvm::ArrayType>(pt->getArrayElementType());
|
||||
assert(arrTy != NULL);
|
||||
llvm::Type *arrElTy = arrTy->getElementType();
|
||||
#if 0
|
||||
if (arrElTy->isArrayTy())
|
||||
Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
|
||||
#endif
|
||||
|
||||
/* convert elTy addrspace(3)* to i64* addrspace(3)* */
|
||||
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
|
||||
@@ -1886,13 +1888,16 @@ static llvm::Value* lConvertGepToGenericPtr(FunctionEmitContext *ctx, llvm::Valu
|
||||
value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
|
||||
|
||||
/* compute offset */
|
||||
llvm::Function *funcTid = m->module->getFunction("__tid_x");
|
||||
llvm::Function *funcWarpSz = m->module->getFunction("__warpsize");
|
||||
llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector<llvm::Value*>(), "gep2gen_tid");
|
||||
llvm::Value *warpSz = ctx->CallInst(funcWarpSz, NULL, std::vector<llvm::Value*>(), "gep2gen_warpSz");
|
||||
llvm::Value *warpId = ctx->BinaryOperator(llvm::Instruction::SDiv, tid, warpSz, "gep2gen_warpId");
|
||||
llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset");
|
||||
value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
|
||||
if (addressSpace == 3)
|
||||
{
|
||||
llvm::Function *funcTid = m->module->getFunction("__tid_x");
|
||||
llvm::Function *funcWarpSz = m->module->getFunction("__warpsize");
|
||||
llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector<llvm::Value*>(), "gep2gen_tid");
|
||||
llvm::Value *warpSz = ctx->CallInst(funcWarpSz, NULL, std::vector<llvm::Value*>(), "gep2gen_warpSz");
|
||||
llvm::Value *warpId = ctx->BinaryOperator(llvm::Instruction::SDiv, tid, warpSz, "gep2gen_warpId");
|
||||
llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset");
|
||||
value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
|
||||
}
|
||||
|
||||
/* convert arrElTy* to elTy* */
|
||||
llvm::PointerType *elTyPt0 = llvm::PointerType::get(elTy, 0);
|
||||
@@ -3461,19 +3466,21 @@ lCalleeArgCount(llvm::Value *callee, const FunctionType *funcType) {
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::CallInst(llvm::Value *func, const FunctionType *funcType,
|
||||
const std::vector<llvm::Value *> &args_in,
|
||||
const std::vector<llvm::Value *> &args,
|
||||
const char *name) {
|
||||
if (func == NULL) {
|
||||
AssertPos(currentPos, m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
#if 0
|
||||
std::vector<llvm::Value *> args = args_in;
|
||||
/* NVPTX:
|
||||
* Convert all pointers to addrspace(0)
|
||||
*/
|
||||
for (unsigned int i = 0; i < args.size(); i++)
|
||||
args[i] = lConvertToGenericPtr(this, args[i], currentPos);
|
||||
#endif
|
||||
std::vector<llvm::Value *> argVals = args;
|
||||
// Most of the time, the mask is passed as the last argument. this
|
||||
// isn't the case for things like intrinsics, builtins, and extern "C"
|
||||
|
||||
@@ -506,7 +506,7 @@ RenderTile(uniform int num_groups_x, uniform int num_groups_y,
|
||||
uniform float cameraProj_32 = inputHeader.cameraProj[3][2];
|
||||
|
||||
// Light intersection: figure out which lights illuminate this tile.
|
||||
#if 1 //def __NVPTX__
|
||||
#if 1
|
||||
uniform int * uniform tileLightIndices = uniform new uniform int [MAX_LIGHTS];
|
||||
#define MALLOC
|
||||
#else /* shared memory doesn't full work... why? */
|
||||
|
||||
@@ -328,7 +328,7 @@ volume_task(uniform float density[], uniform int _nVoxels[3],
|
||||
{
|
||||
if (taskIndex >= taskCount) return;
|
||||
|
||||
#if 0 /* cannot pass shared memory pointers to functions, need to find a way to solve this one :S */
|
||||
#if 1 /* cannot pass shared memory pointers to functions, need to find a way to solve this one :S */
|
||||
uniform int nVoxels[3];
|
||||
nVoxels[0] = _nVoxels[0];
|
||||
nVoxels[1] = _nVoxels[1];
|
||||
|
||||
2
stmt.cpp
2
stmt.cpp
@@ -278,7 +278,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
int addressSpace;
|
||||
if (sym->type->IsConstType())
|
||||
{
|
||||
#if 0 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */
|
||||
#if 1
|
||||
addressSpace = 4; /* constant */
|
||||
#else
|
||||
addressSpace = 0; /* use global for now */
|
||||
|
||||
Reference in New Issue
Block a user