diff --git a/ctx.cpp b/ctx.cpp
index e55a7acb..0d0a09a1 100644
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -1848,6 +1848,70 @@ static llvm::Value* lAddWarpOffset(FunctionEmitContext *ctx, llvm::Value *value)
   return llvm::GetElementPtrInst::Create(value, __offset, "warpOffset_gep", ctx->GetCurrentBasicBlock());
 }
 
+/* NVPTX:
+ * Called on the base pointer by FunctionEmitContext::GetElementPtrInst().
+ * Values that are NULL or not pointers, pointers outside address space 3,
+ * and everything on non-NVPTX targets are returned unchanged; the callers
+ * perform their own NULL check on the result.
+ * Otherwise the pointee must be an array: the pointer is converted to a
+ * generic (address space 0) pointer via __cvt_loc2gen and advanced by
+ * (__tid_x() / __warpsize()) * (array element count / 4) elements.
+ * NOTE(review): the "/ 4" presumably matches how the array is sized where
+ * it is allocated -- confirm against the allocation site.
+ */
+static llvm::Value* lConvertGepToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos)
+{
+  if (value == NULL || !value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
+    return value;
+  llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
+  const int addressSpace = pt->getAddressSpace();
+  if (addressSpace != 3) // && addressSpace != 4)
+    return value;
+
+  llvm::Type *elTy = pt->getElementType();
+  assert(elTy->isArrayTy());
+  const int numElTot = elTy->getArrayNumElements();
+  const int numEl = numElTot/4;
+#if 0
+  fprintf(stderr, " --- detected addrspace(3) sz= %d --- \n", numEl);
+#endif
+  llvm::ArrayType *arrTy = llvm::dyn_cast<llvm::ArrayType>(pt->getArrayElementType());
+  assert(arrTy != NULL);
+  llvm::Type *arrElTy = arrTy->getElementType();
+  if (arrElTy->isArrayTy())
+    Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
+
+  /* convert elTy addrspace(3)* to i64* addrspace(3)* */
+  llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
+  value = ctx->BitCastInst(value, Int64Ptr3, "gep2gen_cast1");
+
+  /* convert i64* addrspace(3) to i64* */
+  llvm::Function *__cvt2gen = m->module->getFunction(
+      addressSpace == 3 ? "__cvt_loc2gen" : "__cvt_const2gen");
+  std::vector<llvm::Value *> __cvt2gen_args;
+  __cvt2gen_args.push_back(value);
+  value = llvm::CallInst::Create(__cvt2gen, __cvt2gen_args, "gep2gen_cvt", ctx->GetCurrentBasicBlock());
+
+  /* convert i64* to arrElTy* */
+  llvm::PointerType *arrElTyPt0 = llvm::PointerType::get(arrElTy, 0);
+  value = ctx->BitCastInst(value, arrElTyPt0, "gep2gen_cast2");
+
+  /* compute offset */
+  llvm::Function *funcTid = m->module->getFunction("__tid_x");
+  llvm::Function *funcWarpSz = m->module->getFunction("__warpsize");
+  llvm::Value *tid = ctx->CallInst(funcTid, NULL, std::vector<llvm::Value *>(), "gep2gen_tid");
+  llvm::Value *warpSz = ctx->CallInst(funcWarpSz, NULL, std::vector<llvm::Value *>(), "gep2gen_warpSz");
+  llvm::Value *warpId = ctx->BinaryOperator(llvm::Instruction::SDiv, tid, warpSz, "gep2gen_warpId");
+  llvm::Value *offset = ctx->BinaryOperator(llvm::Instruction::Mul, warpId, LLVMInt32(numEl), "gep2gen_offset");
+  value = llvm::GetElementPtrInst::Create(value, offset, "gep2gen_offset", ctx->GetCurrentBasicBlock());
+
+  /* convert arrElTy* to elTy* */
+  llvm::PointerType *elTyPt0 = llvm::PointerType::get(elTy, 0);
+  value = ctx->BitCastInst(value, elTyPt0, "gep2gen_cast3");
+
+  return value;
+}
+
 /* NVPTX:
  * this function compute correct address in local memory for load/store operations
  */
@@ -1856,6 +1920,9 @@ static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* valu
   assert(value->getType()->isPointerTy());
   llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
   if (g->target->getISA() != Target::NVPTX || pt->getAddressSpace() != 3) return value;
+
+  assert(0); /* we should never enter here */
+
   return lAddWarpOffset(ctx, value);
 }
 
@@ -1871,6 +1938,8 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
   const int addressSpace = pt->getAddressSpace();
   if (addressSpace != 3 && addressSpace != 4)
     return value;
+  assert(0); /* we should never enter here */
+
   /* if array, extracts element type */
   llvm::Type *type = pt->getElementType();
   llvm::Type *typeEl = type;
@@ -2262,6 +2331,7 @@ FunctionEmitContext::MakeSlicePointer(llvm::Value *ptr, llvm::Value *offset) {
 llvm::Value *
 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index,
                                        const Type *ptrRefType, const char *name) {
+    basePtr = lConvertGepToGenericPtr(this, basePtr, currentPos);
     if (basePtr == NULL || index == NULL) {
         AssertPos(currentPos, m->errorCount > 0);
         return NULL;
@@ -2332,6 +2402,7 @@ llvm::Value *
 FunctionEmitContext::GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
                                        llvm::Value *index1, const Type *ptrRefType,
                                        const char *name) {
+    basePtr = lConvertGepToGenericPtr(this, basePtr, currentPos);
     if (basePtr == NULL || index0 == NULL || index1 == NULL) {
         AssertPos(currentPos, m->errorCount > 0);
         return NULL;
diff --git a/stmt.cpp b/stmt.cpp
index 6baa6fdd..4695ffec 100644
--- a/stmt.cpp
+++ b/stmt.cpp
@@ -266,7 +266,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
               if (!sym->type->IsConstType())
                 PerformanceWarning(sym->pos,
                     "\"uniform\" arrays might be slow with \"nvptx\" target. "
-                    "Unless data sharing between program instances is required, use \"varying\" if possible.");
+                    "Unless data sharing between program instances is required, use \"varying\" or \"uniform new\"+\"delete\" if possible.");
               if (initExpr != NULL && !sym->type->IsConstType())
                 Error(initExpr->pos,
                     "It is not possible to initialize non-constant \"uniform\" array \"%s\" with \"nvptx\" target. "
@@ -278,7 +278,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
               int addressSpace;
               if (sym->type->IsConstType())
               {
-#if 1 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */
+#if 0 /* current, addressSpace = 4 generates a compilation fails as it can't be passed as a function arg:S */
                 addressSpace = 4; /* constant */
 #else
                 addressSpace = 0; /* use global for now */