diff --git a/ctx.cpp b/ctx.cpp index 7d8e7dba..ce8b831f 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1832,34 +1832,65 @@ FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, return inst; } -static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* value) +/* NVPTX: + * this is a helper function which adds a warp offset to a base pointer in local memory + * either in addrspace(3) or converted from addrspace(3) to addrspace(0) + */ +static llvm::Value* lAddWarpOffset(FunctionEmitContext *ctx, llvm::Value *value) { - assert(value->getType()->isPointerTy()); - llvm::PointerType *pt = llvm::dyn_cast(value->getType()); - if (pt->getAddressSpace() != 3) return value; - llvm::Function *func_tid_x = m->module->getFunction("__tid_x"); llvm::Function *func_warpsz = m->module->getFunction("__warpsize"); llvm::Value *__tid_x = ctx->CallInst(func_tid_x, NULL, std::vector(), "tidCorrectLocalPtr"); llvm::Value *__warpsz = ctx->CallInst(func_warpsz, NULL, std::vector(), "warpSzCorrectLocaLPtr"); llvm::Value *_mwarpsz = ctx->BinaryOperator(llvm::Instruction::Sub, LLVMInt32(0), __warpsz, "mwarpSzCorrectLocalPtr"); llvm::Value *__offset = ctx->BinaryOperator(llvm::Instruction::And, __tid_x, _mwarpsz, "offsetCorrectLocalPtr"); - return llvm::GetElementPtrInst::Create(value, __offset, "__gepCorrectLocalPtr", ctx->GetCurrentBasicBlock()); + return llvm::GetElementPtrInst::Create(value, __offset, "warpOffset_gep", ctx->GetCurrentBasicBlock()); } -static llvm::Value* lConvertLocalToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value) +/* NVPTX: + * this function computes the correct address in local memory for load/store operations*/ +static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* value) +{ + assert(value->getType()->isPointerTy()); + llvm::PointerType *pt = llvm::dyn_cast(value->getType()); + if (g->target->getISA() != Target::NVPTX || pt->getAddressSpace() != 3) return value; + return lAddWarpOffset(ctx, value); +} + +/* NVPTX: + 
* this function converts pointers from addrspace(3) to addrspace(0) + */ +static llvm::Value* lConvertLocal2GenericPtr(FunctionEmitContext *ctx, llvm::Value *value) { if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX) return value; llvm::PointerType *pt = llvm::dyn_cast(value->getType()); if (pt->getAddressSpace() != 3) return value; - llvm::PointerType *PointerTy = llvm::PointerType::get(LLVMTypes::Int64Type, 3); - value = ctx->BitCastInst(value, PointerTy, "cvtLog2Gen_i64ptr"); - value = lCorrectLocalPtr(ctx, value); + /* if the pointee is an array, extract its element type */ + llvm::Type *type = pt->getElementType(); + llvm::Type *typeEl = type; + if (type->isArrayTy()) + { + typeEl = type->getArrayElementType(); + assert(!typeEl->isArrayTy()); /* currently we don't support array-of-array in uniform */ + } + + /* convert elTy addrspace(3)* to i64 addrspace(3)* */ + llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, 3); + value = ctx->BitCastInst(value, Int64Ptr3, "cvtLog2Gen_i64ptr"); + + /* convert i64 addrspace(3)* to i64* */ llvm::Function *__cvt_loc2gen = m->module->getFunction("__cvt_loc2gen"); std::vector __cvt_loc2gen_args; __cvt_loc2gen_args.push_back(value); - return ctx->CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "cvtLoc2Gen"); + value = ctx->CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "cvtLoc2Gen"); + + /* convert i64* to elTy* */ + llvm::PointerType *typeElPtr = llvm::PointerType::get(typeEl, 0); + value = ctx->BitCastInst(value, typeElPtr, "cvtLoc2Gen_i642ptr"); + + /* add warp offset to the pointer */ + return lAddWarpOffset(ctx, value); } llvm::Value * @@ -1876,7 +1907,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { if (name == NULL) name = LLVMGetName(value, "_ptr2int"); - value = lConvertLocalToGenericPtr(this, value); /* NVPTX */ + value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */ llvm::Type *type = 
LLVMTypes::PointerIntType; llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock); AddDebugPos(inst); @@ -1910,7 +1941,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, } } - value = lConvertLocalToGenericPtr(this, value); /* NVPTX */ + value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */ llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock); AddDebugPos(inst); return inst; @@ -2489,7 +2520,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) { if (name == NULL) name = LLVMGetName(ptr, "_load"); - ptr = lCorrectLocalPtr(this, ptr); /* NVPTX */ + ptr = lCorrectLocalPtr(this, ptr); /* NVPTX: correct addrspace(3) pointer before load/store */ llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock); if (g->opt.forceAlignedMemory && @@ -2622,7 +2653,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask, // it's totally unaligned. (This shouldn't make any difference // vs the proper alignment in practice.) 
align = 1; - ptr = lCorrectLocalPtr(this, ptr); /* NVPTX */ + ptr = lCorrectLocalPtr(this, ptr); /* NVPTX: correct addrspace(3) pointer before load/store */ llvm::Instruction *inst = new llvm::LoadInst(ptr, name, false /* not volatile */, align, bblock); @@ -3050,7 +3081,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) { llvm::dyn_cast(ptr->getType()); AssertPos(currentPos, pt != NULL); - ptr = lCorrectLocalPtr(this, ptr); /* NVPTX */ + ptr = lCorrectLocalPtr(this, ptr); /* NVPTX: correct addrspace(3) pointer before load/store */ llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock); if (g->opt.forceAlignedMemory && diff --git a/decl.cpp b/decl.cpp index 0366dd73..54af01cd 100644 --- a/decl.cpp +++ b/decl.cpp @@ -402,6 +402,13 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) { return; } +#if 0 /* NVPTX */ + if (baseType->IsUniformType()) + { + fprintf(stderr, " detected uniform array of size= %d array= %s\n" ,arraySize, + baseType->IsArrayType() ? " true " : " false "); + } +#endif const Type *arrayType = new ArrayType(baseType, arraySize); if (child != NULL) { child->InitFromType(arrayType, ds); diff --git a/module.cpp b/module.cpp index 4be06df7..82fee36c 100644 --- a/module.cpp +++ b/module.cpp @@ -2338,6 +2338,26 @@ Module::CompileAndOutput(const char *srcFile, m = new Module(srcFile); if (m->CompileFile() == 0) { + + + /* NVPTX: + * for PTX target replace '.' 
with '_' in all global variables + * a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]* + */ + if (g->target->getISA() == Target::NVPTX) + { + llvm::Module::global_iterator + I = m->module->global_begin(), + E = m->module->global_end(); + for (; I != E; I++) + { + std::string name = I->getName(); + for (size_t i = 0; i < name.length(); i++) + if (name[i] == '.') + name[i] = '_'; + I->setName(name); + } + } if (outputType == CXX) { if (target == NULL || strncmp(target, "generic-", 8) != 0) { Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" " diff --git a/opt.cpp b/opt.cpp index f70e522d..f7494873 100644 --- a/opt.cpp +++ b/opt.cpp @@ -502,6 +502,7 @@ Optimize(llvm::Module *module, int optLevel) { optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createGlobalDCEPass()); } +#if 1 else { llvm::PassRegistry *registry = llvm::PassRegistry::getPassRegistry(); llvm::initializeCore(*registry); @@ -685,6 +686,7 @@ Optimize(llvm::Module *module, int optLevel) { // Should be the last optPM.add(CreateFixBooleanSelectPass(), 400); } +#endif // Finish up by making sure we didn't mess anything up in the IR along // the way. diff --git a/stmt.cpp b/stmt.cpp index fac16640..e73fadd6 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -260,44 +260,36 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { if (sym->type->IsArrayType() && sym->type->IsUniformType() && g->target->getISA() == Target::NVPTX) { -#if 0 /* need to test if initializer works ... */ if (initExpr != NULL) - Error(initExpr->pos, "Initializer for static variable " - "\"%s\" must be a constant.", sym->name.c_str()); -#endif + Error(initExpr->pos, + "It is not possible to initialize \"uniform\" array \"%s\" with \"nvptx\" target. " + "Use \"varying\" or \"const static uniform\" if possible.", + sym->name.c_str()); PerformanceWarning(sym->pos, - "\"uniform\" arrays may be slow with \"nvptx\" target. 
Use \"varying\" if possible."); + "\"uniform\" arrays might be slow with \"nvptx\" target." + " Unless data sharing between program instances is required, use \"varying\" instead."); - llvm::Constant *cinit = NULL; - if (initExpr != NULL) - { - if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false) - continue; - // FIXME: we only need this for function pointers; it was - // already done for atomic types and enums in - // DeclStmt::TypeCheck()... - if (dynamic_cast(initExpr) == NULL) { - initExpr = TypeConvertExpr(initExpr, sym->type, - "initializer"); - // FIXME: and this is only needed to re-establish - // constant-ness so that GetConstant below works for - // constant artithmetic expressions... - initExpr = ::Optimize(initExpr); - } + const ArrayType *at = CastType(sym->type); + const int nel = at->GetElementCount(); + /* we must scale # elements by 4, because a thread-block will run 4 warps + * or 128 threads. + * ***note-to-me***:please define these values (128threads/4warps) + * in nvptx-target definition + * instead of compile-time constants + */ + const int nel4 = nel*4; + ArrayType nat(at->GetElementType(), nel4); + llvm::Type *llvmType = nat.LLVMType(g->ctx); - cinit = initExpr->GetConstant(sym->type); - } - if (cinit == NULL) - cinit = llvm::Constant::getNullValue(llvmType); - - // Allocate space for the static variable in global scope, so - // that it persists across function calls + // addrspace(3) must be undefined at initialization + llvm::Constant *cinit = llvm::UndefValue::get(llvmType); sym->storagePtr = new llvm::GlobalVariable(*m->module, llvmType, sym->type->IsConstType(), - llvm::GlobalValue::InternalLinkage, cinit, - llvm::Twine("local") + + llvm::GlobalValue::PrivateLinkage, + cinit, + llvm::Twine("local_") + llvm::Twine(sym->pos.first_line) + llvm::Twine("_") + sym->name.c_str(), NULL,