diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index 83d02116..24b2f0d1 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -63,11 +63,17 @@ define i32 @__nctaid_z() nounwind readnone alwaysinline } ;;;;;;;; declare i64* @llvm.nvvm.ptr.shared.to.gen.p0i64.p3i64(i64 addrspace(3)*) +declare i64* @llvm.nvvm.ptr.constant.to.gen.p0i64.p4i64(i64 addrspace(4)*) define i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline { %ptr = tail call i64* @llvm.nvvm.ptr.shared.to.gen.p0i64.p3i64(i64 addrspace(3)* %0) ret i64* %ptr } +define i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline +{ + %ptr = tail call i64* @llvm.nvvm.ptr.constant.to.gen.p0i64.p4i64(i64 addrspace(4)* %0) + ret i64* %ptr +} ;;;;;;;; define i32 @__shfl_i32_nvptx(i32, i32) nounwind readnone alwaysinline { diff --git a/builtins/util.m4 b/builtins/util.m4 index 873e8c4d..fb8c5c33 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -4543,5 +4543,6 @@ declare i32 @__nctaid_x() nounwind readnone alwaysinline declare i32 @__nctaid_y() nounwind readnone alwaysinline declare i32 @__nctaid_z() nounwind readnone alwaysinline declare i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline +declare i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline ') diff --git a/ctx.cpp b/ctx.cpp index ce8b831f..096feeb0 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1833,8 +1833,9 @@ FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type, } /* NVPTX: - * this is a helper function which adds a warp offset to a base pointer in local memory - * either in addrspace(3) or converted from addrspace(3) to addrspace(0) + * this is a helper function which adds a warp offset to a base pointer; + * the pointer must either be in local memory, addrspace(3), + * or be one just converted from addrspace(3) to addrspace(0) by lConvertToGenericPtr */ static llvm::Value* lAddWarpOffset(FunctionEmitContext *ctx, llvm::Value *value) { @@ -1848,7
+1849,8 @@ static llvm::Value* lAddWarpOffset(FunctionEmitContext *ctx, llvm::Value *value) } /* NVPTX: - * this function compute correct address in local memory for load/store operations*/ + * this function computes the correct address in local memory for load/store operations + */ static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* value) { assert(value->getType()->isPointerTy()); @@ -1858,32 +1860,37 @@ static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* valu } /* NVPTX: - * this function converts pointers from addrspace(3) to addrspace(0) + * this function converts a pointer in addrspace(3 or 4) to addrspace(0) */ -static llvm::Value* lConvertLocal2GenericPtr(FunctionEmitContext *ctx, llvm::Value *value) +static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos) { if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX) return value; llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType()); - if (pt->getAddressSpace() != 3) return value; - /* if array, extracts its element type */ + /* make sure addrspace corresponds to either local or constant memories */ + const int addressSpace = pt->getAddressSpace(); + if (addressSpace != 3 && addressSpace != 4) return value; + + /* if array, extracts element type */ llvm::Type *type = pt->getElementType(); llvm::Type *typeEl = type; if (type->isArrayTy()) { typeEl = type->getArrayElementType(); - assert(!typeEl->isArrayTy()); /* currently we don't support array-of-array in uniform */ + if (typeEl->isArrayTy()) + Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array"); } /* convert elTy addrspace(3)* to i64* addrspace(3)* */ - llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, 3); - value = ctx->BitCastInst(value, Int64Ptr3, "cvtLog2Gen_i64ptr"); + llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace); + value =
ctx->BitCastInst(value, Int64Ptr3, "cvt2gen_i64ptr"); /* convert i64* addrspace(3) to i64* */ - llvm::Function *__cvt_loc2gen = m->module->getFunction("__cvt_loc2gen"); - std::vector<llvm::Value *> __cvt_loc2gen_args; - __cvt_loc2gen_args.push_back(value); - value = ctx->CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "cvtLoc2Gen"); + llvm::Function *__cvt2gen = m->module->getFunction( + addressSpace == 3 ? "__cvt_loc2gen" : "__cvt_const2gen"); + std::vector<llvm::Value *> __cvt2gen_args; + __cvt2gen_args.push_back(value); + value = ctx->CallInst(__cvt2gen, NULL, __cvt2gen_args, "cvt2gen_call"); /* convert i64* to elTy* */ llvm::PointerType *typeElPtr = llvm::PointerType::get(typeEl, 0); @@ -1907,7 +1914,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { if (name == NULL) name = LLVMGetName(value, "_ptr2int"); - value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */ + value = lConvertToGenericPtr(this, value, currentPos); /* NVPTX : convert to addrspace(0) */ llvm::Type *type = LLVMTypes::PointerIntType; llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock); AddDebugPos(inst); @@ -1941,7 +1948,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, } } - value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */ + value = lConvertToGenericPtr(this, value, currentPos); /* NVPTX : convert to addrspace(0) */ llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock); AddDebugPos(inst); return inst; diff --git a/module.cpp b/module.cpp index 4b3ad351..b10a4daf 100644 --- a/module.cpp +++ b/module.cpp @@ -2353,25 +2353,25 @@ Module::CompileAndOutput(const char *srcFile, m = new Module(srcFile); if (m->CompileFile() == 0) { - - /* NVPTX: - * for PTX target replace '.'
with '_' in all global variables - * a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]* - */ - if (g->target->getISA() == Target::NVPTX) - { - llvm::Module::global_iterator - I = m->module->global_begin(), - E = m->module->global_end(); - for (; I != E; I++) + /* NVPTX: + * for PTX target replace '.' with '_' in all global variables + * a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]* + */ + if (g->target->getISA() == Target::NVPTX) { - std::string name = I->getName(); - for (int i = 0; i < name.length(); i++) - if (name[i] == '.') - name[i] = '_'; - I->setName(name); + llvm::Module::global_iterator + I = m->module->global_begin(), + E = m->module->global_end(); + for (; I != E; I++) + { + std::string name = I->getName(); + for (size_t i = 0; i < name.length(); i++) + if (name[i] == '.') + name[i] = '_'; + I->setName(name); + } } - } + if (outputType == CXX) { if (target == NULL || strncmp(target, "generic-", 8) != 0) { Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" " diff --git a/stmt.cpp b/stmt.cpp index e73fadd6..d498cb59 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -207,10 +207,16 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { if (sym->storageClass == SC_STATIC) { - if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType()) - Error(sym->pos, - "Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.", - sym->name.c_str()); + if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType()) + Error(sym->pos, + "Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.", + sym->name.c_str()); +#if 0 + if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType()) + Error(sym->pos, + "const static varying variable ""\"%s\" is not supported with ""\"nvptx\" target.", + sym->name.c_str()); +#endif // For static variables, we need a compile-time constant value // for its initializer; if there's no initializer, we use a @@ -248,59 +254,93 @@
DeclStmt::EmitCode(FunctionEmitContext *ctx) const { llvm::Twine("static_") + llvm::Twine(sym->pos.first_line) + llvm::Twine("_") + sym->name.c_str()); -#if 0 - NULL, - llvm::GlobalVariable::NotThreadLocal, - 3); -#endif // Tell the FunctionEmitContext about the variable ctx->EmitVariableDebugInfo(sym); } else { - if (sym->type->IsArrayType() && sym->type->IsUniformType() - && g->target->getISA() == Target::NVPTX) + if (sym->type->IsArrayType() && + sym->type->IsUniformType() && + g->target->getISA() == Target::NVPTX) { - if (initExpr != NULL) - Error(initExpr->pos, - "It is not possible to initialize \"uniform\" array \"%s\" with \"nvptx\" target. " - "Use \"varying\" or \"const static uniform\" if possible.", - sym->name.c_str()); + /* deal with "const uniform" or "uniform" arrays for nvptx target */ + PerformanceWarning(sym->pos, + "\"uniform\" arrays might be slow with \"nvptx\" target. " + "Unless data sharing between program instances is required, use \"varying\" if possible."); + if (initExpr != NULL && !sym->type->IsConstType()) + Error(initExpr->pos, + "It is not possible to initialize non-constant \"uniform\" array \"%s\" with \"nvptx\" target. " + "Please use \"varying\" or \"const static uniform\".", + sym->name.c_str()); - PerformanceWarning(sym->pos, - "\"uniform\" arrays might be slow with \"nvptx\" target." - " Unless data sharing between program instances is required, use \"varying\" instead."); + llvm::Constant *cinit = NULL; + llvm::Type *llvmTypeUn; + int addressSpace; + if (sym->type->IsConstType()) + { + addressSpace = 4; /* constant */ + if (initExpr != NULL) { + if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false) + continue; + // FIXME: we only need this for function pointers; it was + // already done for atomic types and enums in + // DeclStmt::TypeCheck()...
+ if (dynamic_cast<ExprList *>(initExpr) == NULL) { + initExpr = TypeConvertExpr(initExpr, sym->type, + "initializer"); + // FIXME: and this is only needed to re-establish + // constant-ness so that GetConstant below works for + // constant arithmetic expressions... + initExpr = ::Optimize(initExpr); + } - const ArrayType *at = CastType<ArrayType>(sym->type); - const int nel = at->GetElementCount(); - /* we must scale # elements by 4, because a thread-block will run 4 warps - * or 128 threads. - * ***note-to-me***:please define these value (128threads/4warps) - * in nvptx-target definition - * instead of compile-time constants - */ - const int nel4 = nel*4; - ArrayType nat(at->GetElementType(), nel4); - llvm::Type *llvmType = nat.LLVMType(g->ctx); + cinit = initExpr->GetConstant(sym->type); + if (cinit == NULL) + Error(initExpr->pos, "Initializer for \"const uniform\" array " + "\"%s\" must be a constant.", sym->name.c_str()); + } + else + { + Error(sym->pos, "Missing initializer for const variable " + "\"%s\".", sym->name.c_str()); + } + if (cinit == NULL) + cinit = llvm::Constant::getNullValue(llvmType); + llvmTypeUn = llvmType; + } + else + { + addressSpace = 3; /* local; addrspace(3) must be undefined at initialization */ + const ArrayType *at = CastType<ArrayType>(sym->type); + const int nel = at->GetElementCount(); + /* we must scale # elements by 4, because a thread-block will run 4 warps + * or 128 threads.
+ * ***note-to-me***:please define these value (128threads/4warps) + * in nvptx-target definition + * instead of compile-time constants + */ + const int nel4 = nel*4; + ArrayType nat(at->GetElementType(), nel4); + llvmTypeUn = nat.LLVMType(g->ctx); + cinit = llvm::UndefValue::get(llvmTypeUn); + } - // addrspace(3) must be undefined at initialization - llvm::Constant *cinit = llvm::UndefValue::get(llvmType); - sym->storagePtr = - new llvm::GlobalVariable(*m->module, llvmType, - sym->type->IsConstType(), - llvm::GlobalValue::PrivateLinkage, - cinit, - llvm::Twine("local_") + - llvm::Twine(sym->pos.first_line) + - llvm::Twine("_") + sym->name.c_str(), - NULL, - llvm::GlobalVariable::NotThreadLocal, - /*AddressSpace=*/ 3); + sym->storagePtr = + new llvm::GlobalVariable(*m->module, llvmTypeUn, + sym->type->IsConstType(), + llvm::GlobalValue::PrivateLinkage, + cinit, + llvm::Twine("local_") + + llvm::Twine(sym->pos.first_line) + + llvm::Twine("_") + sym->name.c_str(), + NULL, + llvm::GlobalVariable::NotThreadLocal, + addressSpace); #if 0 - llvm::GlobalVariable *var = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr); - var->setAlignment(128); + llvm::GlobalVariable *var = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr); + var->setAlignment(128); #endif - // Tell the FunctionEmitContext about the variable - ctx->EmitVariableDebugInfo(sym); + // Tell the FunctionEmitContext about the variable + ctx->EmitVariableDebugInfo(sym); } else { @@ -311,6 +351,10 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { // this before the initializer stuff. ctx->EmitVariableDebugInfo(sym); + if (initExpr == NULL && sym->type->IsConstType()) + Error(sym->pos, "Missing initializer for const variable " + "\"%s\".", sym->name.c_str()); + // And then get it initialized...
sym->parentFunction = ctx->GetFunction(); InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos); diff --git a/tests/uniform-1.ispc b/tests/uniform-1.ispc index 4c4768a5..dcf4eab0 100644 --- a/tests/uniform-1.ispc +++ b/tests/uniform-1.ispc @@ -24,14 +24,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) } task void result_task(uniform float RET[]) { - uniform float ret = -1; - switch (taskIndex) - { - case 0: ret = 496; break; - case 1: ret = 1520; break; - case 2: ret = 2544; break; - case 3: ret = 3568; break; - } + const uniform float ret = reduce_add(programIndex + programCount*taskIndex); if (programIndex < 32/4) RET[programCount/4*taskIndex + programIndex] = ret; }