added addrspace(4)/constant memory for const uniform declarations
This commit is contained in:
@@ -63,11 +63,17 @@ define i32 @__nctaid_z() nounwind readnone alwaysinline
|
||||
}
|
||||
;;;;;;;;
|
||||
;; NVVM intrinsics that convert a pointer from a specific address space
;; to the generic address space (addrspace 0).
;; The intrinsic name must match the source address space:
;;   shared.to.gen   for addrspace(3) pointers
;;   constant.to.gen for addrspace(4) pointers
declare i64* @llvm.nvvm.ptr.shared.to.gen.p0i64.p3i64(i64 addrspace(3)*)
declare i64* @llvm.nvvm.ptr.constant.to.gen.p0i64.p4i64(i64 addrspace(4)*)

;; Takes an i64 pointer in addrspace(3) and returns the equivalent
;; generic i64 pointer.
define i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline
{
  %ptr = tail call i64* @llvm.nvvm.ptr.shared.to.gen.p0i64.p3i64(i64 addrspace(3)* %0)
  ret i64* %ptr
}

;; Takes an i64 pointer in addrspace(4) and returns the equivalent
;; generic i64 pointer.
;; This goes through the constant.to.gen intrinsic; routing an addrspace(4)
;; pointer through shared.to.gen would apply the shared-window conversion
;; to a constant-memory address and yield a wrong generic address.
define i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline
{
  %ptr = tail call i64* @llvm.nvvm.ptr.constant.to.gen.p0i64.p4i64(i64 addrspace(4)* %0)
  ret i64* %ptr
}
|
||||
;;;;;;;;
|
||||
define i32 @__shfl_i32_nvptx(i32, i32) nounwind readnone alwaysinline
|
||||
{
|
||||
|
||||
@@ -4543,5 +4543,6 @@ declare i32 @__nctaid_x() nounwind readnone alwaysinline
|
||||
declare i32 @__nctaid_y() nounwind readnone alwaysinline
|
||||
declare i32 @__nctaid_z() nounwind readnone alwaysinline
|
||||
declare i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline
|
||||
declare i64* @__cvt_const2gen(i64 addrspace(4)*) nounwind readnone alwaysinline
|
||||
')
|
||||
|
||||
|
||||
39
ctx.cpp
39
ctx.cpp
@@ -1833,8 +1833,9 @@ FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type,
|
||||
}
|
||||
|
||||
/* NVPTX:
|
||||
* this is a helper function which adds a warp offset to a base pointer in local memory
|
||||
* either in addrspace(3) or converted from addrspace(3) to addrspace(0)
|
||||
* this is a helper function which adds a warp offset to a base pointer
|
||||
* pointer must either be in local memory addrspace(3)
|
||||
* or the one just converted from addrspace(3) to addrspace(0) in lConvertToGenericPtr
|
||||
*/
|
||||
static llvm::Value* lAddWarpOffset(FunctionEmitContext *ctx, llvm::Value *value)
|
||||
{
|
||||
@@ -1848,7 +1849,8 @@ static llvm::Value* lAddWarpOffset(FunctionEmitContext *ctx, llvm::Value *value)
|
||||
}
|
||||
|
||||
/* NVPTX:
|
||||
* this function compute correct address in local memory for load/store operations*/
|
||||
* this function computes the correct address in local memory for load/store operations
|
||||
*/
|
||||
static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* value)
|
||||
{
|
||||
assert(value->getType()->isPointerTy());
|
||||
@@ -1858,32 +1860,37 @@ static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* valu
|
||||
}
|
||||
|
||||
/* NVPTX:
|
||||
* this function converts pointers from addrspace(3) to addrspace(0)
|
||||
* this function converts a pointer in addrspace(3 or 4) to addrspace(0)
|
||||
*/
|
||||
static llvm::Value* lConvertLocal2GenericPtr(FunctionEmitContext *ctx, llvm::Value *value)
|
||||
static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos ¤tPos)
|
||||
{
|
||||
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX) return value;
|
||||
llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
|
||||
if (pt->getAddressSpace() != 3) return value;
|
||||
|
||||
/* if array, extracts its element type */
|
||||
/* make sure addrspace corresponds to either local or constant memories */
|
||||
const int addressSpace = pt->getAddressSpace();
|
||||
if (addressSpace != 3 && addressSpace != 4) return value;
|
||||
|
||||
/* if array, extracts element type */
|
||||
llvm::Type *type = pt->getElementType();
|
||||
llvm::Type *typeEl = type;
|
||||
if (type->isArrayTy())
|
||||
{
|
||||
typeEl = type->getArrayElementType();
|
||||
assert(!typeEl->isArrayTy()); /* currently we don't support array-of-array in uniform */
|
||||
if (typeEl->isArrayTy())
|
||||
Error(currentPos, "Currently \"nvptx\" target doesn't support array-of-array");
|
||||
}
|
||||
|
||||
/* convert elTy addrspace(3)* to i64* addrspace(3)* */
|
||||
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, 3);
|
||||
value = ctx->BitCastInst(value, Int64Ptr3, "cvtLog2Gen_i64ptr");
|
||||
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, addressSpace);
|
||||
value = ctx->BitCastInst(value, Int64Ptr3, "cvt2gen_i64ptr");
|
||||
|
||||
/* convert i64* addrspace(3) to i64* */
|
||||
llvm::Function *__cvt_loc2gen = m->module->getFunction("__cvt_loc2gen");
|
||||
std::vector<llvm::Value *> __cvt_loc2gen_args;
|
||||
__cvt_loc2gen_args.push_back(value);
|
||||
value = ctx->CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "cvtLoc2Gen");
|
||||
llvm::Function *__cvt2gen = m->module->getFunction(
|
||||
addressSpace == 3 ? "__cvt_loc2gen" : "__cvt_const2gen");
|
||||
std::vector<llvm::Value *> __cvt2gen_args;
|
||||
__cvt2gen_args.push_back(value);
|
||||
value = ctx->CallInst(__cvt2gen, NULL, __cvt2gen_args, "cvt2gen_call");
|
||||
|
||||
/* convert i64* to elTy* */
|
||||
llvm::PointerType *typeElPtr = llvm::PointerType::get(typeEl, 0);
|
||||
@@ -1907,7 +1914,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
|
||||
if (name == NULL)
|
||||
name = LLVMGetName(value, "_ptr2int");
|
||||
|
||||
value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */
|
||||
value = lConvertToGenericPtr(this, value, currentPos); /* NVPTX : convert to addrspace(0) */
|
||||
llvm::Type *type = LLVMTypes::PointerIntType;
|
||||
llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
|
||||
AddDebugPos(inst);
|
||||
@@ -1941,7 +1948,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType,
|
||||
}
|
||||
}
|
||||
|
||||
value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */
|
||||
value = lConvertToGenericPtr(this, value, currentPos); /* NVPTX : convert to addrspace(0) */
|
||||
llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
|
||||
AddDebugPos(inst);
|
||||
return inst;
|
||||
|
||||
34
module.cpp
34
module.cpp
@@ -2353,25 +2353,25 @@ Module::CompileAndOutput(const char *srcFile,
|
||||
m = new Module(srcFile);
|
||||
if (m->CompileFile() == 0) {
|
||||
|
||||
|
||||
/* NVPTX:
|
||||
* for PTX target replace '.' with '_' in all global variables
|
||||
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
|
||||
*/
|
||||
if (g->target->getISA() == Target::NVPTX)
|
||||
{
|
||||
llvm::Module::global_iterator
|
||||
I = m->module->global_begin(),
|
||||
E = m->module->global_end();
|
||||
for (; I != E; I++)
|
||||
/* NVPTX:
|
||||
* for PTX target replace '.' with '_' in all global variables
|
||||
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
|
||||
*/
|
||||
if (g->target->getISA() == Target::NVPTX)
|
||||
{
|
||||
std::string name = I->getName();
|
||||
for (int i = 0; i < name.length(); i++)
|
||||
if (name[i] == '.')
|
||||
name[i] = '_';
|
||||
I->setName(name);
|
||||
llvm::Module::global_iterator
|
||||
I = m->module->global_begin(),
|
||||
E = m->module->global_end();
|
||||
for (; I != E; I++)
|
||||
{
|
||||
std::string name = I->getName();
|
||||
for (int i = 0; i < name.length(); i++)
|
||||
if (name[i] == '.')
|
||||
name[i] = '_';
|
||||
I->setName(name);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if (outputType == CXX) {
|
||||
if (target == NULL || strncmp(target, "generic-", 8) != 0) {
|
||||
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
|
||||
|
||||
138
stmt.cpp
138
stmt.cpp
@@ -207,10 +207,16 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
if (sym->storageClass == SC_STATIC) {
|
||||
|
||||
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
|
||||
Error(sym->pos,
|
||||
"Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.",
|
||||
sym->name.c_str());
|
||||
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
|
||||
Error(sym->pos,
|
||||
"Non-constant static variable ""\"%s\" is not supported with ""\"nvptx\" target.",
|
||||
sym->name.c_str());
|
||||
#if 0
|
||||
if (g->target->getISA() == Target::NVPTX && sym->type->IsVaryingType())
|
||||
Error(sym->pos,
|
||||
"const static varying variable ""\"%s\" is not supported with ""\"nvptx\" target.",
|
||||
sym->name.c_str());
|
||||
#endif
|
||||
|
||||
// For static variables, we need a compile-time constant value
|
||||
// for its initializer; if there's no initializer, we use a
|
||||
@@ -248,59 +254,93 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
llvm::Twine("static_") +
|
||||
llvm::Twine(sym->pos.first_line) +
|
||||
llvm::Twine("_") + sym->name.c_str());
|
||||
#if 0
|
||||
NULL,
|
||||
llvm::GlobalVariable::NotThreadLocal,
|
||||
3);
|
||||
#endif
|
||||
// Tell the FunctionEmitContext about the variable
|
||||
ctx->EmitVariableDebugInfo(sym);
|
||||
}
|
||||
else {
|
||||
if (sym->type->IsArrayType() && sym->type->IsUniformType()
|
||||
&& g->target->getISA() == Target::NVPTX)
|
||||
if (sym->type->IsArrayType() &&
|
||||
sym->type->IsUniformType() &&
|
||||
g->target->getISA() == Target::NVPTX)
|
||||
{
|
||||
if (initExpr != NULL)
|
||||
Error(initExpr->pos,
|
||||
"It is not possible to initialize \"uniform\" array \"%s\" with \"nvptx\" target. "
|
||||
"Use \"varying\" or \"const static uniform\" if possible.",
|
||||
sym->name.c_str());
|
||||
/* deal with "const uniform" or "uniform" arrays for nvptx target */
|
||||
PerformanceWarning(sym->pos,
|
||||
"\"uniform\" arrays might be slow with \"nvptx\" target. "
|
||||
"Unless data sharing between program instances is required, use \"varying\" if possible.");
|
||||
if (initExpr != NULL && !sym->type->IsConstType())
|
||||
Error(initExpr->pos,
|
||||
"It is not possible to initialize non-constant \"uniform\" array \"%s\" with \"nvptx\" target. "
|
||||
"Please use \"varying\" or \"const static uniform\".",
|
||||
sym->name.c_str());
|
||||
|
||||
PerformanceWarning(sym->pos,
|
||||
"\"uniform\" arrays might be slow with \"nvptx\" target."
|
||||
" Unless data sharing between program instances is required, use \"varying\" instead.");
|
||||
llvm::Constant *cinit = NULL;
|
||||
llvm::Type *llvmTypeUn;
|
||||
int addressSpace;
|
||||
if (sym->type->IsConstType())
|
||||
{
|
||||
addressSpace = 4; /* constant */
|
||||
if (initExpr != NULL) {
|
||||
if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
|
||||
continue;
|
||||
// FIXME: we only need this for function pointers; it was
|
||||
// already done for atomic types and enums in
|
||||
// DeclStmt::TypeCheck()...
|
||||
if (dynamic_cast<ExprList *>(initExpr) == NULL) {
|
||||
initExpr = TypeConvertExpr(initExpr, sym->type,
|
||||
"initializer");
|
||||
// FIXME: and this is only needed to re-establish
|
||||
// constant-ness so that GetConstant below works for
|
||||
// constant arithmetic expressions...
|
||||
initExpr = ::Optimize(initExpr);
|
||||
}
|
||||
|
||||
const ArrayType *at = CastType<ArrayType>(sym->type);
|
||||
const int nel = at->GetElementCount();
|
||||
/* we must scale # elements by 4, because a thread-block will run 4 warps
|
||||
* or 128 threads.
|
||||
* ***note-to-me***: please define these values (128 threads/4 warps)
|
||||
* in nvptx-target definition
|
||||
* instead of compile-time constants
|
||||
*/
|
||||
const int nel4 = nel*4;
|
||||
ArrayType nat(at->GetElementType(), nel4);
|
||||
llvm::Type *llvmType = nat.LLVMType(g->ctx);
|
||||
cinit = initExpr->GetConstant(sym->type);
|
||||
if (cinit == NULL)
|
||||
Error(initExpr->pos, "Initializer for static variable "
|
||||
"\"%s\" must be a constant.", sym->name.c_str());
|
||||
}
|
||||
else
|
||||
{
|
||||
Error(sym->pos, "Missing initializer for const variable "
|
||||
"\"%s\".", sym->name.c_str());
|
||||
}
|
||||
if (cinit == NULL)
|
||||
cinit = llvm::Constant::getNullValue(llvmType);
|
||||
llvmTypeUn = llvmType;
|
||||
}
|
||||
else
|
||||
{
|
||||
addressSpace = 3; /* local */
|
||||
const ArrayType *at = CastType<ArrayType>(sym->type);
|
||||
const int nel = at->GetElementCount();
|
||||
/* we must scale # elements by 4, because a thread-block will run 4 warps
|
||||
* or 128 threads.
|
||||
* ***note-to-me***: please define these values (128 threads/4 warps)
|
||||
* in nvptx-target definition
|
||||
* instead of compile-time constants
|
||||
*/
|
||||
const int nel4 = nel*4;
|
||||
ArrayType nat(at->GetElementType(), nel4);
|
||||
llvmTypeUn = nat.LLVMType(g->ctx);
|
||||
cinit = llvm::UndefValue::get(llvmTypeUn);
|
||||
}
|
||||
|
||||
// addrspace(3) must be undefined at initialization
|
||||
llvm::Constant *cinit = llvm::UndefValue::get(llvmType);
|
||||
sym->storagePtr =
|
||||
new llvm::GlobalVariable(*m->module, llvmType,
|
||||
sym->type->IsConstType(),
|
||||
llvm::GlobalValue::PrivateLinkage,
|
||||
cinit,
|
||||
llvm::Twine("local_") +
|
||||
llvm::Twine(sym->pos.first_line) +
|
||||
llvm::Twine("_") + sym->name.c_str(),
|
||||
NULL,
|
||||
llvm::GlobalVariable::NotThreadLocal,
|
||||
/*AddressSpace=*/ 3);
|
||||
sym->storagePtr =
|
||||
new llvm::GlobalVariable(*m->module, llvmTypeUn,
|
||||
sym->type->IsConstType(),
|
||||
llvm::GlobalValue::PrivateLinkage,
|
||||
cinit,
|
||||
llvm::Twine("local_") +
|
||||
llvm::Twine(sym->pos.first_line) +
|
||||
llvm::Twine("_") + sym->name.c_str(),
|
||||
NULL,
|
||||
llvm::GlobalVariable::NotThreadLocal,
|
||||
addressSpace);
|
||||
#if 0
|
||||
llvm::GlobalVariable *var = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
|
||||
var->setAlignment(128);
|
||||
llvm::GlobalVariable *var = llvm::dyn_cast<llvm::GlobalVariable>(sym->storagePtr);
|
||||
var->setAlignment(128);
|
||||
#endif
|
||||
// Tell the FunctionEmitContext about the variable
|
||||
ctx->EmitVariableDebugInfo(sym);
|
||||
// Tell the FunctionEmitContext about the variable
|
||||
ctx->EmitVariableDebugInfo(sym);
|
||||
}
|
||||
else
|
||||
{
|
||||
@@ -311,6 +351,10 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// this before the initializer stuff.
|
||||
ctx->EmitVariableDebugInfo(sym);
|
||||
|
||||
if (initExpr == 0 && sym->type->IsConstType())
|
||||
Error(sym->pos, "Missing initializer for const variable "
|
||||
"\"%s\".", sym->name.c_str());
|
||||
|
||||
// And then get it initialized...
|
||||
sym->parentFunction = ctx->GetFunction();
|
||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||
|
||||
@@ -24,14 +24,7 @@ export void f_f(uniform float RET[], uniform float aFOO[])
|
||||
}
|
||||
task void result_task(uniform float RET[])
|
||||
{
|
||||
uniform float ret = -1;
|
||||
switch (taskIndex)
|
||||
{
|
||||
case 0: ret = 496; break;
|
||||
case 1: ret = 1520; break;
|
||||
case 2: ret = 2544; break;
|
||||
case 3: ret = 3568; break;
|
||||
}
|
||||
const uniform float ret = reduce_add(programIndex + programCount*taskIndex);
|
||||
if (programIndex < 32/4)
|
||||
RET[programCount/4*taskIndex + programIndex] = ret;
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user