experimental support for non-constant [non-static] uniform arrays mapped to addrspace(3)

This commit is contained in:
Evghenii
2014-01-08 11:06:14 +01:00
parent f0b49995e5
commit 0a66f17897
5 changed files with 100 additions and 46 deletions

63
ctx.cpp
View File

@@ -1832,34 +1832,65 @@ FunctionEmitContext::BitCastInst(llvm::Value *value, llvm::Type *type,
return inst;
}
static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* value)
/* NVPTX:
* Helper that adds a warp offset to a base pointer into local memory.
* The pointer may still be in addrspace(3), or may already have been
* converted from addrspace(3) to addrspace(0).
*/
static llvm::Value* lAddWarpOffset(FunctionEmitContext *ctx, llvm::Value *value)
{
assert(value->getType()->isPointerTy());
llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
if (pt->getAddressSpace() != 3) return value;
llvm::Function *func_tid_x = m->module->getFunction("__tid_x");
llvm::Function *func_warpsz = m->module->getFunction("__warpsize");
llvm::Value *__tid_x = ctx->CallInst(func_tid_x, NULL, std::vector<llvm::Value*>(), "tidCorrectLocalPtr");
llvm::Value *__warpsz = ctx->CallInst(func_warpsz, NULL, std::vector<llvm::Value*>(), "warpSzCorrectLocaLPtr");
llvm::Value *_mwarpsz = ctx->BinaryOperator(llvm::Instruction::Sub, LLVMInt32(0), __warpsz, "mwarpSzCorrectLocalPtr");
llvm::Value *__offset = ctx->BinaryOperator(llvm::Instruction::And, __tid_x, _mwarpsz, "offsetCorrectLocalPtr");
return llvm::GetElementPtrInst::Create(value, __offset, "__gepCorrectLocalPtr", ctx->GetCurrentBasicBlock());
return llvm::GetElementPtrInst::Create(value, __offset, "warpOffset_gep", ctx->GetCurrentBasicBlock());
}
static llvm::Value* lConvertLocalToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value)
/* NVPTX:
 * Computes the pointer that load/store operations should use for `value`.
 *
 * `value` must have pointer type (asserted below). If the target ISA is
 * NVPTX and the pointer is in addrspace(3), the result of lAddWarpOffset()
 * is returned; in every other case `value` is returned unchanged.
 */
static llvm::Value* lCorrectLocalPtr(FunctionEmitContext *ctx, llvm::Value* value)
{
assert(value->getType()->isPointerTy());
/* pt is dereferenced without a null check; this relies on the assert above */
llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
/* nothing to correct unless we target NVPTX and the pointer is in addrspace(3) */
if (g->target->getISA() != Target::NVPTX || pt->getAddressSpace() != 3) return value;
return lAddWarpOffset(ctx, value);
}
/* NVPTX:
* this function converts pointers from addrspace(3) to addrspace(0)
*/
static llvm::Value* lConvertLocal2GenericPtr(FunctionEmitContext *ctx, llvm::Value *value)
{
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX) return value;
llvm::PointerType *pt = llvm::dyn_cast<llvm::PointerType>(value->getType());
if (pt->getAddressSpace() != 3) return value;
llvm::PointerType *PointerTy = llvm::PointerType::get(LLVMTypes::Int64Type, 3);
value = ctx->BitCastInst(value, PointerTy, "cvtLog2Gen_i64ptr");
value = lCorrectLocalPtr(ctx, value);
/* if the pointee type is an array, extract its element type */
llvm::Type *type = pt->getElementType();
llvm::Type *typeEl = type;
if (type->isArrayTy())
{
typeEl = type->getArrayElementType();
assert(!typeEl->isArrayTy()); /* currently we don't support array-of-array in uniform */
}
/* convert elTy addrspace(3)* to i64 addrspace(3)* */
llvm::PointerType *Int64Ptr3 = llvm::PointerType::get(LLVMTypes::Int64Type, 3);
value = ctx->BitCastInst(value, Int64Ptr3, "cvtLog2Gen_i64ptr");
/* convert i64 addrspace(3)* to i64* */
llvm::Function *__cvt_loc2gen = m->module->getFunction("__cvt_loc2gen");
std::vector<llvm::Value *> __cvt_loc2gen_args;
__cvt_loc2gen_args.push_back(value);
return ctx->CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "cvtLoc2Gen");
value = ctx->CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "cvtLoc2Gen");
/* convert i64* to elTy* */
llvm::PointerType *typeElPtr = llvm::PointerType::get(typeEl, 0);
value = ctx->BitCastInst(value, typeElPtr, "cvtLoc2Gen_i642ptr");
/* add warp offset to the pointer */
return lAddWarpOffset(ctx, value);
}
llvm::Value *
@@ -1876,7 +1907,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
if (name == NULL)
name = LLVMGetName(value, "_ptr2int");
value = lConvertLocalToGenericPtr(this, value); /* NVPTX */
value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */
llvm::Type *type = LLVMTypes::PointerIntType;
llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
AddDebugPos(inst);
@@ -1910,7 +1941,7 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType,
}
}
value = lConvertLocalToGenericPtr(this, value); /* NVPTX */
value = lConvertLocal2GenericPtr(this, value); /* NVPTX : convert addrspace 3->0 before converting pointer */
llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock);
AddDebugPos(inst);
return inst;
@@ -2489,7 +2520,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, const char *name) {
if (name == NULL)
name = LLVMGetName(ptr, "_load");
ptr = lCorrectLocalPtr(this, ptr); /* NVPTX */
ptr = lCorrectLocalPtr(this, ptr); /* NVPTX: correct addrspace(3) pointer before load/store */
llvm::LoadInst *inst = new llvm::LoadInst(ptr, name, bblock);
if (g->opt.forceAlignedMemory &&
@@ -2622,7 +2653,7 @@ FunctionEmitContext::LoadInst(llvm::Value *ptr, llvm::Value *mask,
// it's totally unaligned. (This shouldn't make any difference
// vs the proper alignment in practice.)
align = 1;
ptr = lCorrectLocalPtr(this, ptr); /* NVPTX */
ptr = lCorrectLocalPtr(this, ptr); /* NVPTX: correct addrspace(3) pointer before load/store */
llvm::Instruction *inst = new llvm::LoadInst(ptr, name,
false /* not volatile */,
align, bblock);
@@ -3050,7 +3081,7 @@ FunctionEmitContext::StoreInst(llvm::Value *value, llvm::Value *ptr) {
llvm::dyn_cast<llvm::PointerType>(ptr->getType());
AssertPos(currentPos, pt != NULL);
ptr = lCorrectLocalPtr(this, ptr); /* NVPTX */
ptr = lCorrectLocalPtr(this, ptr); /* NVPTX: correct addrspace(3) pointer before load/store */
llvm::StoreInst *inst = new llvm::StoreInst(value, ptr, bblock);
if (g->opt.forceAlignedMemory &&

View File

@@ -402,6 +402,13 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
return;
}
#if 0 /* NVPTX */
if (baseType->IsUniformType())
{
fprintf(stderr, " detected uniform array of size= %d array= %s\n" ,arraySize,
baseType->IsArrayType() ? " true " : " false ");
}
#endif
const Type *arrayType = new ArrayType(baseType, arraySize);
if (child != NULL) {
child->InitFromType(arrayType, ds);

View File

@@ -2338,6 +2338,28 @@ Module::CompileAndOutput(const char *srcFile,
m = new Module(srcFile);
if (m->CompileFile() == 0) {
/* NVPTX:
* for the PTX target, replace '.' with '_' in the names of all global variables;
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
*/
if (g->target->getISA() == Target::NVPTX)
{
llvm::Module::global_iterator
I = m->module->global_begin(),
E = m->module->global_end();
for (; I != E; I++)
{
std::string name = I->getName();
for (int i = 0; i < name.length(); i++)
if (name[i] == '.')
name[i] = '_';
I->setName(name);
fprintf(stderr, " %s \n", name.c_str());
}
}
if (outputType == CXX) {
if (target == NULL || strncmp(target, "generic-", 8) != 0) {
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "

View File

@@ -502,6 +502,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createCFGSimplificationPass());
optPM.add(llvm::createGlobalDCEPass());
}
#if 1
else {
llvm::PassRegistry *registry = llvm::PassRegistry::getPassRegistry();
llvm::initializeCore(*registry);
@@ -685,6 +686,7 @@ Optimize(llvm::Module *module, int optLevel) {
// Should be the last
optPM.add(CreateFixBooleanSelectPass(), 400);
}
#endif
// Finish up by making sure we didn't mess anything up in the IR along
// the way.

View File

@@ -260,44 +260,36 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
if (sym->type->IsArrayType() && sym->type->IsUniformType()
&& g->target->getISA() == Target::NVPTX)
{
#if 0 /* need to test if initializer works ... */
if (initExpr != NULL)
Error(initExpr->pos, "Initializer for static variable "
"\"%s\" must be a constant.", sym->name.c_str());
#endif
Error(initExpr->pos,
"It is not possible to initialize \"uniform\" array \"%s\" with \"nvptx\" target. "
"Use \"varying\" or \"const static uniform\" if possible.",
sym->name.c_str());
PerformanceWarning(sym->pos,
"\"uniform\" arrays may be slow with \"nvptx\" target. Use \"varying\" if possible.");
"\"uniform\" arrays might be slow with \"nvptx\" target."
" Unless data sharing between program instances is required, use \"varying\" instead.");
llvm::Constant *cinit = NULL;
if (initExpr != NULL)
{
if (PossiblyResolveFunctionOverloads(initExpr, sym->type) == false)
continue;
// FIXME: we only need this for function pointers; it was
// already done for atomic types and enums in
// DeclStmt::TypeCheck()...
if (dynamic_cast<ExprList *>(initExpr) == NULL) {
initExpr = TypeConvertExpr(initExpr, sym->type,
"initializer");
// FIXME: and this is only needed to re-establish
// constant-ness so that GetConstant below works for
// constant arithmetic expressions...
initExpr = ::Optimize(initExpr);
}
const ArrayType *at = CastType<ArrayType>(sym->type);
const int nel = at->GetElementCount();
/* we must scale the number of elements by 4, because a thread-block will run
* 4 warps (128 threads).
* ***note-to-me***: please define these values (128 threads / 4 warps)
* in the nvptx target definition
* instead of hard-coding them as compile-time constants
*/
const int nel4 = nel*4;
ArrayType nat(at->GetElementType(), nel4);
llvm::Type *llvmType = nat.LLVMType(g->ctx);
cinit = initExpr->GetConstant(sym->type);
}
if (cinit == NULL)
cinit = llvm::Constant::getNullValue(llvmType);
// Allocate space for the static variable in global scope, so
// that it persists across function calls
// addrspace(3) must be undefined at initialization
llvm::Constant *cinit = llvm::UndefValue::get(llvmType);
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("local") +
llvm::GlobalValue::PrivateLinkage,
cinit,
llvm::Twine("local_") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(),
NULL,