From 9b74e60185ca19769bcb8ef9a5f97e01d257993d Mon Sep 17 00:00:00 2001 From: Evghenii Date: Tue, 7 Jan 2014 14:29:55 +0100 Subject: [PATCH] added conversion from addrspace(3)/__local/__shared__ to addspace(0)/generic when PtrToInt is called --- builtins.cpp | 3 ++- builtins/target-nvptx.ll | 8 ++++++++ builtins/util.m4 | 1 + ctx.cpp | 28 ++++++++++++++++++++++++++++ ptxtestcc.sh | 4 ++-- stmt.cpp | 6 +++--- 6 files changed, 44 insertions(+), 6 deletions(-) diff --git a/builtins.cpp b/builtins.cpp index f6e3c409..cfef06fd 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -629,7 +629,8 @@ lSetInternalFunctions(llvm::Module *module) { "__nctaid_x", "__nctaid_y", "__nctaid_z", - "__warpsize" + "__warpsize", + "__cvt_loc2gen" }; int count = sizeof(names) / sizeof(names[0]); diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index db217e9a..83d02116 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -61,6 +61,14 @@ define i32 @__nctaid_z() nounwind readnone alwaysinline %nb = call i32 @llvm.nvvm.read.ptx.sreg.nctaid.z() ret i32 %nb } +;;;;;;;; +declare i64* @llvm.nvvm.ptr.shared.to.gen.p0i64.p3i64(i64 addrspace(3)*) +define i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline +{ + %ptr = tail call i64* @llvm.nvvm.ptr.shared.to.gen.p0i64.p3i64(i64 addrspace(3)* %0) + ret i64* %ptr +} +;;;;;;;; define i32 @__shfl_i32_nvptx(i32, i32) nounwind readnone alwaysinline { %shfl = tail call i32 asm sideeffect "shfl.idx.b32 $0, $1, $2, 0x1f;", "=r,r,r"(i32 %0, i32 %1) nounwind readnone alwaysinline diff --git a/builtins/util.m4 b/builtins/util.m4 index 24b8b3df..873e8c4d 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -4542,5 +4542,6 @@ declare i32 @__ctaid_z() nounwind readnone alwaysinline declare i32 @__nctaid_x() nounwind readnone alwaysinline declare i32 @__nctaid_y() nounwind readnone alwaysinline declare i32 @__nctaid_z() nounwind readnone alwaysinline +declare i64* @__cvt_loc2gen(i64 addrspace(3)*) nounwind readnone alwaysinline ') diff --git a/ctx.cpp b/ctx.cpp index df1129b8..410deea0 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1846,6 +1846,21 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) { if (name == NULL) name = LLVMGetName(value, "_ptr2int"); + + if (value->getType()->isPointerTy() && g->target->getISA() == Target::NVPTX) + { + llvm::PointerType *pt = llvm::dyn_cast(value->getType()); + if (pt->getAddressSpace() == 3) + { + llvm::PointerType *PointerTy3 = llvm::PointerType::get(LLVMTypes::Int64Type, 3); + llvm::Value *cast = BitCastInst(value, PointerTy3, "__cvt_log2gen_i64ptr1_"); + llvm::Function *__cvt_loc2gen = m->module->getFunction("__cvt_loc2gen"); + std::vector __cvt_loc2gen_args; + __cvt_loc2gen_args.push_back(cast); + value = CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "__cvt_loc2gen1_"); + } + } + llvm::Type *type = LLVMTypes::PointerIntType; llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock); AddDebugPos(inst); @@ -1879,6 +1894,19 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, llvm::Type *toType, } } + if (value->getType()->isPointerTy() && g->target->getISA() == Target::NVPTX) + { + llvm::PointerType *pt = llvm::dyn_cast(value->getType()); + if (pt->getAddressSpace() == 3) + { + llvm::PointerType *PointerTy3 = llvm::PointerType::get(LLVMTypes::Int64Type, 3); + llvm::Value *cast = BitCastInst(value, PointerTy3, "__cvt_log2gen_i64ptr2_"); + llvm::Function *__cvt_loc2gen = m->module->getFunction("__cvt_loc2gen"); + std::vector __cvt_loc2gen_args; + __cvt_loc2gen_args.push_back(cast); + value = CallInst(__cvt_loc2gen, NULL, __cvt_loc2gen_args, "__cvt_loc2gen2_"); + } + } llvm::Instruction *inst = new llvm::PtrToIntInst(value, toType, name, bblock); AddDebugPos(inst); return inst; diff --git a/ptxtestcc.sh b/ptxtestcc.sh index 80b672ca..2ba5e252 100755 --- a/ptxtestcc.sh +++ b/ptxtestcc.sh @@ -5,8 +5,8 @@ DIS=$HOME/usr/local/llvm/bin-3.2/bin/llvm-dis ISPC=ispc PTXCC=ptxcc PTXGEN=~/ptxgen -#$(cat $1 |grep -v 'width'|$ISPC --target=nvptx --emit-llvm -o -|$LLC -march=nvptx64 -mcpu=sm_35 -o $1.ptx) && \ -$(cat $1 |grep -v 'width'|$ISPC --target=nvptx --emit-llvm -o -|$DIS -o $1_32_ptx.ll && $PTXGEN $1_32_ptx.ll > $1.ptx) && \ +$(cat $1 |grep -v 'width'|$ISPC --target=nvptx --emit-llvm -o -|$LLC -march=nvptx64 -mcpu=sm_35 -o $1.ptx) && \ +#$(cat $1 |grep -v 'width'|$ISPC --target=nvptx --emit-llvm -o -|$DIS -o $1_32_ptx.ll && $PTXGEN $1_32_ptx.ll > $1.ptx) && \ $($PTXCC $1.ptx -Xptxas=-v -o $1.ptx.o) && \ nvcc -o test_nvptx test_static_nvptx.cpp examples_ptx/nvcc_helpers.cu examples_ptx/ispc_malloc.cpp $1.ptx.o -arch=sm_35 -Iexamples_ptx/ -D_CUDA_ -lcudadevrt -DTEST_SIG=$2 diff --git a/stmt.cpp b/stmt.cpp index 936141f5..fac16640 100644 --- a/stmt.cpp +++ b/stmt.cpp @@ -257,11 +257,10 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { ctx->EmitVariableDebugInfo(sym); } else { -#if 0 /* PTX:: shared/uniform data types are here */ if (sym->type->IsArrayType() && sym->type->IsUniformType() && g->target->getISA() == Target::NVPTX) { -#if 0 +#if 0 /* need to test if initializer works ... */ if (initExpr != NULL) Error(initExpr->pos, "Initializer for static variable " "\"%s\" must be a constant.", sym->name.c_str()); @@ -304,13 +303,14 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const { NULL, llvm::GlobalVariable::NotThreadLocal, /*AddressSpace=*/ 3); +#if 0 llvm::GlobalVariable *var = llvm::dyn_cast(sym->storagePtr); var->setAlignment(128); +#endif // Tell the FunctionEmitContext about the variable ctx->EmitVariableDebugInfo(sym); } else -#endif { // For non-static variables, allocate storage on the stack sym->storagePtr = ctx->AllocaInst(llvmType, sym->name.c_str());