diff --git a/ctx.cpp b/ctx.cpp index 9925d51c..6f7413b7 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1483,6 +1483,10 @@ FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) { llvm::Value *__warpszm1 = BinaryOperator(llvm::Instruction::Add, __warpsz, LLVMInt32(-1), "__warpszm1"); llvm::Value *laneIdx = BinaryOperator(llvm::Instruction::And, __tid_x, __warpszm1, "__laneidx"); llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), laneIdx, 0, "__laneIdxV"); +#if 0 + if (!is32bits) + index = ZExtInst(index, LLVMTypes::Int64VectandType); +#endif return index; } diff --git a/examples_ptx/ptxcc/ptx.ll b/examples_ptx/ptxcc/ptx.ll index b717e0cd..0c3a0fd4 100644 --- a/examples_ptx/ptxcc/ptx.ll +++ b/examples_ptx/ptxcc/ptx.ll @@ -29,6 +29,7 @@ TAB [\t]* ".visible" { return TOKEN_VISIBLE; } ".global" { return TOKEN_GLOBAL; } ".param" { return TOKEN_PARAM; } +".b0" { LLSETTOKEN( TOKEN_B32);} /* fix for buggy llvm-ptx generator */ ".b8" { LLSETTOKEN( TOKEN_B8);} ".b16" { LLSETTOKEN( TOKEN_B16);} ".b32" { LLSETTOKEN( TOKEN_B32);} diff --git a/examples_ptx/ptxcc/ptxcc b/examples_ptx/ptxcc/ptxcc index ae0fca91..73964be7 100755 --- a/examples_ptx/ptxcc/ptxcc +++ b/examples_ptx/ptxcc/ptxcc @@ -1,6 +1,6 @@ #!/bin/sh -PTXSRC=$1 +PTXSRC=$1__tmp_ptx.ptx PTXCU=$1___tmp_ptx.cu PTXSH=$1___tmp_ptx.sh @@ -9,6 +9,7 @@ NVCCPARM=${@:2} DEPTX=dePTX NVCC=nvcc +$(cat $1 | sed 's/\.b0/\.b32/g' > $PTXSRC) && $DEPTX < $PTXSRC > $PTXCU && $NVCC -arch=sm_35 -G -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \ sed 's/\#\$//g'| \ diff --git a/module.cpp b/module.cpp index c41ca1da..1031667a 100644 --- a/module.cpp +++ b/module.cpp @@ -444,15 +444,36 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE return; } -#if 1 if (g->target->getISA() == Target::NVPTX && +#if 0 + !type->IsConstType() && +#endif +#if 1 at != NULL && +#endif type->IsVaryingType()) { - Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target."); - return; - } + 
Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target."); + return; +#if 0 + int nel = 32; /* warp-size */ + if (type->IsArrayType()) + { + const ArrayType *at = CastType(type); + /* we must scale # elements by 4, because a thread-block will run 4 warps + * or 128 threads. + * ***note-to-me***: please define these values (128threads/4warps) + * in nvptx-target definition + * instead of compile-time constants + */ + nel *= at->GetElementCount(); + assert (!type->IsSOAType()); + type = new ArrayType(at->GetElementType()->GetAsUniformType(), nel); + } + else + type = new ArrayType(type->GetAsUniformType(), nel); #endif + } llvm::Type *llvmType = type->LLVMType(g->ctx); diff --git a/tests/operators2.ispc b/tests/operators2.ispc index 0a462a9f..daef4ec6 100644 --- a/tests/operators2.ispc +++ b/tests/operators2.ispc @@ -27,33 +27,24 @@ struct S operator/(struct S rr, struct S rv) { return c; } -#ifdef __NVPTX__ -uniform struct S _a[programCount]; -uniform struct S _b[programCount]; -uniform struct S _d[programCount]; -#define global_a _a[programIndex] -#define global_b _b[programIndex] -#define global_d _d[programIndex] -#else -struct S global_a; -struct S global_b; -struct S d; -#endif export void f_f(uniform float RET[], uniform float aFOO[]) { + struct S a; + struct S b; + struct S d; int T = programIndex; - global_a.a = aFOO[programIndex]; - global_b.a = -aFOO[programIndex]; + a.a = aFOO[programIndex]; + b.a = -aFOO[programIndex]; if (programIndex == 3) off = 1; else off = 0; if (T % 2) - global_d = global_a + global_b; + d = a + b; else - global_d = global_a / global_b; + d = a / b; - RET[programIndex] = global_d.a; + RET[programIndex] = d.a; } export void result(uniform float RET[4]) {