A fix for `.b0` in generated PTX, plus some other code improvements
This commit is contained in:
4
ctx.cpp
4
ctx.cpp
@@ -1483,6 +1483,10 @@ FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
|
||||
llvm::Value *__warpszm1 = BinaryOperator(llvm::Instruction::Add, __warpsz, LLVMInt32(-1), "__warpszm1");
|
||||
llvm::Value *laneIdx = BinaryOperator(llvm::Instruction::And, __tid_x, __warpszm1, "__laneidx");
|
||||
llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), laneIdx, 0, "__laneIdxV");
|
||||
#if 0
|
||||
if (!is32bits)
|
||||
index = ZExtInst(index, LLVMTypes::Int64VectandType);
|
||||
#endif
|
||||
return index;
|
||||
}
|
||||
|
||||
|
||||
@@ -29,6 +29,7 @@ TAB [\t]*
|
||||
".visible" { return TOKEN_VISIBLE; }
|
||||
".global" { return TOKEN_GLOBAL; }
|
||||
".param" { return TOKEN_PARAM; }
|
||||
".b0" { LLSETTOKEN( TOKEN_B32);} /* fix for buggy llvm-ptx generator */
|
||||
".b8" { LLSETTOKEN( TOKEN_B8);}
|
||||
".b16" { LLSETTOKEN( TOKEN_B16);}
|
||||
".b32" { LLSETTOKEN( TOKEN_B32);}
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
#!/bin/sh
|
||||
|
||||
PTXSRC=$1
|
||||
PTXSRC=$1__tmp_ptx.ptx
|
||||
PTXCU=$1___tmp_ptx.cu
|
||||
PTXSH=$1___tmp_ptx.sh
|
||||
|
||||
@@ -9,6 +9,7 @@ NVCCPARM=${@:2}
|
||||
DEPTX=dePTX
|
||||
NVCC=nvcc
|
||||
|
||||
$(cat $1 | sed 's/\.b0/\.b32/g' > $PTXSRC) &&
|
||||
$DEPTX < $PTXSRC > $PTXCU &&
|
||||
$NVCC -arch=sm_35 -G -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \
|
||||
sed 's/\#\$//g'| \
|
||||
|
||||
29
module.cpp
29
module.cpp
@@ -444,15 +444,36 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
|
||||
return;
|
||||
}
|
||||
|
||||
#if 1
|
||||
if (g->target->getISA() == Target::NVPTX &&
|
||||
#if 0
|
||||
!type->IsConstType() &&
|
||||
#endif
|
||||
#if 1
|
||||
at != NULL &&
|
||||
#endif
|
||||
type->IsVaryingType())
|
||||
{
|
||||
Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target.");
|
||||
return;
|
||||
}
|
||||
Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target.");
|
||||
return;
|
||||
#if 0
|
||||
int nel = 32; /* warp-size */
|
||||
if (type->IsArrayType())
|
||||
{
|
||||
const ArrayType *at = CastType<ArrayType>(type);
|
||||
/* we must scale # elements by 4, because a thread-block will run 4 warps
|
||||
* or 128 threads.
|
||||
* ***note-to-me***: please define these values (128threads/4warps)
|
||||
* in nvptx-target definition
|
||||
* instead of compile-time constants
|
||||
*/
|
||||
nel *= at->GetElementCount();
|
||||
assert (!type->IsSOAType());
|
||||
type = new ArrayType(at->GetElementType()->GetAsUniformType(), nel);
|
||||
}
|
||||
else
|
||||
type = new ArrayType(type->GetAsUniformType(), nel);
|
||||
#endif
|
||||
}
|
||||
|
||||
|
||||
llvm::Type *llvmType = type->LLVMType(g->ctx);
|
||||
|
||||
@@ -27,33 +27,24 @@ struct S operator/(struct S rr, struct S rv) {
|
||||
return c;
|
||||
}
|
||||
|
||||
#ifdef __NVPTX__
|
||||
uniform struct S _a[programCount];
|
||||
uniform struct S _b[programCount];
|
||||
uniform struct S _d[programCount];
|
||||
#define global_a _a[programIndex]
|
||||
#define global_b _b[programIndex]
|
||||
#define global_d _d[programIndex]
|
||||
#else
|
||||
struct S global_a;
|
||||
struct S global_b;
|
||||
struct S d;
|
||||
#endif
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
struct S a;
|
||||
struct S b;
|
||||
struct S d;
|
||||
int T = programIndex;
|
||||
global_a.a = aFOO[programIndex];
|
||||
global_b.a = -aFOO[programIndex];
|
||||
a.a = aFOO[programIndex];
|
||||
b.a = -aFOO[programIndex];
|
||||
if (programIndex == 3)
|
||||
off = 1;
|
||||
else
|
||||
off = 0;
|
||||
if (T % 2)
|
||||
global_d = global_a + global_b;
|
||||
d = a + b;
|
||||
else
|
||||
global_d = global_a / global_b;
|
||||
d = a / b;
|
||||
|
||||
RET[programIndex] = global_d.a;
|
||||
RET[programIndex] = d.a;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[4]) {
|
||||
|
||||
Reference in New Issue
Block a user