A fix for the `.b0` type emitted in PTX, plus some other code improvements

This commit is contained in:
Evghenii
2014-01-27 08:51:05 +01:00
parent 52691fbb52
commit 1c2dbd6a27
5 changed files with 40 additions and 22 deletions

View File

@@ -1483,6 +1483,10 @@ FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
llvm::Value *__warpszm1 = BinaryOperator(llvm::Instruction::Add, __warpsz, LLVMInt32(-1), "__warpszm1");
llvm::Value *laneIdx = BinaryOperator(llvm::Instruction::And, __tid_x, __warpszm1, "__laneidx");
llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), laneIdx, 0, "__laneIdxV");
#if 0
if (!is32bits)
index = ZExtInst(index, LLVMTypes::Int64VectandType);
#endif
return index;
}

View File

@@ -29,6 +29,7 @@ TAB [\t]*
".visible" { return TOKEN_VISIBLE; }
".global" { return TOKEN_GLOBAL; }
".param" { return TOKEN_PARAM; }
".b0" { LLSETTOKEN( TOKEN_B32);} /* fix for buggy llvm-ptx generator */
".b8" { LLSETTOKEN( TOKEN_B8);}
".b16" { LLSETTOKEN( TOKEN_B16);}
".b32" { LLSETTOKEN( TOKEN_B32);}

View File

@@ -1,6 +1,6 @@
#!/bin/sh
PTXSRC=$1
PTXSRC=$1__tmp_ptx.ptx
PTXCU=$1___tmp_ptx.cu
PTXSH=$1___tmp_ptx.sh
@@ -9,6 +9,7 @@ NVCCPARM=${@:2}
DEPTX=dePTX
NVCC=nvcc
$(cat $1 | sed 's/\.b0/\.b32/g' > $PTXSRC) &&
$DEPTX < $PTXSRC > $PTXCU &&
$NVCC -arch=sm_35 -G -dc $NVCCPARM -dryrun $PTXCU 2>&1 | \
sed 's/\#\$//g'| \

View File

@@ -444,15 +444,36 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
return;
}
#if 1
if (g->target->getISA() == Target::NVPTX &&
#if 0
!type->IsConstType() &&
#endif
#if 1
at != NULL &&
#endif
type->IsVaryingType())
{
Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target.");
return;
}
Error(pos, "Global \"varying\" variables are not yet supported in \"nvptx\" target.");
return;
#if 0
int nel = 32; /* warp-size */
if (type->IsArrayType())
{
const ArrayType *at = CastType<ArrayType>(type);
/* We must scale the number of elements by 4, because a thread-block will run
* 4 warps, or 128 threads.
* ***note-to-me***: please define these values (128 threads / 4 warps)
* in the nvptx-target definition
* instead of as compile-time constants.
*/
nel *= at->GetElementCount();
assert (!type->IsSOAType());
type = new ArrayType(at->GetElementType()->GetAsUniformType(), nel);
}
else
type = new ArrayType(type->GetAsUniformType(), nel);
#endif
}
llvm::Type *llvmType = type->LLVMType(g->ctx);

View File

@@ -27,33 +27,24 @@ struct S operator/(struct S rr, struct S rv) {
return c;
}
#ifdef __NVPTX__
uniform struct S _a[programCount];
uniform struct S _b[programCount];
uniform struct S _d[programCount];
#define global_a _a[programIndex]
#define global_b _b[programIndex]
#define global_d _d[programIndex]
#else
struct S global_a;
struct S global_b;
struct S d;
#endif
export void f_f(uniform float RET[], uniform float aFOO[]) {
struct S a;
struct S b;
struct S d;
int T = programIndex;
global_a.a = aFOO[programIndex];
global_b.a = -aFOO[programIndex];
a.a = aFOO[programIndex];
b.a = -aFOO[programIndex];
if (programIndex == 3)
off = 1;
else
off = 0;
if (T % 2)
global_d = global_a + global_b;
d = a + b;
else
global_d = global_a / global_b;
d = a / b;
RET[programIndex] = global_d.a;
RET[programIndex] = d.a;
}
export void result(uniform float RET[4]) {