fixed foreach_unique and local_atomics
This commit is contained in:
@@ -481,6 +481,7 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__min_varying_uint32",
|
"__min_varying_uint32",
|
||||||
"__min_varying_uint64",
|
"__min_varying_uint64",
|
||||||
"__movmsk",
|
"__movmsk",
|
||||||
|
"__movmsk_ptx",
|
||||||
"__new_uniform_32rt",
|
"__new_uniform_32rt",
|
||||||
"__new_uniform_64rt",
|
"__new_uniform_64rt",
|
||||||
"__new_varying32_32rt",
|
"__new_varying32_32rt",
|
||||||
|
|||||||
@@ -722,15 +722,13 @@ svml_stubs(double,d,WIDTH)
|
|||||||
|
|
||||||
define i64 @__movmsk(<1 x i1>) nounwind readnone alwaysinline {
|
define i64 @__movmsk(<1 x i1>) nounwind readnone alwaysinline {
|
||||||
%v = extractelement <1 x i1> %0, i32 0
|
%v = extractelement <1 x i1> %0, i32 0
|
||||||
;; if 0
|
|
||||||
;; this one fails with ./tests/popcnt-4.ispc and others ...
|
|
||||||
;; %v0 = call i32 @__ballot_nvptx(i1 %v)
|
|
||||||
;; %v64 = zext i32 %v0 to i64
|
|
||||||
|
|
||||||
;; else
|
|
||||||
;; this one just copies mask
|
|
||||||
%v64 = zext i1 %v to i64
|
%v64 = zext i1 %v to i64
|
||||||
;; endif
|
ret i64 %v64
|
||||||
|
}
|
||||||
|
define i64 @__movmsk_ptx(<1 x i1>) nounwind readnone alwaysinline {
|
||||||
|
%v = extractelement <1 x i1> %0, i32 0
|
||||||
|
%v0 = call i32 @__ballot_nvptx(i1 %v)
|
||||||
|
%v64 = zext i32 %v0 to i64
|
||||||
ret i64 %v64
|
ret i64 %v64
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|||||||
22
ctx.cpp
22
ctx.cpp
@@ -1374,10 +1374,11 @@ FunctionEmitContext::None(llvm::Value *mask) {
|
|||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::LaneMask(llvm::Value *v) {
|
FunctionEmitContext::LaneMask(llvm::Value *v) {
|
||||||
|
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
|
||||||
// Call the target-dependent movmsk function to turn the vector mask
|
// Call the target-dependent movmsk function to turn the vector mask
|
||||||
// into an i64 value
|
// into an i64 value
|
||||||
std::vector<Symbol *> mm;
|
std::vector<Symbol *> mm;
|
||||||
m->symbolTable->LookupFunction("__movmsk", &mm);
|
m->symbolTable->LookupFunction(__movmsk, &mm);
|
||||||
if (g->target->getMaskBitCount() == 1)
|
if (g->target->getMaskBitCount() == 1)
|
||||||
AssertPos(currentPos, mm.size() == 1);
|
AssertPos(currentPos, mm.size() == 1);
|
||||||
else
|
else
|
||||||
@@ -1389,6 +1390,18 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
|
|||||||
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
|
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// NOTE(review): placeholder implementation. The body ignores `vector`, `lane`
// and `scalar` and unconditionally returns NULL; no __insert_* call is emitted
// here, so callers must not rely on the result until this is filled in.
llvm::Value*
|
||||||
|
FunctionEmitContext::Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
// NOTE(review): placeholder implementation. The body ignores `vector` and
// `lane` and unconditionally returns NULL; no __extract_* call is emitted
// here, so callers must not rely on the result until this is filled in.
llvm::Value*
|
||||||
|
FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
|
||||||
|
{
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
||||||
@@ -1410,8 +1423,6 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
|||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
||||||
if (1 || g->target->getISA() != Target::NVPTX)
|
|
||||||
{
|
|
||||||
llvm::SmallVector<llvm::Constant*, 16> array;
|
llvm::SmallVector<llvm::Constant*, 16> array;
|
||||||
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
||||||
llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
|
llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i);
|
||||||
@@ -1422,8 +1433,8 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
|||||||
|
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
else
|
llvm::Value *
|
||||||
{ /* this calls __tid_x() & __warpsize */
|
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
|
||||||
llvm::Function *func_tid_x = m->module->getFunction("__tid_x");
|
llvm::Function *func_tid_x = m->module->getFunction("__tid_x");
|
||||||
llvm::Function *func_warpsz = m->module->getFunction("__warpsize");
|
llvm::Function *func_warpsz = m->module->getFunction("__warpsize");
|
||||||
llvm::Value *__tid_x = CallInst(func_tid_x, NULL, std::vector<llvm::Value*>(), "laneIdxForEach");
|
llvm::Value *__tid_x = CallInst(func_tid_x, NULL, std::vector<llvm::Value*>(), "laneIdxForEach");
|
||||||
@@ -1433,7 +1444,6 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
|||||||
llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), laneIdx, 0, "__laneIdxV");
|
llvm::Value *index = InsertInst(llvm::UndefValue::get(LLVMTypes::Int32VectorType), laneIdx, 0, "__laneIdxV");
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
|
|||||||
8
ctx.h
8
ctx.h
@@ -291,6 +291,13 @@ public:
|
|||||||
of the mask is on. */
|
of the mask is on. */
|
||||||
llvm::Value *LaneMask(llvm::Value *mask);
|
llvm::Value *LaneMask(llvm::Value *mask);
|
||||||
|
|
||||||
|
|
||||||
|
/** Issues a call to __insert_int8/int16/int32/int64/float/double */
|
||||||
|
llvm::Value* Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar);
|
||||||
|
/** Issues a call to __extract_int8/int16/int32/int64/float/double */
|
||||||
|
llvm::Value* Extract(llvm::Value *vector, llvm::Value *lane);
|
||||||
|
|
||||||
|
|
||||||
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
|
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
|
||||||
that indicates whether the two masks are equal. */
|
that indicates whether the two masks are equal. */
|
||||||
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
|
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
|
||||||
@@ -298,6 +305,7 @@ public:
|
|||||||
/** Generate ConstantVector, which contains ProgramIndex, i.e.
|
/** Generate ConstantVector, which contains ProgramIndex, i.e.
|
||||||
< i32 0, i32 1, i32 2, i32 3> */
|
< i32 0, i32 1, i32 2, i32 3> */
|
||||||
llvm::Value *ProgramIndexVector(bool is32bits = true);
|
llvm::Value *ProgramIndexVector(bool is32bits = true);
|
||||||
|
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
|
||||||
|
|
||||||
/** Given a string, create an anonymous global variable to hold its
|
/** Given a string, create an anonymous global variable to hold its
|
||||||
value and return the pointer to the string. */
|
value and return the pointer to the string. */
|
||||||
|
|||||||
@@ -2057,11 +2057,10 @@ static inline TYPE atomic_##NAME##_local(uniform TYPE * uniform ptr, TYPE value)
|
|||||||
} \
|
} \
|
||||||
static inline TYPE atomic_##NAME##_local(uniform TYPE * p, TYPE value) { \
|
static inline TYPE atomic_##NAME##_local(uniform TYPE * p, TYPE value) { \
|
||||||
TYPE ret; \
|
TYPE ret; \
|
||||||
uniform TYPE * uniform ptrs[programCount]; \
|
|
||||||
ptrs[programIndex] = p; \
|
|
||||||
foreach_active (i) { \
|
foreach_active (i) { \
|
||||||
ret = insert(ret, i, *ptrs[i]); \
|
uniform TYPE * uniform ptr = (uniform TYPE * uniform)extract((int64)p, i); \
|
||||||
*ptrs[i] = OPFUNC(*ptrs[i], extract(value, i)); \
|
ret = insert(ret, i, *ptr); \
|
||||||
|
*ptr = OPFUNC(*ptr, extract(value, i)); \
|
||||||
} \
|
} \
|
||||||
return ret; \
|
return ret; \
|
||||||
}
|
}
|
||||||
|
|||||||
5
stmt.cpp
5
stmt.cpp
@@ -2243,7 +2243,8 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// math...)
|
// math...)
|
||||||
|
|
||||||
// Get the "program index" vector value
|
// Get the "program index" vector value
|
||||||
llvm::Value *programIndex = ctx->ProgramIndexVector();
|
llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
|
||||||
|
ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
|
||||||
|
|
||||||
// And smear the current lane out to a vector
|
// And smear the current lane out to a vector
|
||||||
llvm::Value *firstSet32 =
|
llvm::Value *firstSet32 =
|
||||||
@@ -2354,6 +2355,8 @@ ForeachUniqueStmt::ForeachUniqueStmt(const char *iterName, Expr *e,
|
|||||||
sym = m->symbolTable->LookupVariable(iterName);
|
sym = m->symbolTable->LookupVariable(iterName);
|
||||||
expr = e;
|
expr = e;
|
||||||
stmts = s;
|
stmts = s;
|
||||||
|
// Reject foreach_unique up front when compiling for the NVPTX target, which
// the error text below reports as not supported yet.
if (g->target->getISA() == Target::NVPTX)
|
||||||
|
Error(pos, "\"foreach_unique\" is not yet supported with \"nvptx\" target.");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user