MAJOR CHANGE--- STOP WITH THIS BRANCH--
Makefile (8 changed lines)
--- a/Makefile
+++ b/Makefile
@@ -144,7 +144,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
 	type.cpp util.cpp
 HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
 	opt.h stmt.h sym.h type.h util.h
-TARGETS=nvptx64 avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
+TARGETS=nvptx avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
 	sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
 	generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
 ifneq ($(ARM_ENABLED), 0)
@@ -254,15 +254,15 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
 	@echo Compiling $<
 	$(CXX) $(CXXFLAGS) -o $@ -c $<
 
-objs/builtins-dispatch.cpp: builtins/dispatch.ll builtins/util.m4 builtins/util_ptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
+objs/builtins-dispatch.cpp: builtins/dispatch.ll builtins/util.m4 builtins/util-nvptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
 	@echo Creating C++ source from builtins definition file $<
 	m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@
 
-objs/builtins-%-32bit.cpp: builtins/%.ll builtins/util.m4 builtins/util_ptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
+objs/builtins-%-32bit.cpp: builtins/%.ll builtins/util.m4 builtins/util-nvptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
 	@echo Creating C++ source from builtins definition file $< \(32 bit version\)
 	m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=32 $< | python bitcode2cpp.py $< 32bit > $@
 
-objs/builtins-%-64bit.cpp: builtins/%.ll builtins/util.m4 builtins/util_ptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
+objs/builtins-%-64bit.cpp: builtins/%.ll builtins/util.m4 builtins/util-nvptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
 	@echo Creating C++ source from builtins definition file $< \(64 bit version\)
 	m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=64 $< | python bitcode2cpp.py $< 64bit > $@
 
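For orientation, the objs/builtins-*.cpp files named in these rules are generated sources: the m4-expanded .ll input is assembled to LLVM bitcode and dumped as a C++ byte array. A minimal sketch of the shape of such a generated file (an assumption for illustration; the real bitcode2cpp.py output differs in details):

    // Hypothetical shape of objs/builtins-nvptx-64bit.cpp (illustrative only;
    // 0x42 0x43 0xC0 0xDE is the LLVM bitcode magic, "BC\xC0\xDE").
    extern const unsigned char builtins_bitcode_nvptx_64bit[] = {
        0x42, 0x43, 0xC0, 0xDE, /* ... assembled bitcode payload ... */
    };
    int builtins_bitcode_nvptx_64bit_length = sizeof(builtins_bitcode_nvptx_64bit);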
builtins.cpp (12 changed lines)
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -693,9 +693,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
     if (g->target->getISA() != Target::NEON32 &&
         g->target->getISA() != Target::NEON16 &&
         g->target->getISA() != Target::NEON8 &&
-        g->target->getISA() != Target::NVPTX64)
+        g->target->getISA() != Target::NVPTX)
 #else
-    if (g->target->getISA() != Target::NVPTX64)
+    if (g->target->getISA() != Target::NVPTX)
 #endif // !__arm__
     {
         Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
@@ -858,14 +858,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
     // Next, add the target's custom implementations of the various needed
     // builtin functions (e.g. __masked_store_32(), etc).
     switch (g->target->getISA()) {
-    case Target::NVPTX64:
+    case Target::NVPTX:
     {
         if (runtime32) {
-            fprintf(stderr, "We're sorry, but only 64-bit targets are supported at this moment.\n");
+            fprintf(stderr, "Unfortunately, 32-bit targets are not supported at the moment.\n");
            assert(0);
         }
         else {
-            EXPORT_MODULE(builtins_bitcode_nvptx64_64bit);
+            EXPORT_MODULE(builtins_bitcode_nvptx_64bit);
         }
         break;
     };
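EXPORT_MODULE above consumes exactly the generated byte arrays sketched earlier. A minimal sketch of what the macro could expand to, assuming it forwards to the AddBitcodeToModule() function from the first hunk (the real definition in builtins.cpp may differ):

    // Hypothetical expansion: link the embedded bitcode for the selected
    // target into the module currently being compiled.
    #define EXPORT_MODULE(name)                                            \
        do {                                                               \
            extern const unsigned char name[];                             \
            extern int name##_length;                                      \
            AddBitcodeToModule(name, name##_length, module, symbolTable);  \
        } while (0)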
@@ -1138,7 +1138,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
     }
 
     // define the 'programCount' builtin variable
-    if (!g->target->isPTX())
+    if (g->target->getISA() != Target::NVPTX)
     {
         lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
     }
 
@@ -105,15 +105,9 @@ define i32 @__lanemask_lt_nvptx() nounwind readnone alwaysinline
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; tasking
 
-define i8* @ISPCAlloc(i8**, i64, i32) nounwind alwaysinline
-{
-  %ptr = inttoptr i64 1 to i8*
-  ret i8* %ptr
-}
-
 ;; this call allocates a parameter buffer for kernel launch
 declare i64 @cudaGetParameterBuffer(i64, i64) nounwind
-define i8* @ISPCGetParamBuffer(i8**, i64 %align, i64 %size) nounwind alwaysinline
+define i8* @ISPCAlloc(i8**, i64 %size, i32 %align32) nounwind alwaysinline
 {
 entry:
   %call = tail call i32 @__tid_x()
@@ -121,6 +115,7 @@ entry:
   %sub = add nsw i32 %call1, -1
   %and = and i32 %sub, %call
   %cmp = icmp eq i32 %and, 0
+  %align = zext i32 %align32 to i64
   br i1 %cmp, label %if.then, label %if.end
 
 if.then:
@@ -224,7 +219,7 @@ define void @ISPCSync(i8*) nounwind alwaysinline
 
 
 
-include(`util_ptx.m4')
+include(`util-nvptx.m4')
 
 stdlib_core()
 packed_load_and_store()
ctx.cpp (85 changed lines)
--- a/ctx.cpp
+++ b/ctx.cpp
@@ -1410,7 +1410,7 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
 
 llvm::Value *
 FunctionEmitContext::ProgramIndexVector(bool is32bits) {
-    if (!g->target->isPTX()) //g->target->getISA() != Target::NVPTX64)
+    if (g->target->getISA() != Target::NVPTX)
     {
         llvm::SmallVector<llvm::Constant*, 16> array;
         for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
@@ -3540,7 +3540,7 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
                                 std::vector<llvm::Value *> &argVals,
                                 llvm::Value *launchCount[3]){
 
-    if (!g->target->isPTX())
+    if (g->target->getISA() != Target::NVPTX)
     {
         if (callee == NULL) {
             AssertPos(currentPos, m->errorCount > 0);
@@ -3608,7 +3608,79 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
         args.push_back(launchCount[2]);
         return CallInst(flaunch, NULL, args, "");
     }
-    else /* isPTX == true */
+    else /* NVPTX */
     {
+        if (callee == NULL) {
+            AssertPos(currentPos, m->errorCount > 0);
+            return NULL;
+        }
+        launchedTasks = true;
+
+        AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
+        std::vector<llvm::Type*> argTypes;
+        for (unsigned int i = 0; i < argVals.size(); i++)
+            argTypes.push_back(argVals[i]->getType());
+        llvm::Type *st = llvm::StructType::get(*g->ctx, argTypes);
+        llvm::StructType *argStructType = static_cast<llvm::StructType *>(st);
+        llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
+        if (structSize->getType() != LLVMTypes::Int64Type)
+            structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
+                                  "struct_size_to_64");
+
+        const int align = 8;
+        llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
+        AssertPos(currentPos, falloc != NULL);
+        std::vector<llvm::Value *> allocArgs;
+        allocArgs.push_back(launchGroupHandlePtr);
+        allocArgs.push_back(structSize);
+        allocArgs.push_back(LLVMInt32(align));
+        llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
+        llvm::Value *voidi64 = PtrToIntInst(voidmem, "args_i64");
+        llvm::BasicBlock* if_true = CreateBasicBlock("if_true");
+        llvm::BasicBlock* if_false = CreateBasicBlock("if_false");
+
+        /* Check that the pointer returned by ISPCAlloc is not NULL.
+         * --------------
+         * This is a workaround for not checking the value of programIndex,
+         * because ISPCAlloc returns a NULL pointer for all programIndex > 0.
+         * Of course, if ISPCAlloc fails to get a parameter buffer, the
+         * pointer for programIndex = 0 will also be NULL.
+         * This check must stay, and the code should be rewritten to make it
+         * less opaque.
+         */
+        llvm::Value* cmp1 = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, voidi64, LLVMInt64(0), "cmp1");
+        BranchInst(if_true, if_false, cmp1);
+
+        /**********************/
+        bblock = if_true;
+
+        // label_if_then block:
+        llvm::Type *pt = llvm::PointerType::getUnqual(st);
+        llvm::Value *argmem = BitCastInst(voidmem, pt);
+        for (unsigned int i = 0; i < argVals.size(); ++i)
+        {
+            llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
+            // don't need to do masked store here, I think
+            StoreInst(argVals[i], ptr);
+        }
+        BranchInst(if_false);
+
+        /**********************/
+        bblock = if_false;
+
+        llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
+        llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
+        AssertPos(currentPos, flaunch != NULL);
+        std::vector<llvm::Value *> args;
+        args.push_back(launchGroupHandlePtr);
+        args.push_back(fptr);
+        args.push_back(voidmem);
+        args.push_back(launchCount[0]);
+        args.push_back(launchCount[1]);
+        args.push_back(launchCount[2]);
+        llvm::Value *ret = CallInst(flaunch, NULL, args, "");
+        return ret;
+    }
+#if 0
+    {
     if (callee == NULL) {
         AssertPos(currentPos, m->errorCount > 0);
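Stripped of the IR-emission plumbing, the launch sequence generated above behaves like the following host-style pseudocode (a sketch; ISPCAlloc and ISPCLaunch are the runtime hooks from this diff, while Args, args, and task_fn are illustrative names):

    // One vector's worth of lanes executes this; only lane 0 receives a
    // non-NULL buffer from ISPCAlloc (see the workaround comment above).
    void *handle;                                  // launchGroupHandlePtr slot
    Args *a = (Args *)ISPCAlloc(&handle, sizeof(Args), /*align=*/8);
    if (a != NULL) {
        // if_true block: plain (unmasked) stores of the marshalled arguments
        *a = args;
    }
    // if_false block: every lane joins here and performs the launch
    ISPCLaunch(&handle, (void *)task_fn, a, count0, count1, count2);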
@@ -3684,13 +3756,16 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
         args.push_back(launchCount[2]);
         return CallInst(flaunch, NULL, args, "");
     }
+#endif
 }
 
 
 void
 FunctionEmitContext::SyncInst() {
-    if (!g->target->isPTX())
+#if 0
+    if (g->target->getISA() != Target::NVPTX)
     {
+#endif
     llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
     llvm::Value *nullPtrValue =
         llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
@@ -3714,6 +3789,7 @@ FunctionEmitContext::SyncInst() {
     BranchInst(bPostSync);
 
     SetCurrentBasicBlock(bPostSync);
+#if 0
     }
     else
     {
@@ -3726,6 +3802,7 @@ FunctionEmitContext::SyncInst() {
         CallInst(fsync, NULL, launchGroupHandle, "");
         StoreInst(nullPtrValue, launchGroupHandlePtr);
     }
+#endif
 }
 
decl.cpp (3 changed lines)
--- a/decl.cpp
+++ b/decl.cpp
@@ -531,7 +531,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
     returnType = returnType->ResolveUnboundVariability(Variability::Varying);
 
     bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
-    if (isTask && g->target->isPTX()) //getISA() == Target::NVPTX64)
+    if (isTask && g->target->getISA() == Target::NVPTX)
     {
         // ds->storageClass = SC_EXTERN_C;
         ds->typeQualifiers |= TYPEQUAL_UNMASKED;
@@ -547,7 +547,6 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
                   "qualifiers");
             return;
         }
-        // if (!g->target->isPTX())
         if (isExternC && isTask) {
             Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
                   "qualifiers");
@@ -22,7 +22,7 @@ endif
 
 #
 ISPC=ispc
-ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
+ISPC_FLAGS=-O3 --math-lib=default --target=nvptx --opt=fast-math
 #
 #
 #
func.cpp (141 changed lines)
--- a/func.cpp
+++ b/func.cpp
@@ -125,7 +125,7 @@ Function::Function(Symbol *s, Stmt *c) {
         sym->parentFunction = this;
     }
 
-    if (type->isTask) {
+    if (type->isTask && g->target->getISA() != Target::NVPTX) {
         threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
         Assert(threadIndexSym);
         threadCountSym = m->symbolTable->LookupVariable("threadCount");
@@ -237,12 +237,122 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
 #endif
     const FunctionType *type = CastType<FunctionType>(sym->type);
     Assert(type != NULL);
+    if (type->isTask == true && g->target->getISA() != Target::NVPTX) {
+        // For tasks, there should always be three parameters: the
+        // pointer to the structure that holds all of the arguments, the
+        // thread index, and the thread count variables.
+        llvm::Function::arg_iterator argIter = function->arg_begin();
+        llvm::Value *structParamPtr = argIter++;
+        llvm::Value *threadIndex = argIter++;
+        llvm::Value *threadCount = argIter++;
+        llvm::Value *taskIndex = argIter++;
+        llvm::Value *taskCount = argIter++;
+        llvm::Value *taskIndex0 = argIter++;
+        llvm::Value *taskIndex1 = argIter++;
+        llvm::Value *taskIndex2 = argIter++;
+        llvm::Value *taskCount0 = argIter++;
+        llvm::Value *taskCount1 = argIter++;
+        llvm::Value *taskCount2 = argIter++;
+
+        // Copy the function parameter values from the structure into local
+        // storage
+        for (unsigned int i = 0; i < args.size(); ++i)
+            lCopyInTaskParameter(i, structParamPtr, args, ctx);
+
+        if (type->isUnmasked == false) {
+            // Copy in the mask as well.
+            int nArgs = (int)args.size();
+            // The mask is the last parameter in the argument structure
+            llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL,
+                                                     "task_struct_mask");
+            llvm::Value *ptrval = ctx->LoadInst(ptr, "mask");
+            ctx->SetFunctionMask(ptrval);
+        }
+
+        // Copy threadIndex and threadCount into stack-allocated storage so
+        // that their symbols point to something reasonable.
+        threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex");
+        ctx->StoreInst(threadIndex, threadIndexSym->storagePtr);
+
+        threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount");
+        ctx->StoreInst(threadCount, threadCountSym->storagePtr);
+
+        // Copy taskIndex and taskCount into stack-allocated storage so
+        // that their symbols point to something reasonable.
+        taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex");
+        ctx->StoreInst(taskIndex, taskIndexSym->storagePtr);
+
+        taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
+        ctx->StoreInst(taskCount, taskCountSym->storagePtr);
+
+        taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
+        ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
+        taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
+        ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
+        taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
+        ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
+
+        taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
+        ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
+        taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
+        ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
+        taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
+        ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
+    }
+    else {
+        // Regular, non-task function
+        llvm::Function::arg_iterator argIter = function->arg_begin();
+        for (unsigned int i = 0; i < args.size(); ++i, ++argIter) {
+            Symbol *sym = args[i];
+            if (sym == NULL)
+                // anonymous function parameter
+                continue;
+
+            argIter->setName(sym->name.c_str());
+
+            // Allocate stack storage for the parameter and emit code
+            // to store its value there.
+            sym->storagePtr = ctx->AllocaInst(argIter->getType(), sym->name.c_str());
+            ctx->StoreInst(argIter, sym->storagePtr);
+            ctx->EmitFunctionParameterDebugInfo(sym, i);
+        }
+
+        // If the number of actual function arguments is equal to the
+        // number of declared arguments in decl->functionParams, then we
+        // don't have a mask parameter, so set it to be all on.  This
+        // happens for example with 'export'ed functions that the app
+        // calls.
+        if (argIter == function->arg_end()) {
+            Assert(type->isUnmasked || type->isExported);
+            ctx->SetFunctionMask(LLVMMaskAllOn);
+        }
+        else {
+            Assert(type->isUnmasked == false);
+
+            // Otherwise use the mask to set the entry mask value
+            argIter->setName("__mask");
+            Assert(argIter->getType() == LLVMTypes::MaskType);
+            ctx->SetFunctionMask(argIter);
+            Assert(++argIter == function->arg_end());
+        }
+        if (type->isTask == true && g->target->getISA() == Target::NVPTX)
+        {
+            llvm::NamedMDNode* annotations =
+                m->module->getOrInsertNamedMetadata("nvvm.annotations");
+            llvm::SmallVector<llvm::Value*, 3> av;
+            av.push_back(function);
+            av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
+            av.push_back(LLVMInt32(1));
+            annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
+        }
+    }
+#if 0
     if (type->isTask == true) {
         // For tasks, there should always be three parameters: the
         // pointer to the structure that holds all of the arguments, the
         // thread index, and the thread count variables.
 
-        if (!g->target->isPTX()) //if (g->target->getISA() != Target::NVPTX64)
+        if (g->target->getISA() != Target::NVPTX)
         {
             llvm::Function::arg_iterator argIter = function->arg_begin();
             llvm::Value *structParamPtr = argIter++;
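The argument unpacking above implies the following CPU-side task signature and marshalling convention (a sketch for illustration; Args and task_fn are hypothetical names, and per the code above the mask travels as the struct's trailing field):

    struct Args {
        /* user parameters, in declaration order */
        Mask mask;  // present unless the task type is unmasked
    };
    void task_fn(Args *args,
                 int32_t threadIndex,  int32_t threadCount,
                 int32_t taskIndex,    int32_t taskCount,
                 int32_t taskIndex0, int32_t taskIndex1, int32_t taskIndex2,
                 int32_t taskCount0, int32_t taskCount1, int32_t taskCount2);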
@@ -341,7 +451,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
             Assert(type->isUnmasked || type->isExported);
             ctx->SetFunctionMask(LLVMMaskAllOn);
         }
-        else /* for NVPTX64 , function must be unmasked */
+        else /* for NVPTX, function must be unmasked */
         {
             assert(0);
             Assert(type->isUnmasked == false);
@@ -353,7 +463,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
             Assert(++argIter == function->arg_end());
         }
 
-        if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
+        if (g->target->getISA() == Target::NVPTX)
         {
             llvm::NamedMDNode* annotations =
                 m->module->getOrInsertNamedMetadata("nvvm.annotations");
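In textual IR, the annotation built here should come out roughly as follows (an assumption based on NVPTX backend conventions of the LLVM 3.x era; exact printing varies across versions):

    // Module-level metadata marking a function as a CUDA kernel entry,
    // kept in a string purely for illustration:
    static const char *kExpectedAnnotation =
        "!nvvm.annotations = !{!0}\n"
        "!0 = metadata !{void ()* @task_fn, metadata !\"kernel\", i32 1}\n";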
@@ -402,6 +512,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
             Assert(++argIter == function->arg_end());
         }
     }
+#endif
 
     // Finally, we can generate code for the function
     if (code != NULL) {
@@ -535,26 +646,12 @@ Function::GenerateIR() {
         }
 
         // And we can now go ahead and emit the code
-        /* export function with NVPTX64 target should be emitted host architecture */
-#if 0
-        const FunctionType *func_type= CastType<FunctionType>(sym->type);
-        if (g->target->getISA() == Target::NVPTX64 && func_type->isExported)
-            return;
-#endif
 
-#if 0
-        if (g->target->getISA() != Target::NVPTX64 && g->target->isPTX() && func_type->isTask)
-            return;
-#endif
 
-        // if (!(g->target->getISA()==Target::NVPTX64 && func_type->isExported))
-        {
             FunctionEmitContext ec(this, sym, function, firstStmtPos);
             emitCode(&ec, function, firstStmtPos);
-        }
 
         if (m->errorCount == 0) {
-            // if (!(g->target->getISA() == Target::NVPTX64 && func_type->isExported))
             if (llvm::verifyFunction(*function, llvm::ReturnStatusAction) == true) {
                 if (g->debugPrint)
                     function->dump();
@@ -566,9 +663,9 @@ Function::GenerateIR() {
         // the application can call it
         const FunctionType *type = CastType<FunctionType>(sym->type);
         Assert(type != NULL);
-        if (type->isExported) { // && g->target->getISA() != Target::VPTX64) {
+        if (type->isExported) {
             if (!type->isTask) {
-                if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
+                if (g->target->getISA() == Target::NVPTX)
                 {
                     llvm::NamedMDNode* annotations =
                         m->module->getOrInsertNamedMetadata("nvvm.annotations");
@@ -585,7 +682,7 @@ Function::GenerateIR() {
         if (g->mangleFunctionsWithTarget)
             functionName += std::string("_") + g->target->GetISAString();
 
-        if (g->target->getISA() == Target::NVPTX64)
+        if (g->target->getISA() == Target::NVPTX)
             functionName += std::string("___export");
         llvm::Function *appFunction =
             llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
@@ -615,7 +712,7 @@ Function::GenerateIR() {
                 FATAL("Function verification failed");
             }
         }
-        if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
+        if (g->target->getISA() == Target::NVPTX)
        {
            llvm::NamedMDNode* annotations =
                m->module->getOrInsertNamedMetadata("nvvm.annotations");
ispc.cpp (32 changed lines)
--- a/ispc.cpp
+++ b/ispc.cpp
@@ -174,7 +174,7 @@ static const char *supportedCPUs[] = {
 #endif // LLVM 3.4+
 };
 
-Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool isPTX) :
+Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
     m_target(NULL),
     m_targetMachine(NULL),
 #if defined(LLVM_3_1)
@@ -184,7 +184,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
 #endif
     m_valid(false),
     m_isa(SSE2),
-    m_isPTX(isPTX),
     m_arch(""),
     m_is32Bit(true),
     m_cpu(""),
@@ -212,7 +211,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
         if (!strcmp(cpu, "core-avx2"))
             isa = "avx2-i32x8";
         else if (!strcmp(cpu, "sm_35"))
-            isa = "nvptx64";
+            isa = "nvptx";
 #ifdef ISPC_ARM_ENABLED
         else if (!strcmp(cpu, "cortex-a9") ||
                  !strcmp(cpu, "cortex-a15"))
@@ -249,7 +248,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
         cpu = "cortex-a9";
 #endif
 
-    if (cpu == NULL && !strcmp(isa, "nvptx64"))
+    if (cpu == NULL && !strcmp(isa, "nvptx"))
         cpu = "sm_35";
 
     if (cpu == NULL) {
@@ -280,8 +279,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
     this->m_cpu = cpu;
 
     if (arch == NULL) {
-        if (!strcmp(isa, "nvptx64"))
-            arch = "nvptx64";
+        if (!strcmp(isa, "nvptx"))
+            arch = "nvptx";
 #ifdef ISPC_ARM_ENABLED
         else if (!strncmp(isa, "neon", 4))
             arch = "arm";
@@ -709,10 +708,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
         this->m_maskBitCount = 32;
     }
 #endif
-    else if (!strcasecmp(isa, "nvptx64"))
+    else if (!strcasecmp(isa, "nvptx"))
     {
-        this->m_isa = Target::NVPTX64;
-        this->m_isPTX = true;
+        this->m_isa = Target::NVPTX;
         this->m_nativeVectorWidth = 32;
         this->m_nativeVectorAlignment = 32;
         this->m_vectorWidth = 1;
@@ -780,7 +778,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
         dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
             "i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
             "f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
-    } else if (m_isa == Target::NVPTX64)
+    } else if (m_isa == Target::NVPTX)
     {
         dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
     }
@@ -803,7 +801,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
     // Initialize target-specific "target-feature" attribute.
     if (!m_attributes.empty()) {
         llvm::AttrBuilder attrBuilder;
-        if (m_isa != Target::NVPTX64)
+        if (m_isa != Target::NVPTX)
             attrBuilder.addAttribute("target-cpu", this->m_cpu);
         attrBuilder.addAttribute("target-features", this->m_attributes);
         this->m_tf_attributes = new llvm::AttributeSet(
@@ -838,7 +836,7 @@ Target::SupportedCPUs() {
 
 const char *
 Target::SupportedArchs() {
-    return "nvptx64, "
+    return "nvptx, "
 #ifdef ISPC_ARM_ENABLED
         "arm, "
 #endif
@@ -848,7 +846,7 @@ Target::SupportedArchs() {
 
 const char *
 Target::SupportedTargets() {
-    return "nvptx64, "
+    return "nvptx, "
 #ifdef ISPC_ARM_ENABLED
         "neon-i8x16, neon-i16x8, neon-i32x4, "
 #endif
@@ -866,9 +864,9 @@ Target::SupportedTargets() {
 std::string
 Target::GetTripleString() const {
     llvm::Triple triple;
-    if (m_arch == "nvptx64")
+    if (m_arch == "nvptx")
     {
-        triple.setTriple("nvptx64");
+        triple.setTriple("nvptx");
     }
 #ifdef ISPC_ARM_ENABLED
     else if (m_arch == "arm") {
@@ -902,8 +900,8 @@ Target::GetTripleString() const {
 const char *
 Target::ISAToString(ISA isa) {
     switch (isa) {
-    case Target::NVPTX64:
-        return "nvptx64";
+    case Target::NVPTX:
+        return "nvptx";
 #ifdef ISPC_ARM_ENABLED
     case Target::NEON8:
         return "neon-8";
ispc.h (6 changed lines)
--- a/ispc.h
+++ b/ispc.h
@@ -179,7 +179,7 @@ public:
         flexible/performant of them will appear last in the enumerant.  Note
         also that __best_available_isa() needs to be updated if ISAs are
         added or the enumerant values are reordered. */
-    enum ISA { NVPTX64,
+    enum ISA { NVPTX,
 #ifdef ISPC_ARM_ENABLED
                NEON32, NEON16, NEON8,
 #endif
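Because the most capable ISAs sit at the high end of this enumeration, dispatch code can walk the enumerant values downward, and NVPTX at value 0 stays out of the x86 search entirely. A sketch of the idea (hypothetical helper and NUM_ISAS sentinel, not the real __best_available_isa()):

    Target::ISA lBestAvailableISA(bool (*isaSupported)(Target::ISA)) {
        // Walk from most to least capable, stopping at the first ISA the
        // host supports; never consider NVPTX (enumerant value 0).
        for (int i = (int)Target::NUM_ISAS - 1; i > (int)Target::NVPTX; --i)
            if (isaSupported((Target::ISA)i))
                return (Target::ISA)i;
        return Target::SSE2;  // assumed x86 baseline
    }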
@@ -189,7 +189,7 @@ public:
     /** Initializes the given Target pointer for a target of the given
         name, if the name is a known target.  Returns true if the
         target was initialized and false if the name is unknown. */
-    Target(const char *arch, const char *cpu, const char *isa, bool pic, bool isPTX = false);
+    Target(const char *arch, const char *cpu, const char *isa, bool pic);
 
     /** Returns a comma-delimited string giving the names of the currently
         supported compilation targets. */
@@ -251,7 +251,6 @@ public:
     bool isValid() const {return m_valid;}
 
     ISA getISA() const {return m_isa;}
-    bool isPTX() const {return m_isPTX;}
 
     std::string getArch() const {return m_arch;}
 
@@ -310,7 +309,6 @@ private:
 
     /** Instruction set being compiled to. */
     ISA m_isa;
-    bool m_isPTX;
 
     /** Target system architecture. (e.g. "x86-64", "x86"). */
     std::string m_arch;
module.cpp (144 changed lines)
--- a/module.cpp
+++ b/module.cpp
@@ -733,7 +733,7 @@ Module::AddFunctionDeclaration(const std::string &name,
     if (storageClass == SC_EXTERN_C) {
         // Make sure the user hasn't supplied both an 'extern "C"' and a
         // 'task' qualifier with the function
-        if (functionType->isTask) //&& !g->target->isPTX()) //tISA() != Target::NVPTX64)
+        if (functionType->isTask)
         {
             Error(pos, "\"task\" qualifier is illegal with C-linkage extern "
                   "function \"%s\".  Ignoring this function.", name.c_str());
@@ -796,8 +796,8 @@ Module::AddFunctionDeclaration(const std::string &name,
 #else // LLVM 3.1 and 3.3+
         function->addFnAttr(llvm::Attribute::AlwaysInline);
 #endif
-    /* evghenii: on PTX target this must not be used, cause crash, dunno why */
-    if (functionType->isTask && g->target->getISA() != Target::NVPTX64)
+    /* evghenii: on the PTX target the following must not be set ... why ?!? */
+    if (functionType->isTask && g->target->getISA() != Target::NVPTX)
         // This also applies transitively to members I think?
 #if defined(LLVM_3_1)
         function->setDoesNotAlias(1, true);
@@ -953,7 +953,7 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
         const char *fileType = NULL;
         switch (outputType) {
         case Asm:
-            if (g->target->getISA() != Target::NVPTX64)
+            if (g->target->getISA() != Target::NVPTX)
             {
                 if (strcasecmp(suffix, "s"))
                     fileType = "assembly";
@@ -1053,7 +1053,7 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
     }
 
     llvm::raw_fd_ostream fos(fd, (fd != 1), false);
-    if (g->target->getISA() == Target::NVPTX64)
+    if (g->target->getISA() == Target::NVPTX)
     {
         const std::string dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
         module->setDataLayout(dl_string);
@@ -1925,7 +1925,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
             opts.addMacroDef(g->cppArgs[i].substr(2));
         }
     }
-    if (g->target->getISA() == Target::NVPTX64)
+    if (g->target->getISA() == Target::NVPTX)
     {
         opts.addMacroDef("__NVPTX__");
         opts.addMacroDef("programIndex=laneIndex()");
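The two macro definitions above change how ispc source is seen by the front end on this target. A sketch of the effect (illustrative; laneIndex() is the builtin named in this diff):

    // With these defines, a source line such as
    //     int i = programIndex;
    // reaches the parser as
    //     int i = laneIndex();
    // so the per-lane index comes from a builtin call rather than a
    // compile-time <0,1,...> vector.
    #ifdef __NVPTX__
    #define programIndex laneIndex()
    #endif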
@@ -2331,135 +2331,9 @@ Module::CompileAndOutput(const char *srcFile,
                          const char *hostStubFileName,
                          const char *devStubFileName)
 {
-    char ptxname[] = "nvptx64";
-    for (int k = 0; k < 7; k++)
-        ptxname[k] = target[k];
-    if (0) //target != NULL && strcmp(ptxname,"nvptx64") == 0) // NVPTX64
-    {
-        std::vector<std::string> targets = lExtractTargets(target);
-        Assert(targets.size() > 1);
     if (target == NULL || strchr(target, ',') == NULL) {
         // We're only compiling to a single target
-        int errorCount = 0;
-
-        const char *suffix_orig = strrchr(outFileName, '.');
-        ++suffix_orig;
-        assert(suffix_orig!=NULL);
-
-        g->PtxString = std::string();
-
-        for (int itarget = 0; itarget < 1; itarget++)
-        {
-            fprintf(stderr, "compiling nvptx64 : target= %s\n",targets[itarget].c_str());
-            g->target = new Target(arch, cpu, targets[itarget].c_str(), generatePIC, /* isPTX= */ true);
-            if (!g->target->isValid())
-                return 1;
-
-            m = new Module(srcFile);
-            if (m->CompileFile() == 0) {
-                if (outputType == CXX) {
-                    if (target == NULL || strncmp(target, "generic-", 8) != 0) {
-                        Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
-                              "targets must be used.");
-                        return 1;
-                    }
-                }
-                else if (outputType == Asm || outputType == Object) {
-                    if (target != NULL && strncmp(target, "generic-", 8) == 0) {
-                        Error(SourcePos(), "When using a \"generic-*\" compilation target, "
-                              "%s output can not be used.",
-                              (outputType == Asm) ? "assembly" : "object file");
-                        return 1;
-                    }
-                }
-
-                assert(outFileName != NULL);
-
-                std::string targetOutFileName =
-                    lGetTargetFileName(outFileName, targets[itarget].c_str());
-                if (outputType == Asm)
-                {
-                    const char * targetOutFileName_c = targetOutFileName.c_str();
-                    const int suffix = strrchr(targetOutFileName_c, '.') - targetOutFileName_c + 1;
-                    if (itarget == 1 && !strcasecmp(suffix_orig, "ptx"))
-                    {
-                        targetOutFileName[suffix  ] = 's';
-                        targetOutFileName[suffix+1] = 0;
-                    }
-                }
-
-                if (outputType != Object)
-                {
-                    if (!m->writeOutput(outputType, targetOutFileName.c_str(), includeFileName))
-                        return 1;
-                }
-                else if (itarget > 0)
-                {
-                    if (!m->writeOutput(outputType, outFileName, includeFileName))
-                        return 1;
-                }
-
-                if (itarget == 0)
-                { /* store ptx into memory */
-                    llvm::PassManager pm;
-#if defined(LLVM_3_1)
-                    pm.add(new llvm::TargetData(*g->target->getDataLayout()));
-#else
-                    pm.add(new llvm::DataLayout(*g->target->getDataLayout()));
-#endif
-
-                    llvm::raw_string_ostream rso(g->PtxString);
-                    llvm::formatted_raw_ostream fos(rso);
-
-                    llvm::TargetMachine::CodeGenFileType fileType = llvm::TargetMachine::CGFT_AssemblyFile;
-                    llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
-                    if (targetMachine->addPassesToEmitFile(pm, fos, fileType)) {
-                        fprintf(stderr, "Fatal error adding passes to emit object file!");
-                        exit(1);
-                    }
-
-                    llvm::Module *module = m->module;
-                    pm.run(*module);
-                    fos.flush();
-                    assert(!g->PtxString.empty());
-#if 0
-                    std::cout << g->PtxString << std::endl;
-#endif
-                }
-
-
-                if (itarget > 0)
-                {
-                    if (headerFileName != NULL)
-                        if (!m->writeOutput(Module::Header, headerFileName))
-                            return 1;
-                    if (depsFileName != NULL)
-                        if (!m->writeOutput(Module::Deps,depsFileName))
-                            return 1;
-                    if (hostStubFileName != NULL)
-                        if (!m->writeOutput(Module::HostStub,hostStubFileName))
-                            return 1;
-                    if (devStubFileName != NULL)
-                        if (!m->writeOutput(Module::DevStub,devStubFileName))
-                            return 1;
-                }
-            }
-            else
-                ++m->errorCount;
-
-            errorCount += m->errorCount;
-            delete m;
-            m = NULL;
-
-            delete g->target;
-            g->target = NULL;
-
-        }
-        return errorCount > 0;
-    }
-    else if (target == NULL || strchr(target, ',') == NULL) {
-        // We're only compiling to a single target
-        const bool isPTX = strcmp(target, "nvptx64") == 0;
-        g->target = new Target(arch, cpu, target, generatePIC, isPTX);
+        g->target = new Target(arch, cpu, target, generatePIC);
         if (!g->target->isValid())
             return 1;
 
@@ -2525,8 +2399,6 @@ Module::CompileAndOutput(const char *srcFile,
         // The user supplied multiple targets
         std::vector<std::string> targets = lExtractTargets(target);
         Assert(targets.size() > 1);
-        for (unsigned int i = 0; i < targets.size(); ++i)
-            assert(strcmp(targets[i].c_str(), "nvptx64") < 0);
 
         if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
             Error(SourcePos(), "Multi-target compilation can't generate output "
stmt.cpp (473 changed lines)
--- a/stmt.cpp
+++ b/stmt.cpp
@@ -206,7 +206,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
         }
 
         if (sym->storageClass == SC_STATIC) {
-            if (g->target->getISA() == Target::NVPTX64)
+            if (g->target->getISA() == Target::NVPTX)
                 if (!sym->type->IsConstType())
                     Error(initExpr->pos, "Non-constant static variable ""\"%s\" is not supported with ""\"cuda\" target.",
                           sym->name.c_str());
@@ -1280,7 +1280,7 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
                       llvm::Value *varyingCounterPtr,
                       const std::vector<int> &spans)
 {
-    if (!g->target->isPTX())
+    if (g->target->getISA() != Target::NVPTX)
     {
         // Smear the uniform counter value out to be varying
        llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
@@ -1315,7 +1315,7 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
         ctx->StoreInst(varyingCounter, varyingCounterPtr);
         return varyingCounter;
     }
-    else /* isPTX() == true */
+    else /* NVPTX == true */
     {
         // Smear the uniform counter value out to be varying
        llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
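Both branches of lUpdateVaryingCounter implement the same idea: smear the uniform counter across the lanes and add per-lane offsets. In scalar form (an illustrative 4-wide sketch; the real code uses the span values computed by lGetSpans, and on NVPTX the offset comes from the lane id rather than a compile-time vector):

    int32_t counter = *uniformCounterPtr;      // uniform loop counter
    int32_t varying[4];
    for (int lane = 0; lane < 4; ++lane)
        varying[lane] = counter + lane;        // per-lane iteration index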
@@ -1465,8 +1465,6 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
     if (ctx->GetCurrentBasicBlock() == NULL || stmts == NULL)
         return;
 
-    if (!g->target->isPTX())
-    {
     llvm::BasicBlock *bbFullBody = ctx->CreateBasicBlock("foreach_full_body");
     llvm::BasicBlock *bbMaskedBody = ctx->CreateBasicBlock("foreach_masked_body");
     llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
@@ -1493,469 +1491,9 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
     std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
 
     std::vector<int> span(nDims, 0);
-    lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
-
-    for (int i = 0; i < nDims; ++i) {
-        // Basic blocks that we'll fill in later with the looping logic for
-        // this dimension.
-        bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
-        if (i < nDims-1)
-            // stepping for the innermost dimension is handled specially
-            bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
-        bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
-
-        // Start and end value for this loop dimension
-        llvm::Value *sv = startExprs[i]->GetValue(ctx);
-        llvm::Value *ev = endExprs[i]->GetValue(ctx);
-        if (sv == NULL || ev == NULL)
-            return;
-        startVals.push_back(sv);
-        endVals.push_back(ev);
-
-        // nItems = endVal - startVal
-        llvm::Value *nItems =
-            ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
-
-        // nExtras = nItems % (span for this dimension)
-        // This gives us the number of extra elements we need to deal with
-        // at the end of the loop for this dimension that don't fit cleanly
-        // into a vector width.
-        nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
-                                              LLVMInt32(span[i]), "nextras"));
-
-        // alignedEnd = endVal - nExtras
-        alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
-                                                 nExtras[i], "aligned_end"));
-
-        ///////////////////////////////////////////////////////////////////////
-        // Each dimension has a loop counter that is a uniform value that
-        // goes from startVal to endVal, in steps of the span for this
-        // dimension.  Its value is only used internally here for looping
-        // logic and isn't directly available in the user's program code.
-        uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
-                                                     "counter"));
-        ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
-
-        // There is also a varying variable that holds the set of index
-        // values for each dimension in the current loop iteration; this is
-        // the value that is program-visible.
-        dimVariables[i]->storagePtr =
-            ctx->AllocaInst(LLVMTypes::Int32VectorType,
-                            dimVariables[i]->name.c_str());
-        dimVariables[i]->parentFunction = ctx->GetFunction();
-        ctx->EmitVariableDebugInfo(dimVariables[i]);
-
-        // Each dimension also maintains a mask that represents which of
-        // the varying elements in the current iteration should be
-        // processed.  (i.e. this is used to disable the lanes that have
-        // out-of-bounds offsets.)
-        extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
-        ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
-    }
-
-    ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
-
-    // On to the outermost loop's test
-    ctx->BranchInst(bbTest[0]);
-
-    ///////////////////////////////////////////////////////////////////////////
-    // foreach_reset: this code runs when we need to reset the counter for
-    // a given dimension in preparation for running through its loop again,
-    // after the enclosing level advances its counter.
-    for (int i = 0; i < nDims; ++i) {
-        ctx->SetCurrentBasicBlock(bbReset[i]);
-        if (i == 0)
-            ctx->BranchInst(bbExit);
-        else {
-            ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
-            ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
-            ctx->BranchInst(bbStep[i-1]);
-        }
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // foreach_step: increment the uniform counter by the vector width.
-    // Note that we don't increment the varying counter here as well but
-    // just generate its value when we need it in the loop body.  Don't do
-    // this for the innermost dimension, which has a more complex stepping
-    // structure..
-    for (int i = 0; i < nDims-1; ++i) {
-        ctx->SetCurrentBasicBlock(bbStep[i]);
-        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
-        llvm::Value *newCounter =
-            ctx->BinaryOperator(llvm::Instruction::Add, counter,
-                                LLVMInt32(span[i]), "new_counter");
-        ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
-        ctx->BranchInst(bbTest[i]);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // foreach_test (for all dimensions other than the innermost...)
-    std::vector<llvm::Value *> inExtras;
-    for (int i = 0; i < nDims-1; ++i) {
-        ctx->SetCurrentBasicBlock(bbTest[i]);
-
-        llvm::Value *haveExtras =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
-                         endVals[i], alignedEnd[i], "have_extras");
-
-        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
-        llvm::Value *atAlignedEnd =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
-                         counter, alignedEnd[i], "at_aligned_end");
-        llvm::Value *inEx =
-            ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
-                                atAlignedEnd, "in_extras");
-
-        if (i == 0)
-            inExtras.push_back(inEx);
-        else
-            inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
-                                                   inExtras[i-1], "in_extras_all"));
-
-        llvm::Value *varyingCounter =
-            lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
-                                  dimVariables[i]->storagePtr, span);
-
-        llvm::Value *smearEnd = ctx->BroadcastValue(
-            endVals[i], LLVMTypes::Int32VectorType, "smear_end");
-
-        // Do a vector compare of its value to the end value to generate a
-        // mask for this last bit of work.
-        llvm::Value *emask =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
-                         varyingCounter, smearEnd);
-        emask = ctx->I1VecToBoolVec(emask);
-
-        if (i == 0)
-            ctx->StoreInst(emask, extrasMaskPtrs[i]);
-        else {
-            llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
-            llvm::Value *newMask =
-                ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
-                                    "extras_mask");
-            ctx->StoreInst(newMask, extrasMaskPtrs[i]);
-        }
-
-        llvm::Value *notAtEnd =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
-                         counter, endVals[i]);
-        ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // foreach_test (for innermost dimension)
-    //
-    // All of the outer dimensions are handled generically--basically as a
-    // for() loop from the start value to the end value, where at each loop
-    // test, we compute the mask of active elements for the current
-    // dimension and then update an overall mask that is the AND
-    // combination of all of the outer ones.
-    //
-    // The innermost loop is handled specially, for performance purposes.
-    // When starting the innermost dimension, we start by checking once
-    // whether any of the outer dimensions has set the mask to be
-    // partially-active or not.  We follow different code paths for these
-    // two cases, taking advantage of the knowledge that the mask is all
-    // on, when this is the case.
-    //
-    // In each of these code paths, we start with a loop from the starting
-    // value to the aligned end value for the innermost dimension; we can
-    // guarantee that the innermost loop will have an "all on" mask (as far
-    // as its dimension is concerned) for the duration of this loop.  Doing
-    // so allows us to emit code that assumes the mask is all on (for the
-    // case where none of the outer dimensions has set the mask to be
-    // partially on), or allows us to emit code that just uses the mask
-    // from the outer dimensions directly (for the case where they have).
-    //
-    // After this loop, we just need to deal with one vector's worth of
-    // "ragged extra bits", where the mask used includes the effect of the
-    // mask for the innermost dimension.
-    //
-    // We start out this process by emitting the check that determines
-    // whether any of the enclosing dimensions is partially active
-    // (i.e. processing extra elements that don't exactly fit into a
-    // vector).
-    llvm::BasicBlock *bbOuterInExtras =
-        ctx->CreateBasicBlock("outer_in_extras");
-    llvm::BasicBlock *bbOuterNotInExtras =
-        ctx->CreateBasicBlock("outer_not_in_extras");
-
-    ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
-    if (inExtras.size())
-        ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
-                        inExtras.back());
-    else
-        // for a 1D iteration domain, we certainly don't have any enclosing
-        // dimensions that are processing extra elements.
-        ctx->BranchInst(bbOuterNotInExtras);
-
-    ///////////////////////////////////////////////////////////////////////////
-    // One or more outer dimensions in extras, so we need to mask for the loop
-    // body regardless.  We break this into two cases, roughly:
-    // for (counter = start; counter < alignedEnd; counter += step) {
-    //   // mask is all on for inner, so set mask to outer mask
-    //   // run loop body with mask
-    // }
-    // // counter == alignedEnd
-    // if (counter < end) {
-    //   // set mask to outermask & (counter+programCounter < end)
-    //   // run loop body with mask
-    // }
-    llvm::BasicBlock *bbAllInnerPartialOuter =
-        ctx->CreateBasicBlock("all_inner_partial_outer");
-    llvm::BasicBlock *bbPartial =
-        ctx->CreateBasicBlock("both_partial");
-    ctx->SetCurrentBasicBlock(bbOuterInExtras); {
-        // Update the varying counter value here, since all subsequent
-        // blocks along this path need it.
-        lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
-                              dimVariables[nDims-1]->storagePtr, span);
-
-        // here we just check to see if counter < alignedEnd
-        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
-        llvm::Value *beforeAlignedEnd =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
-                         counter, alignedEnd[nDims-1], "before_aligned_end");
-        ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
-    }
-
-    // Below we have a basic block that runs the loop body code for the
-    // case where the mask is partially but not fully on.  This same block
-    // runs in multiple cases: both for handling any ragged extra data for
-    // the innermost dimension but also when outer dimensions have set the
-    // mask to be partially on.
-    //
-    // The value stored in stepIndexAfterMaskedBodyPtr is used after each
-    // execution of the body code to determine whether the innermost index
-    // value should be incremented by the step (we're running the "for"
-    // loop of full vectors at the innermost dimension, with outer
-    // dimensions having set the mask to be partially on), or whether we're
-    // running once for the ragged extra bits at the end of the innermost
-    // dimension, in which case we're done with the innermost dimension and
-    // should step the loop counter for the next enclosing dimension
-    // instead.
-    llvm::Value *stepIndexAfterMaskedBodyPtr =
-        ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
-
-    ///////////////////////////////////////////////////////////////////////////
-    // We're in the inner loop part where the only masking is due to outer
-    // dimensions but the innermost dimension fits fully into a vector's
-    // width.  Set the mask and jump to the masked loop body.
-    ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
-        llvm::Value *mask;
-        if (nDims == 1)
-            // 1D loop; we shouldn't ever get here anyway
-            mask = LLVMMaskAllOff;
-        else
-            mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
-
-        ctx->SetInternalMask(mask);
-
-        ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
-        ctx->BranchInst(bbMaskedBody);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // We need to include the effect of the innermost dimension in the mask
-    // for the final bits here
-    ctx->SetCurrentBasicBlock(bbPartial); {
-        llvm::Value *varyingCounter =
-            ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
-        llvm::Value *smearEnd = ctx->BroadcastValue(
-            endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
-
-        llvm::Value *emask =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
-                         varyingCounter, smearEnd);
-        emask = ctx->I1VecToBoolVec(emask);
-
-        if (nDims == 1) {
-            ctx->SetInternalMask(emask);
-        }
-        else {
-            llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
-            llvm::Value *newMask =
-                ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
-                                    "extras_mask");
-            ctx->SetInternalMask(newMask);
-        }
-
-        ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
-        ctx->BranchInst(bbMaskedBody);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // None of the outer dimensions is processing extras; along the lines
-    // of above, we can express this as:
-    // for (counter = start; counter < alignedEnd; counter += step) {
-    //   // mask is all on
-    //   // run loop body with mask all on
-    // }
-    // // counter == alignedEnd
-    // if (counter < end) {
-    //   // set mask to (counter+programCounter < end)
-    //   // run loop body with mask
-    // }
-    llvm::BasicBlock *bbPartialInnerAllOuter =
-        ctx->CreateBasicBlock("partial_inner_all_outer");
-    ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
-        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
-        llvm::Value *beforeAlignedEnd =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
-                         counter, alignedEnd[nDims-1], "before_aligned_end");
-        ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
-                        beforeAlignedEnd);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // full_body: do a full vector's worth of work.  We know that all
-    // lanes will be running here, so we explicitly set the mask to be 'all
-    // on'.  This ends up being relatively straightforward: just update the
-    // value of the varying loop counter and have the statements in the
-    // loop body emit their code.
-    llvm::BasicBlock *bbFullBodyContinue =
-        ctx->CreateBasicBlock("foreach_full_continue");
-    ctx->SetCurrentBasicBlock(bbFullBody); {
-        ctx->SetInternalMask(LLVMMaskAllOn);
-        ctx->SetBlockEntryMask(LLVMMaskAllOn);
-        lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
-                              dimVariables[nDims-1]->storagePtr, span);
-        ctx->SetContinueTarget(bbFullBodyContinue);
-        ctx->AddInstrumentationPoint("foreach loop body (all on)");
-        stmts->EmitCode(ctx);
-        AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
-        ctx->BranchInst(bbFullBodyContinue);
-    }
-    ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
-        ctx->RestoreContinuedLanes();
-        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
-        llvm::Value *newCounter =
-            ctx->BinaryOperator(llvm::Instruction::Add, counter,
-                                LLVMInt32(span[nDims-1]), "new_counter");
-        ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
-        ctx->BranchInst(bbOuterNotInExtras);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // We're done running blocks with the mask all on; see if the counter is
-    // less than the end value, in which case we need to run the body one
-    // more time to get the extra bits.
-    llvm::BasicBlock *bbSetInnerMask =
-        ctx->CreateBasicBlock("partial_inner_only");
-    ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
-        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
-        llvm::Value *beforeFullEnd =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
-                         counter, endVals[nDims-1], "before_full_end");
-        ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // The outer dimensions are all on, so the mask is just given by the
-    // mask for the innermost dimension
-    ctx->SetCurrentBasicBlock(bbSetInnerMask); {
-        llvm::Value *varyingCounter =
-            lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
-                                  dimVariables[nDims-1]->storagePtr, span);
-        llvm::Value *smearEnd = ctx->BroadcastValue(
-            endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
-        llvm::Value *emask =
-            ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
-                         varyingCounter, smearEnd);
-        emask = ctx->I1VecToBoolVec(emask);
-        ctx->SetInternalMask(emask);
-        ctx->SetBlockEntryMask(emask);
-
-        ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
-        ctx->BranchInst(bbMaskedBody);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // masked_body: set the mask and have the statements emit their
-    // code again.  Note that it's generally worthwhile having two copies
-    // of the statements' code, since the code above is emitted with the
-    // mask known to be all-on, which in turn leads to more efficient code
-    // for that case.
-    llvm::BasicBlock *bbStepInnerIndex =
-        ctx->CreateBasicBlock("step_inner_index");
-    llvm::BasicBlock *bbMaskedBodyContinue =
-        ctx->CreateBasicBlock("foreach_masked_continue");
-    ctx->SetCurrentBasicBlock(bbMaskedBody); {
-        ctx->AddInstrumentationPoint("foreach loop body (masked)");
-        ctx->SetContinueTarget(bbMaskedBodyContinue);
-        ctx->DisableGatherScatterWarnings();
-        ctx->SetBlockEntryMask(ctx->GetFullMask());
-        stmts->EmitCode(ctx);
-        ctx->EnableGatherScatterWarnings();
-        ctx->BranchInst(bbMaskedBodyContinue);
-    }
-    ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
-        ctx->RestoreContinuedLanes();
-        llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
-        ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // step the innermost index, for the case where we're doing the
-    // innermost for loop over full vectors.
-    ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
-        llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
-        llvm::Value *newCounter =
-            ctx->BinaryOperator(llvm::Instruction::Add, counter,
-                                LLVMInt32(span[nDims-1]), "new_counter");
-        ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
-        ctx->BranchInst(bbOuterInExtras);
-    }
-
-    ///////////////////////////////////////////////////////////////////////////
-    // foreach_exit: All done.  Restore the old mask and clean up
-    ctx->SetCurrentBasicBlock(bbExit);
-
-    ctx->SetInternalMask(oldMask);
-    ctx->SetFunctionMask(oldFunctionMask);
-
-    ctx->EndForeach();
-    ctx->EndScope();
-    }
-    else /* isPTX() == true */
-    {
-        llvm::BasicBlock *bbFullBody = ctx->CreateBasicBlock("foreach_full_body");
-        llvm::BasicBlock *bbMaskedBody = ctx->CreateBasicBlock("foreach_masked_body");
-        llvm::BasicBlock *bbExit = ctx->CreateBasicBlock("foreach_exit");
-
-        llvm::Value *oldMask = ctx->GetInternalMask();
-        llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
-
-        ctx->SetDebugPos(pos);
-        ctx->StartScope();
-
-        ctx->SetInternalMask(LLVMMaskAllOn);
-        ctx->SetFunctionMask(LLVMMaskAllOn);
-
-        // This should be caught during typechecking
-        AssertPos(pos, startExprs.size() == dimVariables.size() &&
-                  endExprs.size() == dimVariables.size());
-        int nDims = (int)dimVariables.size();
-
-        ///////////////////////////////////////////////////////////////////////
-        // Setup: compute the number of items we have to work on in each
-        // dimension and a number of derived values.
-        std::vector<llvm::BasicBlock *> bbReset, bbStep, bbTest;
-        std::vector<llvm::Value *> startVals, endVals, uniformCounterPtrs;
-        std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
-
-        std::vector<int> span(nDims, 0);
-        const int vectorWidth = 32;
+    const int vectorWidth =
+        g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
     lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
 #if 0
     for (int i = 0; i < nDims; i++)
     {
         fprintf(stderr, " i= %d [ %d ] : %d \n",
                 i, nDims, span[i]);
     }
     fprintf(stderr, " --- \n");
 #endif
 
     for (int i = 0; i < nDims; ++i) {
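For a single dimension, the control structure that survives this unification is, in scalar outline (a sketch; body_all_on and body_masked stand for the two emitted copies of the loop body described in the comments above):

    int alignedEnd = end - (end - start) % vectorWidth;
    for (int c = start; c < alignedEnd; c += vectorWidth)
        body_all_on(c);                 // mask statically known to be all-on
    if (alignedEnd < end)
        body_masked(alignedEnd);        // mask = (alignedEnd + lane < end)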
@@ -2380,7 +1918,6 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
         ctx->EndForeach();
         ctx->EndScope();
     }
-    }
 
 
 Stmt *
type.cpp (20 changed lines)
--- a/type.cpp
+++ b/type.cpp
@@ -2925,7 +2925,7 @@ FunctionType::GetReturnTypeString() const {
 llvm::FunctionType *
 FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
 
-    if (isTask == true) // && !g->target->isPTX()) //getISA() != Target::NVPTX64)
+    if (isTask == true)
         Assert(removeMask == false);
 
     // Get the LLVM Type *s for the function arguments
@@ -2950,15 +2950,12 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
         llvmArgTypes.push_back(LLVMTypes::MaskType);
 
     std::vector<llvm::Type *> callTypes;
-    if (isTask) {
+    if (isTask && g->target->getISA() != Target::NVPTX) {
         // Tasks take three arguments: a pointer to a struct that holds the
         // actual task arguments, the thread index, and the total number of
         // threads the tasks system has running.  (Task arguments are
         // marshalled in a struct so that it's easy to allocate space to
         // hold them until the task actually runs.)
-        // if (g->target->getISA() != Target::NVPTX64)
-        if (!g->target->isPTX())
-        {
         llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
         callTypes.push_back(llvm::PointerType::getUnqual(st));
         callTypes.push_back(LLVMTypes::Int32Type); // threadIndex
@@ -2972,22 +2969,11 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
         callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
         callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
     }
-    else
-    {
-        if (g->target->getISA() == Target::NVPTX64)
-            callTypes = llvmArgTypes;
-        else
-        {
-            assert(0); /* evghenii: must be removed in final, just for test for nvptx64 target */
-            llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
-            callTypes.push_back(llvm::PointerType::getUnqual(st));
-        }
-    }
     else
         // Otherwise we already have the types of the arguments
         callTypes = llvmArgTypes;
 
 
     if (returnType == NULL) {
         Assert(m->errorCount > 0);
         return NULL;