MAJOR CHANGE--- STOP WITH THIS BRANCH--
This commit is contained in:
8
Makefile
8
Makefile
@@ -144,7 +144,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
|||||||
type.cpp util.cpp
|
type.cpp util.cpp
|
||||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||||
opt.h stmt.h sym.h type.h util.h
|
opt.h stmt.h sym.h type.h util.h
|
||||||
TARGETS=nvptx64 avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
TARGETS=nvptx avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
||||||
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
||||||
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
||||||
ifneq ($(ARM_ENABLED), 0)
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
@@ -254,15 +254,15 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
|||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
objs/builtins-dispatch.cpp: builtins/dispatch.ll builtins/util.m4 builtins/util_ptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
objs/builtins-dispatch.cpp: builtins/dispatch.ll builtins/util.m4 builtins/util-nvptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
||||||
@echo Creating C++ source from builtins definition file $<
|
@echo Creating C++ source from builtins definition file $<
|
||||||
m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@
|
m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@
|
||||||
|
|
||||||
objs/builtins-%-32bit.cpp: builtins/%.ll builtins/util.m4 builtins/util_ptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
objs/builtins-%-32bit.cpp: builtins/%.ll builtins/util.m4 builtins/util-nvptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
||||||
@echo Creating C++ source from builtins definition file $< \(32 bit version\)
|
@echo Creating C++ source from builtins definition file $< \(32 bit version\)
|
||||||
m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=32 $< | python bitcode2cpp.py $< 32bit > $@
|
m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=32 $< | python bitcode2cpp.py $< 32bit > $@
|
||||||
|
|
||||||
objs/builtins-%-64bit.cpp: builtins/%.ll builtins/util.m4 builtins/util_ptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
objs/builtins-%-64bit.cpp: builtins/%.ll builtins/util.m4 builtins/util-nvptx.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
||||||
@echo Creating C++ source from builtins definition file $< \(64 bit version\)
|
@echo Creating C++ source from builtins definition file $< \(64 bit version\)
|
||||||
m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=64 $< | python bitcode2cpp.py $< 64bit > $@
|
m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=64 $< | python bitcode2cpp.py $< 64bit > $@
|
||||||
|
|
||||||
|
|||||||
12
builtins.cpp
12
builtins.cpp
@@ -693,9 +693,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
|||||||
if (g->target->getISA() != Target::NEON32 &&
|
if (g->target->getISA() != Target::NEON32 &&
|
||||||
g->target->getISA() != Target::NEON16 &&
|
g->target->getISA() != Target::NEON16 &&
|
||||||
g->target->getISA() != Target::NEON8 &&
|
g->target->getISA() != Target::NEON8 &&
|
||||||
g->target->getISA() != Target::NVPTX64)
|
g->target->getISA() != Target::NVPTX)
|
||||||
#else
|
#else
|
||||||
if (g->target->getISA() != Target::NVPTX64)
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
#endif // !__arm__
|
#endif // !__arm__
|
||||||
{
|
{
|
||||||
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||||
@@ -858,14 +858,14 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
// Next, add the target's custom implementations of the various needed
|
// Next, add the target's custom implementations of the various needed
|
||||||
// builtin functions (e.g. __masked_store_32(), etc).
|
// builtin functions (e.g. __masked_store_32(), etc).
|
||||||
switch (g->target->getISA()) {
|
switch (g->target->getISA()) {
|
||||||
case Target::NVPTX64:
|
case Target::NVPTX:
|
||||||
{
|
{
|
||||||
if (runtime32) {
|
if (runtime32) {
|
||||||
fprintf(stderr, "W're sorry, but only 64bit targets are supported at this moment .. \n");
|
fprintf(stderr, "Unforetunatly 32bit targets are supported at the moment .. \n");
|
||||||
assert(0);
|
assert(0);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
EXPORT_MODULE(builtins_bitcode_nvptx64_64bit);
|
EXPORT_MODULE(builtins_bitcode_nvptx_64bit);
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
@@ -1138,7 +1138,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
}
|
}
|
||||||
|
|
||||||
// define the 'programCount' builtin variable
|
// define the 'programCount' builtin variable
|
||||||
if (!g->target->isPTX())
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
{
|
{
|
||||||
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
|
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -105,15 +105,9 @@ define i32 @__lanemask_lt_nvptx() nounwind readnone alwaysinline
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; tasking
|
;; tasking
|
||||||
|
|
||||||
define i8* @ISPCAlloc(i8**, i64, i32) nounwind alwaysinline
|
|
||||||
{
|
|
||||||
%ptr = inttoptr i64 1 to i8*
|
|
||||||
ret i8* %ptr
|
|
||||||
}
|
|
||||||
|
|
||||||
;; this call allocate parameter buffer for kernel launch
|
;; this call allocate parameter buffer for kernel launch
|
||||||
declare i64 @cudaGetParameterBuffer(i64, i64) nounwind
|
declare i64 @cudaGetParameterBuffer(i64, i64) nounwind
|
||||||
define i8* @ISPCGetParamBuffer(i8**, i64 %align, i64 %size) nounwind alwaysinline
|
define i8* @ISPCAlloc(i8**, i64 %size, i32 %align32) nounwind alwaysinline
|
||||||
{
|
{
|
||||||
entry:
|
entry:
|
||||||
%call = tail call i32 @__tid_x()
|
%call = tail call i32 @__tid_x()
|
||||||
@@ -121,6 +115,7 @@ entry:
|
|||||||
%sub = add nsw i32 %call1, -1
|
%sub = add nsw i32 %call1, -1
|
||||||
%and = and i32 %sub, %call
|
%and = and i32 %sub, %call
|
||||||
%cmp = icmp eq i32 %and, 0
|
%cmp = icmp eq i32 %and, 0
|
||||||
|
%align = zext i32 %align32 to i64
|
||||||
br i1 %cmp, label %if.then, label %if.end
|
br i1 %cmp, label %if.then, label %if.end
|
||||||
|
|
||||||
if.then:
|
if.then:
|
||||||
@@ -224,7 +219,7 @@ define void @ISPCSync(i8*) nounwind alwaysinline
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
include(`util_ptx.m4')
|
include(`util-nvptx.m4')
|
||||||
|
|
||||||
stdlib_core()
|
stdlib_core()
|
||||||
packed_load_and_store()
|
packed_load_and_store()
|
||||||
85
ctx.cpp
85
ctx.cpp
@@ -1410,7 +1410,7 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
|||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
||||||
if (!g->target->isPTX()) //g->target->getISA() != Target::NVPTX64)
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::SmallVector<llvm::Constant*, 16> array;
|
llvm::SmallVector<llvm::Constant*, 16> array;
|
||||||
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
for (int i = 0; i < g->target->getVectorWidth() ; ++i) {
|
||||||
@@ -3540,7 +3540,7 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
|||||||
std::vector<llvm::Value *> &argVals,
|
std::vector<llvm::Value *> &argVals,
|
||||||
llvm::Value *launchCount[3]){
|
llvm::Value *launchCount[3]){
|
||||||
|
|
||||||
if (!g->target->isPTX())
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
{
|
{
|
||||||
if (callee == NULL) {
|
if (callee == NULL) {
|
||||||
AssertPos(currentPos, m->errorCount > 0);
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
@@ -3608,7 +3608,79 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
|||||||
args.push_back(launchCount[2]);
|
args.push_back(launchCount[2]);
|
||||||
return CallInst(flaunch, NULL, args, "");
|
return CallInst(flaunch, NULL, args, "");
|
||||||
}
|
}
|
||||||
else /* isPTX == true */
|
else /* NVPTX */
|
||||||
|
{
|
||||||
|
if (callee == NULL) {
|
||||||
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
launchedTasks = true;
|
||||||
|
|
||||||
|
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
|
||||||
|
std::vector<llvm::Type*> argTypes;
|
||||||
|
for (unsigned int i = 0; i < argVals.size(); i++)
|
||||||
|
argTypes.push_back(argVals[i]->getType());
|
||||||
|
llvm::Type *st = llvm::StructType::get(*g->ctx, argTypes);
|
||||||
|
llvm::StructType *argStructType = static_cast<llvm::StructType *>(st);
|
||||||
|
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
|
||||||
|
if (structSize->getType() != LLVMTypes::Int64Type)
|
||||||
|
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
|
||||||
|
"struct_size_to_64");
|
||||||
|
|
||||||
|
const int align = 8;
|
||||||
|
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
|
||||||
|
AssertPos(currentPos, falloc != NULL);
|
||||||
|
std::vector<llvm::Value *> allocArgs;
|
||||||
|
allocArgs.push_back(launchGroupHandlePtr);
|
||||||
|
allocArgs.push_back(structSize);
|
||||||
|
allocArgs.push_back(LLVMInt32(align));
|
||||||
|
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
|
||||||
|
llvm::Value *voidi64 = PtrToIntInst(voidmem, "args_i64");
|
||||||
|
llvm::BasicBlock* if_true = CreateBasicBlock("if_true");
|
||||||
|
llvm::BasicBlock* if_false = CreateBasicBlock("if_false");
|
||||||
|
|
||||||
|
/* check if the pointer returned by ISPCAlloc is not NULL
|
||||||
|
* --------------
|
||||||
|
* this is a workaround for not checking the value of programIndex
|
||||||
|
* because ISPCAlloc will return NULL pointer for all programIndex > 0
|
||||||
|
* of course, if ISPCAlloc fails to get parameter buffer, the pointer for programIndex = 0
|
||||||
|
* will also be NULL
|
||||||
|
* This check must be added, and also rewrite the code to make it less opaque
|
||||||
|
*/
|
||||||
|
llvm::Value* cmp1 = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_NE, voidi64, LLVMInt64(0), "cmp1");
|
||||||
|
BranchInst(if_true, if_false, cmp1);
|
||||||
|
|
||||||
|
/**********************/
|
||||||
|
bblock = if_true;
|
||||||
|
|
||||||
|
// label_if_then block:
|
||||||
|
llvm::Type *pt = llvm::PointerType::getUnqual(st);
|
||||||
|
llvm::Value *argmem = BitCastInst(voidmem, pt);
|
||||||
|
for (unsigned int i = 0; i < argVals.size(); ++i)
|
||||||
|
{
|
||||||
|
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
|
||||||
|
// don't need to do masked store here, I think
|
||||||
|
StoreInst(argVals[i], ptr);
|
||||||
|
}
|
||||||
|
BranchInst(if_false);
|
||||||
|
|
||||||
|
/**********************/
|
||||||
|
bblock = if_false;
|
||||||
|
|
||||||
|
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
|
||||||
|
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
|
||||||
|
AssertPos(currentPos, flaunch != NULL);
|
||||||
|
std::vector<llvm::Value *> args;
|
||||||
|
args.push_back(launchGroupHandlePtr);
|
||||||
|
args.push_back(fptr);
|
||||||
|
args.push_back(voidmem);
|
||||||
|
args.push_back(launchCount[0]);
|
||||||
|
args.push_back(launchCount[1]);
|
||||||
|
args.push_back(launchCount[2]);
|
||||||
|
llvm::Value *ret = CallInst(flaunch, NULL, args, "");
|
||||||
|
return ret;
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
{
|
{
|
||||||
if (callee == NULL) {
|
if (callee == NULL) {
|
||||||
AssertPos(currentPos, m->errorCount > 0);
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
@@ -3684,13 +3756,16 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
|||||||
args.push_back(launchCount[2]);
|
args.push_back(launchCount[2]);
|
||||||
return CallInst(flaunch, NULL, args, "");
|
return CallInst(flaunch, NULL, args, "");
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::SyncInst() {
|
FunctionEmitContext::SyncInst() {
|
||||||
if (!g->target->isPTX())
|
#if 0
|
||||||
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
{
|
{
|
||||||
|
#endif
|
||||||
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
||||||
llvm::Value *nullPtrValue =
|
llvm::Value *nullPtrValue =
|
||||||
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
|
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
|
||||||
@@ -3714,6 +3789,7 @@ FunctionEmitContext::SyncInst() {
|
|||||||
BranchInst(bPostSync);
|
BranchInst(bPostSync);
|
||||||
|
|
||||||
SetCurrentBasicBlock(bPostSync);
|
SetCurrentBasicBlock(bPostSync);
|
||||||
|
#if 0
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
@@ -3726,6 +3802,7 @@ FunctionEmitContext::SyncInst() {
|
|||||||
CallInst(fsync, NULL, launchGroupHandle, "");
|
CallInst(fsync, NULL, launchGroupHandle, "");
|
||||||
StoreInst(nullPtrValue, launchGroupHandlePtr);
|
StoreInst(nullPtrValue, launchGroupHandlePtr);
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
13
decl.cpp
13
decl.cpp
@@ -531,7 +531,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
|||||||
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
|
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
|
||||||
|
|
||||||
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
|
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
|
||||||
if (isTask && g->target->isPTX()) //getISA() == Target::NVPTX64)
|
if (isTask && g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
// ds->storageClass = SC_EXTERN_C;
|
// ds->storageClass = SC_EXTERN_C;
|
||||||
ds->typeQualifiers |= TYPEQUAL_UNMASKED;
|
ds->typeQualifiers |= TYPEQUAL_UNMASKED;
|
||||||
@@ -547,12 +547,11 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
|||||||
"qualifiers");
|
"qualifiers");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
// if (!g->target->isPTX())
|
if (isExternC && isTask) {
|
||||||
if (isExternC && isTask) {
|
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
|
||||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
|
"qualifiers");
|
||||||
"qualifiers");
|
return;
|
||||||
return;
|
}
|
||||||
}
|
|
||||||
if (isExternC && isExported) {
|
if (isExternC && isExported) {
|
||||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
|
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
|
||||||
"qualifiers");
|
"qualifiers");
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ endif
|
|||||||
|
|
||||||
#
|
#
|
||||||
ISPC=ispc
|
ISPC=ispc
|
||||||
ISPC_FLAGS=-O3 --math-lib=default --target=nvptx64 --opt=fast-math
|
ISPC_FLAGS=-O3 --math-lib=default --target=nvptx --opt=fast-math
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
#
|
#
|
||||||
|
|||||||
159
func.cpp
159
func.cpp
@@ -125,7 +125,7 @@ Function::Function(Symbol *s, Stmt *c) {
|
|||||||
sym->parentFunction = this;
|
sym->parentFunction = this;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type->isTask) {
|
if (type->isTask && g->target->getISA() != Target::NVPTX) {
|
||||||
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
||||||
Assert(threadIndexSym);
|
Assert(threadIndexSym);
|
||||||
threadCountSym = m->symbolTable->LookupVariable("threadCount");
|
threadCountSym = m->symbolTable->LookupVariable("threadCount");
|
||||||
@@ -237,12 +237,122 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
#endif
|
#endif
|
||||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||||
Assert(type != NULL);
|
Assert(type != NULL);
|
||||||
|
if (type->isTask == true && g->target->getISA() != Target::NVPTX) {
|
||||||
|
// For tasks, there should always be three parameters: the
|
||||||
|
// pointer to the structure that holds all of the arguments, the
|
||||||
|
// thread index, and the thread count variables.
|
||||||
|
llvm::Function::arg_iterator argIter = function->arg_begin();
|
||||||
|
llvm::Value *structParamPtr = argIter++;
|
||||||
|
llvm::Value *threadIndex = argIter++;
|
||||||
|
llvm::Value *threadCount = argIter++;
|
||||||
|
llvm::Value *taskIndex = argIter++;
|
||||||
|
llvm::Value *taskCount = argIter++;
|
||||||
|
llvm::Value *taskIndex0 = argIter++;
|
||||||
|
llvm::Value *taskIndex1 = argIter++;
|
||||||
|
llvm::Value *taskIndex2 = argIter++;
|
||||||
|
llvm::Value *taskCount0 = argIter++;
|
||||||
|
llvm::Value *taskCount1 = argIter++;
|
||||||
|
llvm::Value *taskCount2 = argIter++;
|
||||||
|
|
||||||
|
// Copy the function parameter values from the structure into local
|
||||||
|
// storage
|
||||||
|
for (unsigned int i = 0; i < args.size(); ++i)
|
||||||
|
lCopyInTaskParameter(i, structParamPtr, args, ctx);
|
||||||
|
|
||||||
|
if (type->isUnmasked == false) {
|
||||||
|
// Copy in the mask as well.
|
||||||
|
int nArgs = (int)args.size();
|
||||||
|
// The mask is the last parameter in the argument structure
|
||||||
|
llvm::Value *ptr = ctx->AddElementOffset(structParamPtr, nArgs, NULL,
|
||||||
|
"task_struct_mask");
|
||||||
|
llvm::Value *ptrval = ctx->LoadInst(ptr, "mask");
|
||||||
|
ctx->SetFunctionMask(ptrval);
|
||||||
|
}
|
||||||
|
|
||||||
|
// Copy threadIndex and threadCount into stack-allocated storage so
|
||||||
|
// that their symbols point to something reasonable.
|
||||||
|
threadIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadIndex");
|
||||||
|
ctx->StoreInst(threadIndex, threadIndexSym->storagePtr);
|
||||||
|
|
||||||
|
threadCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "threadCount");
|
||||||
|
ctx->StoreInst(threadCount, threadCountSym->storagePtr);
|
||||||
|
|
||||||
|
// Copy taskIndex and taskCount into stack-allocated storage so
|
||||||
|
// that their symbols point to something reasonable.
|
||||||
|
taskIndexSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex");
|
||||||
|
ctx->StoreInst(taskIndex, taskIndexSym->storagePtr);
|
||||||
|
|
||||||
|
taskCountSym->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount");
|
||||||
|
ctx->StoreInst(taskCount, taskCountSym->storagePtr);
|
||||||
|
|
||||||
|
taskIndexSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex0");
|
||||||
|
ctx->StoreInst(taskIndex0, taskIndexSym0->storagePtr);
|
||||||
|
taskIndexSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex1");
|
||||||
|
ctx->StoreInst(taskIndex1, taskIndexSym1->storagePtr);
|
||||||
|
taskIndexSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskIndex2");
|
||||||
|
ctx->StoreInst(taskIndex2, taskIndexSym2->storagePtr);
|
||||||
|
|
||||||
|
taskCountSym0->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount0");
|
||||||
|
ctx->StoreInst(taskCount0, taskCountSym0->storagePtr);
|
||||||
|
taskCountSym1->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount1");
|
||||||
|
ctx->StoreInst(taskCount1, taskCountSym1->storagePtr);
|
||||||
|
taskCountSym2->storagePtr = ctx->AllocaInst(LLVMTypes::Int32Type, "taskCount2");
|
||||||
|
ctx->StoreInst(taskCount2, taskCountSym2->storagePtr);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
// Regular, non-task function
|
||||||
|
llvm::Function::arg_iterator argIter = function->arg_begin();
|
||||||
|
for (unsigned int i = 0; i < args.size(); ++i, ++argIter) {
|
||||||
|
Symbol *sym = args[i];
|
||||||
|
if (sym == NULL)
|
||||||
|
// anonymous function parameter
|
||||||
|
continue;
|
||||||
|
|
||||||
|
argIter->setName(sym->name.c_str());
|
||||||
|
|
||||||
|
// Allocate stack storage for the parameter and emit code
|
||||||
|
// to store its value there.
|
||||||
|
sym->storagePtr = ctx->AllocaInst(argIter->getType(), sym->name.c_str());
|
||||||
|
ctx->StoreInst(argIter, sym->storagePtr);
|
||||||
|
ctx->EmitFunctionParameterDebugInfo(sym, i);
|
||||||
|
}
|
||||||
|
|
||||||
|
// If the number of actual function arguments is equal to the
|
||||||
|
// number of declared arguments in decl->functionParams, then we
|
||||||
|
// don't have a mask parameter, so set it to be all on. This
|
||||||
|
// happens for example with 'export'ed functions that the app
|
||||||
|
// calls.
|
||||||
|
if (argIter == function->arg_end()) {
|
||||||
|
Assert(type->isUnmasked || type->isExported);
|
||||||
|
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
Assert(type->isUnmasked == false);
|
||||||
|
|
||||||
|
// Otherwise use the mask to set the entry mask value
|
||||||
|
argIter->setName("__mask");
|
||||||
|
Assert(argIter->getType() == LLVMTypes::MaskType);
|
||||||
|
ctx->SetFunctionMask(argIter);
|
||||||
|
Assert(++argIter == function->arg_end());
|
||||||
|
}
|
||||||
|
if (type->isTask == true && g->target->getISA() == Target::NVPTX)
|
||||||
|
{
|
||||||
|
llvm::NamedMDNode* annotations =
|
||||||
|
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
||||||
|
llvm::SmallVector<llvm::Value*, 3> av;
|
||||||
|
av.push_back(function);
|
||||||
|
av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
|
||||||
|
av.push_back(LLVMInt32(1));
|
||||||
|
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
||||||
|
}
|
||||||
|
}
|
||||||
|
#if 0
|
||||||
if (type->isTask == true) {
|
if (type->isTask == true) {
|
||||||
// For tasks, we there should always be three parmeters: the
|
// For tasks, we there should always be three parmeters: the
|
||||||
// pointer to the structure that holds all of the arguments, the
|
// pointer to the structure that holds all of the arguments, the
|
||||||
// thread index, and the thread count variables.
|
// thread index, and the thread count variables.
|
||||||
|
|
||||||
if (!g->target->isPTX()) //if (g->target->getISA() != Target::NVPTX64)
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::Function::arg_iterator argIter = function->arg_begin();
|
llvm::Function::arg_iterator argIter = function->arg_begin();
|
||||||
llvm::Value *structParamPtr = argIter++;
|
llvm::Value *structParamPtr = argIter++;
|
||||||
@@ -341,7 +451,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
Assert(type->isUnmasked || type->isExported);
|
Assert(type->isUnmasked || type->isExported);
|
||||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||||
}
|
}
|
||||||
else /* for NVPTX64 , function must be unmasked */
|
else /* for NVPTX, function must be unmasked */
|
||||||
{
|
{
|
||||||
assert(0);
|
assert(0);
|
||||||
Assert(type->isUnmasked == false);
|
Assert(type->isUnmasked == false);
|
||||||
@@ -353,7 +463,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
Assert(++argIter == function->arg_end());
|
Assert(++argIter == function->arg_end());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::NamedMDNode* annotations =
|
llvm::NamedMDNode* annotations =
|
||||||
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
||||||
@@ -402,6 +512,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
Assert(++argIter == function->arg_end());
|
Assert(++argIter == function->arg_end());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
// Finally, we can generate code for the function
|
// Finally, we can generate code for the function
|
||||||
if (code != NULL) {
|
if (code != NULL) {
|
||||||
@@ -535,26 +646,12 @@ Function::GenerateIR() {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// And we can now go ahead and emit the code
|
// And we can now go ahead and emit the code
|
||||||
/* export function with NVPTX64 target should be emitted host architecture */
|
|
||||||
#if 0
|
|
||||||
const FunctionType *func_type= CastType<FunctionType>(sym->type);
|
|
||||||
if (g->target->getISA() == Target::NVPTX64 && func_type->isExported)
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
if (g->target->getISA() != Target::NVPTX64 && g->target->isPTX() && func_type->isTask)
|
|
||||||
return;
|
|
||||||
#endif
|
|
||||||
|
|
||||||
// if (!(g->target->getISA()==Target::NVPTX64 && func_type->isExported))
|
|
||||||
{
|
{
|
||||||
FunctionEmitContext ec(this, sym, function, firstStmtPos);
|
FunctionEmitContext ec(this, sym, function, firstStmtPos);
|
||||||
emitCode(&ec, function, firstStmtPos);
|
emitCode(&ec, function, firstStmtPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (m->errorCount == 0) {
|
if (m->errorCount == 0) {
|
||||||
// if (!(g->target->getISA() == Target::NVPTX64 && func_type->isExported))
|
|
||||||
if (llvm::verifyFunction(*function, llvm::ReturnStatusAction) == true) {
|
if (llvm::verifyFunction(*function, llvm::ReturnStatusAction) == true) {
|
||||||
if (g->debugPrint)
|
if (g->debugPrint)
|
||||||
function->dump();
|
function->dump();
|
||||||
@@ -566,18 +663,18 @@ Function::GenerateIR() {
|
|||||||
// the application can call it
|
// the application can call it
|
||||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||||
Assert(type != NULL);
|
Assert(type != NULL);
|
||||||
if (type->isExported) { // && g->target->getISA() != Target::VPTX64) {
|
if (type->isExported) {
|
||||||
if (!type->isTask) {
|
if (!type->isTask) {
|
||||||
if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::NamedMDNode* annotations =
|
llvm::NamedMDNode* annotations =
|
||||||
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
||||||
llvm::SmallVector<llvm::Value*, 3> av;
|
llvm::SmallVector<llvm::Value*, 3> av;
|
||||||
av.push_back(function);
|
av.push_back(function);
|
||||||
av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
|
av.push_back(llvm::MDString::get(*g->ctx, "kernel"));
|
||||||
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
|
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
|
||||||
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
||||||
}
|
}
|
||||||
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
|
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
|
||||||
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
||||||
std::string functionName = sym->name;
|
std::string functionName = sym->name;
|
||||||
@@ -585,7 +682,7 @@ Function::GenerateIR() {
|
|||||||
if (g->mangleFunctionsWithTarget)
|
if (g->mangleFunctionsWithTarget)
|
||||||
functionName += std::string("_") + g->target->GetISAString();
|
functionName += std::string("_") + g->target->GetISAString();
|
||||||
|
|
||||||
if (g->target->getISA() == Target::NVPTX64)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
functionName += std::string("___export");
|
functionName += std::string("___export");
|
||||||
llvm::Function *appFunction =
|
llvm::Function *appFunction =
|
||||||
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
|
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
|
||||||
@@ -615,7 +712,7 @@ Function::GenerateIR() {
|
|||||||
FATAL("Function verificication failed");
|
FATAL("Function verificication failed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (g->target->isPTX() && g->target->getISA() == Target::NVPTX64)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::NamedMDNode* annotations =
|
llvm::NamedMDNode* annotations =
|
||||||
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
m->module->getOrInsertNamedMetadata("nvvm.annotations");
|
||||||
|
|||||||
32
ispc.cpp
32
ispc.cpp
@@ -174,7 +174,7 @@ static const char *supportedCPUs[] = {
|
|||||||
#endif // LLVM 3.4+
|
#endif // LLVM 3.4+
|
||||||
};
|
};
|
||||||
|
|
||||||
Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, bool isPTX) :
|
Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
||||||
m_target(NULL),
|
m_target(NULL),
|
||||||
m_targetMachine(NULL),
|
m_targetMachine(NULL),
|
||||||
#if defined(LLVM_3_1)
|
#if defined(LLVM_3_1)
|
||||||
@@ -184,7 +184,6 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
#endif
|
#endif
|
||||||
m_valid(false),
|
m_valid(false),
|
||||||
m_isa(SSE2),
|
m_isa(SSE2),
|
||||||
m_isPTX(isPTX),
|
|
||||||
m_arch(""),
|
m_arch(""),
|
||||||
m_is32Bit(true),
|
m_is32Bit(true),
|
||||||
m_cpu(""),
|
m_cpu(""),
|
||||||
@@ -212,7 +211,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
if (!strcmp(cpu, "core-avx2"))
|
if (!strcmp(cpu, "core-avx2"))
|
||||||
isa = "avx2-i32x8";
|
isa = "avx2-i32x8";
|
||||||
else if (!strcmp(cpu, "sm_35"))
|
else if (!strcmp(cpu, "sm_35"))
|
||||||
isa = "nvptx64";
|
isa = "nvptx";
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (!strcmp(cpu, "cortex-a9") ||
|
else if (!strcmp(cpu, "cortex-a9") ||
|
||||||
!strcmp(cpu, "cortex-a15"))
|
!strcmp(cpu, "cortex-a15"))
|
||||||
@@ -249,7 +248,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
cpu = "cortex-a9";
|
cpu = "cortex-a9";
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
if (cpu == NULL && !strcmp(isa, "nvptx64"))
|
if (cpu == NULL && !strcmp(isa, "nvptx"))
|
||||||
cpu = "sm_35";
|
cpu = "sm_35";
|
||||||
|
|
||||||
if (cpu == NULL) {
|
if (cpu == NULL) {
|
||||||
@@ -280,8 +279,8 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
this->m_cpu = cpu;
|
this->m_cpu = cpu;
|
||||||
|
|
||||||
if (arch == NULL) {
|
if (arch == NULL) {
|
||||||
if (!strcmp(isa, "nvptx64"))
|
if (!strcmp(isa, "nvptx"))
|
||||||
arch = "nvptx64";
|
arch = "nvptx";
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (!strncmp(isa, "neon", 4))
|
else if (!strncmp(isa, "neon", 4))
|
||||||
arch = "arm";
|
arch = "arm";
|
||||||
@@ -709,10 +708,9 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
else if (!strcasecmp(isa, "nvptx64"))
|
else if (!strcasecmp(isa, "nvptx"))
|
||||||
{
|
{
|
||||||
this->m_isa = Target::NVPTX64;
|
this->m_isa = Target::NVPTX;
|
||||||
this->m_isPTX = true;
|
|
||||||
this->m_nativeVectorWidth = 32;
|
this->m_nativeVectorWidth = 32;
|
||||||
this->m_nativeVectorAlignment = 32;
|
this->m_nativeVectorAlignment = 32;
|
||||||
this->m_vectorWidth = 1;
|
this->m_vectorWidth = 1;
|
||||||
@@ -780,7 +778,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
|
dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-"
|
||||||
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
|
"i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-"
|
||||||
"f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
|
"f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";
|
||||||
} else if (m_isa == Target::NVPTX64)
|
} else if (m_isa == Target::NVPTX)
|
||||||
{
|
{
|
||||||
dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
||||||
}
|
}
|
||||||
@@ -803,7 +801,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic, boo
|
|||||||
// Initialize target-specific "target-feature" attribute.
|
// Initialize target-specific "target-feature" attribute.
|
||||||
if (!m_attributes.empty()) {
|
if (!m_attributes.empty()) {
|
||||||
llvm::AttrBuilder attrBuilder;
|
llvm::AttrBuilder attrBuilder;
|
||||||
if (m_isa != Target::NVPTX64)
|
if (m_isa != Target::NVPTX)
|
||||||
attrBuilder.addAttribute("target-cpu", this->m_cpu);
|
attrBuilder.addAttribute("target-cpu", this->m_cpu);
|
||||||
attrBuilder.addAttribute("target-features", this->m_attributes);
|
attrBuilder.addAttribute("target-features", this->m_attributes);
|
||||||
this->m_tf_attributes = new llvm::AttributeSet(
|
this->m_tf_attributes = new llvm::AttributeSet(
|
||||||
@@ -838,7 +836,7 @@ Target::SupportedCPUs() {
|
|||||||
|
|
||||||
const char *
|
const char *
|
||||||
Target::SupportedArchs() {
|
Target::SupportedArchs() {
|
||||||
return "nvptx64, "
|
return "nvptx, "
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
"arm, "
|
"arm, "
|
||||||
#endif
|
#endif
|
||||||
@@ -848,7 +846,7 @@ Target::SupportedArchs() {
|
|||||||
|
|
||||||
const char *
|
const char *
|
||||||
Target::SupportedTargets() {
|
Target::SupportedTargets() {
|
||||||
return "nvptx64, "
|
return "nvptx, "
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
"neon-i8x16, neon-i16x8, neon-i32x4, "
|
"neon-i8x16, neon-i16x8, neon-i32x4, "
|
||||||
#endif
|
#endif
|
||||||
@@ -866,9 +864,9 @@ Target::SupportedTargets() {
|
|||||||
std::string
|
std::string
|
||||||
Target::GetTripleString() const {
|
Target::GetTripleString() const {
|
||||||
llvm::Triple triple;
|
llvm::Triple triple;
|
||||||
if (m_arch == "nvptx64")
|
if (m_arch == "nvptx")
|
||||||
{
|
{
|
||||||
triple.setTriple("nvptx64");
|
triple.setTriple("nvptx");
|
||||||
}
|
}
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (m_arch == "arm") {
|
else if (m_arch == "arm") {
|
||||||
@@ -902,8 +900,8 @@ Target::GetTripleString() const {
|
|||||||
const char *
|
const char *
|
||||||
Target::ISAToString(ISA isa) {
|
Target::ISAToString(ISA isa) {
|
||||||
switch (isa) {
|
switch (isa) {
|
||||||
case Target::NVPTX64:
|
case Target::NVPTX:
|
||||||
return "nvptx64";
|
return "nvptx";
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
case Target::NEON8:
|
case Target::NEON8:
|
||||||
return "neon-8";
|
return "neon-8";
|
||||||
|
|||||||
6
ispc.h
6
ispc.h
@@ -179,7 +179,7 @@ public:
|
|||||||
flexible/performant of them will apear last in the enumerant. Note
|
flexible/performant of them will apear last in the enumerant. Note
|
||||||
also that __best_available_isa() needs to be updated if ISAs are
|
also that __best_available_isa() needs to be updated if ISAs are
|
||||||
added or the enumerant values are reordered. */
|
added or the enumerant values are reordered. */
|
||||||
enum ISA { NVPTX64,
|
enum ISA { NVPTX,
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
NEON32, NEON16, NEON8,
|
NEON32, NEON16, NEON8,
|
||||||
#endif
|
#endif
|
||||||
@@ -189,7 +189,7 @@ public:
|
|||||||
/** Initializes the given Target pointer for a target of the given
|
/** Initializes the given Target pointer for a target of the given
|
||||||
name, if the name is a known target. Returns true if the
|
name, if the name is a known target. Returns true if the
|
||||||
target was initialized and false if the name is unknown. */
|
target was initialized and false if the name is unknown. */
|
||||||
Target(const char *arch, const char *cpu, const char *isa, bool pic, bool isPTX = false);
|
Target(const char *arch, const char *cpu, const char *isa, bool pic);
|
||||||
|
|
||||||
/** Returns a comma-delimited string giving the names of the currently
|
/** Returns a comma-delimited string giving the names of the currently
|
||||||
supported compilation targets. */
|
supported compilation targets. */
|
||||||
@@ -251,7 +251,6 @@ public:
|
|||||||
bool isValid() const {return m_valid;}
|
bool isValid() const {return m_valid;}
|
||||||
|
|
||||||
ISA getISA() const {return m_isa;}
|
ISA getISA() const {return m_isa;}
|
||||||
bool isPTX() const {return m_isPTX;}
|
|
||||||
|
|
||||||
std::string getArch() const {return m_arch;}
|
std::string getArch() const {return m_arch;}
|
||||||
|
|
||||||
@@ -310,7 +309,6 @@ private:
|
|||||||
|
|
||||||
/** Instruction set being compiled to. */
|
/** Instruction set being compiled to. */
|
||||||
ISA m_isa;
|
ISA m_isa;
|
||||||
bool m_isPTX;
|
|
||||||
|
|
||||||
/** Target system architecture. (e.g. "x86-64", "x86"). */
|
/** Target system architecture. (e.g. "x86-64", "x86"). */
|
||||||
std::string m_arch;
|
std::string m_arch;
|
||||||
|
|||||||
144
module.cpp
144
module.cpp
@@ -733,7 +733,7 @@ Module::AddFunctionDeclaration(const std::string &name,
|
|||||||
if (storageClass == SC_EXTERN_C) {
|
if (storageClass == SC_EXTERN_C) {
|
||||||
// Make sure the user hasn't supplied both an 'extern "C"' and a
|
// Make sure the user hasn't supplied both an 'extern "C"' and a
|
||||||
// 'task' qualifier with the function
|
// 'task' qualifier with the function
|
||||||
if (functionType->isTask) //&& !g->target->isPTX()) //tISA() != Target::NVPTX64)
|
if (functionType->isTask)
|
||||||
{
|
{
|
||||||
Error(pos, "\"task\" qualifier is illegal with C-linkage extern "
|
Error(pos, "\"task\" qualifier is illegal with C-linkage extern "
|
||||||
"function \"%s\". Ignoring this function.", name.c_str());
|
"function \"%s\". Ignoring this function.", name.c_str());
|
||||||
@@ -796,8 +796,8 @@ Module::AddFunctionDeclaration(const std::string &name,
|
|||||||
#else // LLVM 3.1 and 3.3+
|
#else // LLVM 3.1 and 3.3+
|
||||||
function->addFnAttr(llvm::Attribute::AlwaysInline);
|
function->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||||
#endif
|
#endif
|
||||||
/* evghenii: on PTX target this must not be used, cause crash, dunno why */
|
/* evghenii: on PTX target the following must not be set ... why ?!? */
|
||||||
if (functionType->isTask && g->target->getISA() != Target::NVPTX64)
|
if (functionType->isTask && g->target->getISA() != Target::NVPTX)
|
||||||
// This also applies transitively to members I think?
|
// This also applies transitively to members I think?
|
||||||
#if defined(LLVM_3_1)
|
#if defined(LLVM_3_1)
|
||||||
function->setDoesNotAlias(1, true);
|
function->setDoesNotAlias(1, true);
|
||||||
@@ -953,7 +953,7 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
|
|||||||
const char *fileType = NULL;
|
const char *fileType = NULL;
|
||||||
switch (outputType) {
|
switch (outputType) {
|
||||||
case Asm:
|
case Asm:
|
||||||
if (g->target->getISA() != Target::NVPTX64)
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
{
|
{
|
||||||
if (strcasecmp(suffix, "s"))
|
if (strcasecmp(suffix, "s"))
|
||||||
fileType = "assembly";
|
fileType = "assembly";
|
||||||
@@ -1053,7 +1053,7 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::raw_fd_ostream fos(fd, (fd != 1), false);
|
llvm::raw_fd_ostream fos(fd, (fd != 1), false);
|
||||||
if (g->target->getISA() == Target::NVPTX64)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
const std::string dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
const std::string dl_string = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v16:16:16-v32:32:32-v64:64:64-v128:128:128-n16:32:64";
|
||||||
module->setDataLayout(dl_string);
|
module->setDataLayout(dl_string);
|
||||||
@@ -1925,7 +1925,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
|||||||
opts.addMacroDef(g->cppArgs[i].substr(2));
|
opts.addMacroDef(g->cppArgs[i].substr(2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
if (g->target->getISA() == Target::NVPTX64)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
opts.addMacroDef("__NVPTX__");
|
opts.addMacroDef("__NVPTX__");
|
||||||
opts.addMacroDef("programIndex=laneIndex()");
|
opts.addMacroDef("programIndex=laneIndex()");
|
||||||
@@ -2331,135 +2331,9 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
const char *hostStubFileName,
|
const char *hostStubFileName,
|
||||||
const char *devStubFileName)
|
const char *devStubFileName)
|
||||||
{
|
{
|
||||||
char ptxname[] = "nvptx64";
|
if (target == NULL || strchr(target, ',') == NULL) {
|
||||||
for (int k = 0; k < 7; k++)
|
|
||||||
ptxname[k] = target[k];
|
|
||||||
if (0) //target != NULL && strcmp(ptxname,"nvptx64") == 0) // NVPTX64
|
|
||||||
{
|
|
||||||
std::vector<std::string> targets = lExtractTargets(target);
|
|
||||||
Assert(targets.size() > 1);
|
|
||||||
// We're only compiling to a single target
|
|
||||||
int errorCount = 0;
|
|
||||||
|
|
||||||
const char *suffix_orig = strrchr(outFileName, '.');
|
|
||||||
++suffix_orig;
|
|
||||||
assert(suffix_orig!=NULL);
|
|
||||||
|
|
||||||
g->PtxString = std::string();
|
|
||||||
|
|
||||||
for (int itarget = 0; itarget < 1; itarget++)
|
|
||||||
{
|
|
||||||
fprintf(stderr, "compiling nvptx64 : target= %s\n",targets[itarget].c_str());
|
|
||||||
g->target = new Target(arch, cpu, targets[itarget].c_str(), generatePIC, /* isPTX= */ true);
|
|
||||||
if (!g->target->isValid())
|
|
||||||
return 1;
|
|
||||||
|
|
||||||
m = new Module(srcFile);
|
|
||||||
if (m->CompileFile() == 0) {
|
|
||||||
if (outputType == CXX) {
|
|
||||||
if (target == NULL || strncmp(target, "generic-", 8) != 0) {
|
|
||||||
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
|
|
||||||
"targets must be used.");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else if (outputType == Asm || outputType == Object) {
|
|
||||||
if (target != NULL && strncmp(target, "generic-", 8) == 0) {
|
|
||||||
Error(SourcePos(), "When using a \"generic-*\" compilation target, "
|
|
||||||
"%s output can not be used.",
|
|
||||||
(outputType == Asm) ? "assembly" : "object file");
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
assert(outFileName != NULL);
|
|
||||||
|
|
||||||
std::string targetOutFileName =
|
|
||||||
lGetTargetFileName(outFileName, targets[itarget].c_str());
|
|
||||||
if (outputType == Asm)
|
|
||||||
{
|
|
||||||
const char * targetOutFileName_c = targetOutFileName.c_str();
|
|
||||||
const int suffix = strrchr(targetOutFileName_c, '.') - targetOutFileName_c + 1;
|
|
||||||
if (itarget == 1 && !strcasecmp(suffix_orig, "ptx"))
|
|
||||||
{
|
|
||||||
targetOutFileName[suffix ] = 's';
|
|
||||||
targetOutFileName[suffix+1] = 0;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if (outputType != Object)
|
|
||||||
{
|
|
||||||
if (!m->writeOutput(outputType, targetOutFileName.c_str(), includeFileName))
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
else if (itarget > 0)
|
|
||||||
{
|
|
||||||
if (!m->writeOutput(outputType, outFileName, includeFileName))
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
if (itarget == 0)
|
|
||||||
{ /* store ptx into memory */
|
|
||||||
llvm::PassManager pm;
|
|
||||||
#if defined(LLVM_3_1)
|
|
||||||
pm.add(new llvm::TargetData(*g->target->getDataLayout()));
|
|
||||||
#else
|
|
||||||
pm.add(new llvm::DataLayout(*g->target->getDataLayout()));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
llvm::raw_string_ostream rso(g->PtxString);
|
|
||||||
llvm::formatted_raw_ostream fos(rso);
|
|
||||||
|
|
||||||
llvm::TargetMachine::CodeGenFileType fileType = llvm::TargetMachine::CGFT_AssemblyFile;
|
|
||||||
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
|
|
||||||
if (targetMachine->addPassesToEmitFile(pm, fos, fileType)) {
|
|
||||||
fprintf(stderr, "Fatal error adding passes to emit object file!");
|
|
||||||
exit(1);
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::Module *module = m->module;
|
|
||||||
pm.run(*module);
|
|
||||||
fos.flush();
|
|
||||||
assert(!g->PtxString.empty());
|
|
||||||
#if 0
|
|
||||||
std::cout << g->PtxString << std::endl;
|
|
||||||
#endif
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
if (itarget > 0)
|
|
||||||
{
|
|
||||||
if (headerFileName != NULL)
|
|
||||||
if (!m->writeOutput(Module::Header, headerFileName))
|
|
||||||
return 1;
|
|
||||||
if (depsFileName != NULL)
|
|
||||||
if (!m->writeOutput(Module::Deps,depsFileName))
|
|
||||||
return 1;
|
|
||||||
if (hostStubFileName != NULL)
|
|
||||||
if (!m->writeOutput(Module::HostStub,hostStubFileName))
|
|
||||||
return 1;
|
|
||||||
if (devStubFileName != NULL)
|
|
||||||
if (!m->writeOutput(Module::DevStub,devStubFileName))
|
|
||||||
return 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
else
|
|
||||||
++m->errorCount;
|
|
||||||
|
|
||||||
errorCount += m->errorCount;
|
|
||||||
delete m;
|
|
||||||
m = NULL;
|
|
||||||
|
|
||||||
delete g->target;
|
|
||||||
g->target = NULL;
|
|
||||||
|
|
||||||
}
|
|
||||||
return errorCount > 0;
|
|
||||||
}
|
|
||||||
else if (target == NULL || strchr(target, ',') == NULL) {
|
|
||||||
// We're only compiling to a single target
|
// We're only compiling to a single target
|
||||||
const bool isPTX = strcmp(target, "nvptx64") == 0;
|
g->target = new Target(arch, cpu, target, generatePIC);
|
||||||
g->target = new Target(arch, cpu, target, generatePIC, isPTX);
|
|
||||||
if (!g->target->isValid())
|
if (!g->target->isValid())
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@@ -2525,8 +2399,6 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
// The user supplied multiple targets
|
// The user supplied multiple targets
|
||||||
std::vector<std::string> targets = lExtractTargets(target);
|
std::vector<std::string> targets = lExtractTargets(target);
|
||||||
Assert(targets.size() > 1);
|
Assert(targets.size() > 1);
|
||||||
for (unsigned int i = 0; i < targets.size(); ++i)
|
|
||||||
assert(strcmp(targets[i].c_str(), "nvptx64") < 0);
|
|
||||||
|
|
||||||
if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
|
if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
|
||||||
Error(SourcePos(), "Multi-target compilation can't generate output "
|
Error(SourcePos(), "Multi-target compilation can't generate output "
|
||||||
|
|||||||
44
type.cpp
44
type.cpp
@@ -2925,7 +2925,7 @@ FunctionType::GetReturnTypeString() const {
|
|||||||
llvm::FunctionType *
|
llvm::FunctionType *
|
||||||
FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
||||||
|
|
||||||
if (isTask == true) // && !g->target->isPTX()) //getISA() != Target::NVPTX64)
|
if (isTask == true)
|
||||||
Assert(removeMask == false);
|
Assert(removeMask == false);
|
||||||
|
|
||||||
// Get the LLVM Type *s for the function arguments
|
// Get the LLVM Type *s for the function arguments
|
||||||
@@ -2950,44 +2950,30 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
|||||||
llvmArgTypes.push_back(LLVMTypes::MaskType);
|
llvmArgTypes.push_back(LLVMTypes::MaskType);
|
||||||
|
|
||||||
std::vector<llvm::Type *> callTypes;
|
std::vector<llvm::Type *> callTypes;
|
||||||
if (isTask) {
|
if (isTask && g->target->getISA() != Target::NVPTX) {
|
||||||
// Tasks take three arguments: a pointer to a struct that holds the
|
// Tasks take three arguments: a pointer to a struct that holds the
|
||||||
// actual task arguments, the thread index, and the total number of
|
// actual task arguments, the thread index, and the total number of
|
||||||
// threads the tasks system has running. (Task arguments are
|
// threads the tasks system has running. (Task arguments are
|
||||||
// marshalled in a struct so that it's easy to allocate space to
|
// marshalled in a struct so that it's easy to allocate space to
|
||||||
// hold them until the task actually runs.)
|
// hold them until the task actually runs.)
|
||||||
// if (g->target->getISA() != Target::NVPTX64)
|
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
||||||
if (!g->target->isPTX())
|
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
||||||
{
|
callTypes.push_back(LLVMTypes::Int32Type); // threadIndex
|
||||||
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
||||||
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // threadIndex
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // threadCount
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount
|
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex0
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex1
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskIndex2
|
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount0
|
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount1
|
|
||||||
callTypes.push_back(LLVMTypes::Int32Type); // taskCount2
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
|
||||||
if (g->target->getISA() == Target::NVPTX64)
|
|
||||||
callTypes = llvmArgTypes;
|
|
||||||
else
|
|
||||||
{
|
|
||||||
assert(0); /* evghenii: must be removed in final, just for test for nvptx64 target */
|
|
||||||
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
|
||||||
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
// Otherwise we already have the types of the arguments
|
// Otherwise we already have the types of the arguments
|
||||||
callTypes = llvmArgTypes;
|
callTypes = llvmArgTypes;
|
||||||
|
|
||||||
|
|
||||||
if (returnType == NULL) {
|
if (returnType == NULL) {
|
||||||
Assert(m->errorCount > 0);
|
Assert(m->errorCount > 0);
|
||||||
return NULL;
|
return NULL;
|
||||||
|
|||||||
Reference in New Issue
Block a user