diff --git a/ctx.cpp b/ctx.cpp index 3aee776a..c97da129 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -1404,15 +1404,33 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { llvm::Value * FunctionEmitContext::ProgramIndexVector(bool is32bits) { + if (1) // NOTE(review): always true, so the else branch below is dead code; disabled check: g->target->getISA() != Target::NVPTX64 + { llvm::SmallVector<llvm::Constant*, 16> array; for (int i = 0; i < g->target->getVectorWidth() ; ++i) { - llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i); - array.push_back(C); + llvm::Constant *C = is32bits ? LLVMInt32(i) : LLVMInt64(i); + array.push_back(C); } llvm::Constant* index = llvm::ConstantVector::get(array); return index; + } + else + { + std::vector<Symbol *> mm; + m->symbolTable->LookupFunction("__tid_x", &mm); + if (g->target->getMaskBitCount() == 1) + AssertPos(currentPos, mm.size() == 1); + else + // There should be one with signed int signature, one unsigned int. + AssertPos(currentPos, mm.size() == 2); + // We can actually call either one, since both are i32s as far as + // LLVM's type system is concerned... 
+ llvm::Function *fmm = mm[0]->function; + std::vector<llvm::Value *> args; + return CallInst(fmm, NULL, args, "__tid_x"); + } } diff --git a/func.cpp b/func.cpp index 532f92e6..62372a21 100644 --- a/func.cpp +++ b/func.cpp @@ -523,6 +523,11 @@ Function::GenerateIR() { } // And we can now go ahead and emit the code + { /* Exported functions are not emitted for the NVPTX64 target; they should be emitted for the host architecture instead. */ + const FunctionType *type= CastType<FunctionType>(sym->type); + if (type != NULL && g->target->getISA() == Target::NVPTX64 && type->isExported) + return; + } { FunctionEmitContext ec(this, sym, function, firstStmtPos); emitCode(&ec, function, firstStmtPos); @@ -540,7 +545,7 @@ Function::GenerateIR() { // the application can call it const FunctionType *type = CastType<FunctionType>(sym->type); Assert(type != NULL); - if (type->isExported) { + if (type->isExported && g->target->getISA() != Target::NVPTX64) { if (!type->isTask) { llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true); llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage; diff --git a/ispc.cpp b/ispc.cpp index 8bdcb0c4..00d94000 100644 --- a/ispc.cpp +++ b/ispc.cpp @@ -167,6 +167,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : #endif m_valid(false), m_isa(SSE2), + m_isPTX(false), m_arch(""), m_is32Bit(true), m_cpu(""), @@ -639,6 +640,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) : #endif else if (!strcasecmp(isa, "nvptx64")) { this->m_isa = Target::NVPTX64; + this->m_isPTX = true; this->m_nativeVectorWidth = 1; this->m_vectorWidth = 1; this->m_attributes = "+sm_35"; diff --git a/ispc.h b/ispc.h index e2a58ba9..eea77348 100644 --- a/ispc.h +++ b/ispc.h @@ -244,6 +244,7 @@ public: bool isValid() const {return m_valid;} ISA getISA() const {return m_isa;} + bool isPTX() const {return m_isPTX;} std::string getArch() const {return m_arch;} @@ -298,6 +299,7 @@ private: /** Instruction set being compiled to. */ ISA m_isa; + /** True when the "nvptx64" ISA was selected. */ bool m_isPTX; /** Target system architecture. 
(e.g. "x86-64", "x86"). */ std::string m_arch; diff --git a/module.cpp b/module.cpp index 28c922c8..07dc5e48 100644 --- a/module.cpp +++ b/module.cpp @@ -2316,6 +2316,61 @@ Module::CompileAndOutput(const char *srcFile, const char *hostStubFileName, const char *devStubFileName) { + if (target != NULL && !strcmp(target,"nvptx64")) + { + fprintf(stderr, "compiling nvptx64 \n"); /* NOTE(review): looks like a leftover debug trace; printed on every nvptx64 compile */ + // We're only compiling to a single target + g->target = new Target(arch, cpu, target, generatePIC); + if (!g->target->isValid()) + return 1; + + m = new Module(srcFile); + if (m->CompileFile() == 0) { + if (outputType == CXX) { + if (target == NULL || strncmp(target, "generic-", 8) != 0) { + Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" " + "targets must be used."); + return 1; + } + } + else if (outputType == Asm || outputType == Object) { + if (target != NULL && strncmp(target, "generic-", 8) == 0) { + Error(SourcePos(), "When using a \"generic-*\" compilation target, " + "%s output can not be used.", + (outputType == Asm) ? "assembly" : "object file"); + return 1; + } + } + + if (outFileName != NULL) + if (!m->writeOutput(outputType, outFileName, includeFileName)) + return 1; + if (headerFileName != NULL) + if (!m->writeOutput(Module::Header, headerFileName)) + return 1; + if (depsFileName != NULL) + if (!m->writeOutput(Module::Deps,depsFileName)) + return 1; + if (hostStubFileName != NULL) + if (!m->writeOutput(Module::HostStub,hostStubFileName)) + return 1; + if (devStubFileName != NULL) + if (!m->writeOutput(Module::DevStub,devStubFileName)) + return 1; + } + else + ++m->errorCount; + + int errorCount = m->errorCount; + delete m; + m = NULL; + + delete g->target; + g->target = NULL; + + return errorCount > 0; + } + else if (target == NULL || strchr(target, ',') == NULL) { // We're only compiling to a single target g->target = new Target(arch, cpu, target, generatePIC);