From f9ec1a0097631aa968854b55e44e081774d04aff Mon Sep 17 00:00:00 2001 From: Evghenii Date: Wed, 30 Oct 2013 16:47:30 +0100 Subject: [PATCH] .. work in programm to embed PTX into host code .. --- builtins/util.m4 | 4 ++ ctx.cpp | 167 +++++++++++++++++++++++++++++++++++------------ module.cpp | 44 +++++++++++-- type.cpp | 25 +++++-- 4 files changed, 189 insertions(+), 51 deletions(-) diff --git a/builtins/util.m4 b/builtins/util.m4 index 77d18719..acf45a38 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -1835,6 +1835,10 @@ declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind declare void @ISPCSync(i8*) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind +declare i8* @CUDAAlloc(i8**, i64, i32) nounwind +declare void @CUDALaunch(i8**, i8*,i8*,i8*, i8**, i32, i32, i32) nounwind +declare void @CUDASync(i8*) nounwind + declare i1 @__is_compile_time_constant_mask( %mask) declare i1 @__is_compile_time_constant_uniform_int32(i32) declare i1 @__is_compile_time_constant_varying_int32() diff --git a/ctx.cpp b/ctx.cpp index 6b60ec51..513c8f06 100644 --- a/ctx.cpp +++ b/ctx.cpp @@ -3521,70 +3521,153 @@ llvm::Value * FunctionEmitContext::LaunchInst(llvm::Value *callee, std::vector &argVals, llvm::Value *launchCount[3]){ - if (callee == NULL) { + + if (!g->target->isPTX()) + { + if (callee == NULL) { AssertPos(currentPos, m->errorCount > 0); return NULL; - } + } - launchedTasks = true; + launchedTasks = true; - AssertPos(currentPos, llvm::isa(callee)); - llvm::Type *argType = + AssertPos(currentPos, llvm::isa(callee)); + llvm::Type *argType = (llvm::dyn_cast(callee))->arg_begin()->getType(); - AssertPos(currentPos, llvm::PointerType::classof(argType)); - llvm::PointerType *pt = + AssertPos(currentPos, llvm::PointerType::classof(argType)); + llvm::PointerType *pt = llvm::dyn_cast(argType); - AssertPos(currentPos, llvm::StructType::classof(pt->getElementType())); - llvm::StructType *argStructType = + AssertPos(currentPos, llvm::StructType::classof(pt->getElementType())); + llvm::StructType *argStructType = static_cast(pt->getElementType()); - llvm::Function *falloc = m->module->getFunction("ISPCAlloc"); - AssertPos(currentPos, falloc != NULL); - llvm::Value *structSize = g->target->SizeOf(argStructType, bblock); - if (structSize->getType() != LLVMTypes::Int64Type) + + llvm::Function *falloc = m->module->getFunction("ISPCAlloc"); + AssertPos(currentPos, falloc != NULL); + llvm::Value *structSize = g->target->SizeOf(argStructType, bblock); + if (structSize->getType() != LLVMTypes::Int64Type) // ISPCAlloc expects the size as an uint64_t, but on 32-bit // targets, SizeOf returns a 32-bit value structSize = ZExtInst(structSize, LLVMTypes::Int64Type, - "struct_size_to_64"); - int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth()); + "struct_size_to_64"); + int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth()); - std::vector allocArgs; - allocArgs.push_back(launchGroupHandlePtr); - allocArgs.push_back(structSize); - allocArgs.push_back(LLVMInt32(align)); - llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr"); - llvm::Value *argmem = BitCastInst(voidmem, pt); + std::vector allocArgs; + allocArgs.push_back(launchGroupHandlePtr); + allocArgs.push_back(structSize); + allocArgs.push_back(LLVMInt32(align)); + llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr"); + llvm::Value *argmem = BitCastInst(voidmem, pt); - // Copy the values of the parameters into the appropriate place in - // the argument block - for (unsigned int i = 0; i < argVals.size(); ++i) { + // Copy the values of the parameters into the appropriate place in + // the argument block + for (unsigned int i = 0; i < argVals.size(); ++i) { llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg"); // don't need to do masked store here, I think StoreInst(argVals[i], ptr); - } + } - if (argStructType->getNumElements() == argVals.size() + 1) { + if (argStructType->getNumElements() == argVals.size() + 1) { // copy in the mask llvm::Value *mask = GetFullMask(); llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL, - "funarg_mask"); + "funarg_mask"); StoreInst(mask, ptr); - } + } - // And emit the call to the user-supplied task launch function, passing - // a pointer to the task function being called and a pointer to the - // argument block we just filled in - llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType); - llvm::Function *flaunch = m->module->getFunction("ISPCLaunch"); - AssertPos(currentPos, flaunch != NULL); - std::vector args; - args.push_back(launchGroupHandlePtr); - args.push_back(fptr); - args.push_back(voidmem); - args.push_back(launchCount[0]); - args.push_back(launchCount[1]); - args.push_back(launchCount[2]); - return CallInst(flaunch, NULL, args, ""); + // And emit the call to the user-supplied task launch function, passing + // a pointer to the task function being called and a pointer to the + // argument block we just filled in + llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType); + llvm::Function *flaunch = m->module->getFunction("ISPCLaunch"); + AssertPos(currentPos, flaunch != NULL); + std::vector args; + args.push_back(launchGroupHandlePtr); + args.push_back(fptr); + args.push_back(voidmem); + args.push_back(launchCount[0]); + args.push_back(launchCount[1]); + args.push_back(launchCount[2]); + return CallInst(flaunch, NULL, args, ""); + } + else /* isPTX == true */ + { + if (callee == NULL) { + AssertPos(currentPos, m->errorCount > 0); + return NULL; + } + + launchedTasks = true; + + AssertPos(currentPos, llvm::isa(callee)); + llvm::Type *argType = + (llvm::dyn_cast(callee))->arg_begin()->getType(); + AssertPos(currentPos, llvm::PointerType::classof(argType)); + llvm::PointerType *pt = + llvm::dyn_cast(argType); + AssertPos(currentPos, llvm::StructType::classof(pt->getElementType())); + llvm::StructType *argStructType = + static_cast(pt->getElementType()); + + + llvm::Function *falloc = m->module->getFunction("CUDAAlloc"); + AssertPos(currentPos, falloc != NULL); + llvm::Value *structSize = g->target->SizeOf(argStructType, bblock); + if (structSize->getType() != LLVMTypes::Int64Type) + // ISPCAlloc expects the size as an uint64_t, but on 32-bit + // targets, SizeOf returns a 32-bit value + structSize = ZExtInst(structSize, LLVMTypes::Int64Type, + "struct_size_to_64"); + int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth()); + + std::vector allocArgs; + allocArgs.push_back(launchGroupHandlePtr); + allocArgs.push_back(structSize); + allocArgs.push_back(LLVMInt32(align)); + llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr"); +#if 0 + llvm::Value *argmem = BitCastInst(voidmem, pt); + + // Copy the values of the parameters into the appropriate place in + // the argument block + for (unsigned int i = 0; i < argVals.size(); ++i) { + llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg"); + // don't need to do masked store here, I think + StoreInst(argVals[i], ptr); + } + + if (argStructType->getNumElements() == argVals.size() + 1) { + // copy in the mask + llvm::Value *mask = GetFullMask(); + llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL, + "funarg_mask"); + StoreInst(mask, ptr); + } +#endif + // And emit the call to the user-supplied task launch function, passing + // a pointer to the task function being called and a pointer to the + // argument block we just filled in + // llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType); + llvm::Function *flaunch = m->module->getFunction("CUDALaunch"); + AssertPos(currentPos, flaunch != NULL); + std::vector args; + args.push_back(launchGroupHandlePtr); /* void **handler */ + args.push_back(voidmem); /* const char * module_name */ + args.push_back(voidmem); /* const char * module */ +#if 0 + llvm::Value *fname = llvm::MDString::get(*g->ctx, + callee->getName().str().c_str()); + llvm::Value *fnameptr = BitCastInst(fname, LLVMTypes::VoidPointerType); + args.push_back(fnameptr); /* const char * func_name */ +#else + args.push_back(voidmem); /* const char * func_name */ +#endif + args.push_back(launchGroupHandlePtr); /* const void ** args */ + args.push_back(launchCount[0]); + args.push_back(launchCount[1]); + args.push_back(launchCount[2]); + return CallInst(flaunch, NULL, args, ""); + } } diff --git a/module.cpp b/module.cpp index 2f5e6167..cf06750d 100644 --- a/module.cpp +++ b/module.cpp @@ -2314,20 +2314,23 @@ Module::CompileAndOutput(const char *srcFile, const char *hostStubFileName, const char *devStubFileName) { - if (target != NULL && !strcmp(target,"nvptx64")) // NVPTX64 + if (target != NULL && strcmp(target,"nvptx64") >= 0) // NVPTX64 { + std::vector targets = lExtractTargets(target); + Assert(targets.size() > 1); // We're only compiling to a single target - const char * target_list[] = {"nvptx64", "avx"}; int errorCount = 0; const char *suffix_orig = strrchr(outFileName, '.'); ++suffix_orig; assert(suffix_orig!=NULL); + std::string PtxString; + for (int itarget = 0; itarget < 2; itarget++) { - fprintf(stderr, "compiling nvptx64 : target= %s\n",target_list[itarget]); - g->target = new Target(arch, cpu, target_list[itarget], generatePIC, /* isPTX= */ true); + fprintf(stderr, "compiling nvptx64 : target= %s\n",targets[itarget].c_str()); + g->target = new Target(arch, cpu, targets[itarget].c_str(), generatePIC, /* isPTX= */ true); if (!g->target->isValid()) return 1; @@ -2352,7 +2355,7 @@ Module::CompileAndOutput(const char *srcFile, assert(outFileName != NULL); std::string targetOutFileName = - lGetTargetFileName(outFileName, target_list[itarget]); + lGetTargetFileName(outFileName, targets[itarget].c_str()); if (outputType == Asm) { const char * targetOutFileName_c = targetOutFileName.c_str(); @@ -2366,6 +2369,35 @@ Module::CompileAndOutput(const char *srcFile, if (!m->writeOutput(outputType, targetOutFileName.c_str(), includeFileName)) return 1; + if (itarget == 0) + { /* store ptx into memory */ + llvm::PassManager pm; +#if defined(LLVM_3_1) + pm.add(new llvm::TargetData(*g->target->getDataLayout())); +#else + pm.add(new llvm::DataLayout(*g->target->getDataLayout())); +#endif + + llvm::raw_string_ostream rso(PtxString); + llvm::formatted_raw_ostream fos(rso); + + llvm::TargetMachine::CodeGenFileType fileType = llvm::TargetMachine::CGFT_AssemblyFile; + llvm::TargetMachine *targetMachine = g->target->GetTargetMachine(); + if (targetMachine->addPassesToEmitFile(pm, fos, fileType)) { + fprintf(stderr, "Fatal error adding passes to emit object file!"); + exit(1); + } + + llvm::Module *module = m->module; + pm.run(*module); + fos.flush(); + assert(!PtxString.empty()); +#if 0 + std::cout << PtxString << std::endl; +#endif + } + + if (itarget > 0) { if (headerFileName != NULL) @@ -2463,6 +2495,8 @@ Module::CompileAndOutput(const char *srcFile, // The user supplied multiple targets std::vector targets = lExtractTargets(target); Assert(targets.size() > 1); + for (unsigned int i = 0; i < targets.size(); ++i) + assert(strcmp(targets[i].c_str(), "nvptx64") < 0); if (outFileName != NULL && strcmp(outFileName, "-") == 0) { Error(SourcePos(), "Multi-target compilation can't generate output " diff --git a/type.cpp b/type.cpp index 04c02989..20076462 100644 --- a/type.cpp +++ b/type.cpp @@ -2942,7 +2942,18 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { Assert(m->errorCount > 0); return NULL; } - llvmArgTypes.push_back(t); + if (g->target->isPTX() && g->target->getISA() != Target::NVPTX64 && isTask) + { +#if 0 + llvmArgTypes.push_back( + llvm::BitCastInst(llvm::PointerType::getUnqual(t), LLVMTypes::VoidPointerType) + ); +#endif + llvmArgTypes.push_back(llvm::PointerType::getUnqual(t)); + //llvmArgTypes.push_back(t); + } + else + llvmArgTypes.push_back(t); } // And add the function mask, if asked for @@ -2956,7 +2967,8 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { // threads the tasks system has running. (Task arguments are // marshalled in a struct so that it's easy to allocate space to // hold them until the task actually runs.) - if (g->target->getISA() != Target::NVPTX64) +// if (g->target->getISA() != Target::NVPTX64) + if (!g->target->isPTX()) { llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes); callTypes.push_back(llvm::PointerType::getUnqual(st)); @@ -2973,9 +2985,14 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const { } else { - callTypes = llvmArgTypes; + if (g->target->getISA() == Target::NVPTX64) + callTypes = llvmArgTypes; + else + { + llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes); + callTypes.push_back(llvm::PointerType::getUnqual(st)); + } } - } else // Otherwise we already have the types of the arguments