.. work in progress to embed PTX into host code ..

This commit is contained in:
Evghenii
2013-10-30 16:47:30 +01:00
parent 47cc470bf6
commit f9ec1a0097
4 changed files with 189 additions and 51 deletions

View File

@@ -1835,6 +1835,10 @@ declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind
declare void @ISPCSync(i8*) nounwind declare void @ISPCSync(i8*) nounwind
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
declare i8* @CUDAAlloc(i8**, i64, i32) nounwind
declare void @CUDALaunch(i8**, i8*,i8*,i8*, i8**, i32, i32, i32) nounwind
declare void @CUDASync(i8*) nounwind
declare i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %mask) declare i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %mask)
declare i1 @__is_compile_time_constant_uniform_int32(i32) declare i1 @__is_compile_time_constant_uniform_int32(i32)
declare i1 @__is_compile_time_constant_varying_int32(<WIDTH x i32>) declare i1 @__is_compile_time_constant_varying_int32(<WIDTH x i32>)

167
ctx.cpp
View File

@@ -3521,70 +3521,153 @@ llvm::Value *
FunctionEmitContext::LaunchInst(llvm::Value *callee, FunctionEmitContext::LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals, std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount[3]){ llvm::Value *launchCount[3]){
if (callee == NULL) {
if (!g->target->isPTX())
{
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0); AssertPos(currentPos, m->errorCount > 0);
return NULL; return NULL;
} }
launchedTasks = true; launchedTasks = true;
AssertPos(currentPos, llvm::isa<llvm::Function>(callee)); AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType = llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType(); (llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType)); AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt = llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType); llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType())); AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType = llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType()); static_cast<llvm::StructType *>(pt->getElementType());
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL); llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock); AssertPos(currentPos, falloc != NULL);
if (structSize->getType() != LLVMTypes::Int64Type) llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit // ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value // targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type, structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64"); "struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth()); int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs; std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr); allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize); allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align)); allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr"); llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
llvm::Value *argmem = BitCastInst(voidmem, pt); llvm::Value *argmem = BitCastInst(voidmem, pt);
// Copy the values of the parameters into the appropriate place in // Copy the values of the parameters into the appropriate place in
// the argument block // the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) { for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg"); llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think // don't need to do masked store here, I think
StoreInst(argVals[i], ptr); StoreInst(argVals[i], ptr);
} }
if (argStructType->getNumElements() == argVals.size() + 1) { if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask // copy in the mask
llvm::Value *mask = GetFullMask(); llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL, llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask"); "funarg_mask");
StoreInst(mask, ptr); StoreInst(mask, ptr);
} }
// And emit the call to the user-supplied task launch function, passing // And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the // a pointer to the task function being called and a pointer to the
// argument block we just filled in // argument block we just filled in
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType); llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch"); llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
AssertPos(currentPos, flaunch != NULL); AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args; std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr); args.push_back(launchGroupHandlePtr);
args.push_back(fptr); args.push_back(fptr);
args.push_back(voidmem); args.push_back(voidmem);
args.push_back(launchCount[0]); args.push_back(launchCount[0]);
args.push_back(launchCount[1]); args.push_back(launchCount[1]);
args.push_back(launchCount[2]); args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, ""); return CallInst(flaunch, NULL, args, "");
}
else /* isPTX == true */
{
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
}
launchedTasks = true;
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType());
llvm::Function *falloc = m->module->getFunction("CUDAAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
#if 0
llvm::Value *argmem = BitCastInst(voidmem, pt);
// Copy the values of the parameters into the appropriate place in
// the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think
StoreInst(argVals[i], ptr);
}
if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}
#endif
// And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the
// argument block we just filled in
// llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("CUDALaunch");
AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr); /* void **handler */
args.push_back(voidmem); /* const char * module_name */
args.push_back(voidmem); /* const char * module */
#if 0
llvm::Value *fname = llvm::MDString::get(*g->ctx,
callee->getName().str().c_str());
llvm::Value *fnameptr = BitCastInst(fname, LLVMTypes::VoidPointerType);
args.push_back(fnameptr); /* const char * func_name */
#else
args.push_back(voidmem); /* const char * func_name */
#endif
args.push_back(launchGroupHandlePtr); /* const void ** args */
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
}
} }

View File

@@ -2314,20 +2314,23 @@ Module::CompileAndOutput(const char *srcFile,
const char *hostStubFileName, const char *hostStubFileName,
const char *devStubFileName) const char *devStubFileName)
{ {
if (target != NULL && !strcmp(target,"nvptx64")) // NVPTX64 if (target != NULL && strcmp(target,"nvptx64") >= 0) // NVPTX64
{ {
std::vector<std::string> targets = lExtractTargets(target);
Assert(targets.size() > 1);
// We're only compiling to a single target // We're only compiling to a single target
const char * target_list[] = {"nvptx64", "avx"};
int errorCount = 0; int errorCount = 0;
const char *suffix_orig = strrchr(outFileName, '.'); const char *suffix_orig = strrchr(outFileName, '.');
++suffix_orig; ++suffix_orig;
assert(suffix_orig!=NULL); assert(suffix_orig!=NULL);
std::string PtxString;
for (int itarget = 0; itarget < 2; itarget++) for (int itarget = 0; itarget < 2; itarget++)
{ {
fprintf(stderr, "compiling nvptx64 : target= %s\n",target_list[itarget]); fprintf(stderr, "compiling nvptx64 : target= %s\n",targets[itarget].c_str());
g->target = new Target(arch, cpu, target_list[itarget], generatePIC, /* isPTX= */ true); g->target = new Target(arch, cpu, targets[itarget].c_str(), generatePIC, /* isPTX= */ true);
if (!g->target->isValid()) if (!g->target->isValid())
return 1; return 1;
@@ -2352,7 +2355,7 @@ Module::CompileAndOutput(const char *srcFile,
assert(outFileName != NULL); assert(outFileName != NULL);
std::string targetOutFileName = std::string targetOutFileName =
lGetTargetFileName(outFileName, target_list[itarget]); lGetTargetFileName(outFileName, targets[itarget].c_str());
if (outputType == Asm) if (outputType == Asm)
{ {
const char * targetOutFileName_c = targetOutFileName.c_str(); const char * targetOutFileName_c = targetOutFileName.c_str();
@@ -2366,6 +2369,35 @@ Module::CompileAndOutput(const char *srcFile,
if (!m->writeOutput(outputType, targetOutFileName.c_str(), includeFileName)) if (!m->writeOutput(outputType, targetOutFileName.c_str(), includeFileName))
return 1; return 1;
if (itarget == 0)
{ /* store ptx into memory */
llvm::PassManager pm;
#if defined(LLVM_3_1)
pm.add(new llvm::TargetData(*g->target->getDataLayout()));
#else
pm.add(new llvm::DataLayout(*g->target->getDataLayout()));
#endif
llvm::raw_string_ostream rso(PtxString);
llvm::formatted_raw_ostream fos(rso);
llvm::TargetMachine::CodeGenFileType fileType = llvm::TargetMachine::CGFT_AssemblyFile;
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
if (targetMachine->addPassesToEmitFile(pm, fos, fileType)) {
fprintf(stderr, "Fatal error adding passes to emit object file!");
exit(1);
}
llvm::Module *module = m->module;
pm.run(*module);
fos.flush();
assert(!PtxString.empty());
#if 0
std::cout << PtxString << std::endl;
#endif
}
if (itarget > 0) if (itarget > 0)
{ {
if (headerFileName != NULL) if (headerFileName != NULL)
@@ -2463,6 +2495,8 @@ Module::CompileAndOutput(const char *srcFile,
// The user supplied multiple targets // The user supplied multiple targets
std::vector<std::string> targets = lExtractTargets(target); std::vector<std::string> targets = lExtractTargets(target);
Assert(targets.size() > 1); Assert(targets.size() > 1);
for (unsigned int i = 0; i < targets.size(); ++i)
assert(strcmp(targets[i].c_str(), "nvptx64") < 0);
if (outFileName != NULL && strcmp(outFileName, "-") == 0) { if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
Error(SourcePos(), "Multi-target compilation can't generate output " Error(SourcePos(), "Multi-target compilation can't generate output "

View File

@@ -2942,7 +2942,18 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
Assert(m->errorCount > 0); Assert(m->errorCount > 0);
return NULL; return NULL;
} }
llvmArgTypes.push_back(t); if (g->target->isPTX() && g->target->getISA() != Target::NVPTX64 && isTask)
{
#if 0
llvmArgTypes.push_back(
llvm::BitCastInst(llvm::PointerType::getUnqual(t), LLVMTypes::VoidPointerType)
);
#endif
llvmArgTypes.push_back(llvm::PointerType::getUnqual(t));
//llvmArgTypes.push_back(t);
}
else
llvmArgTypes.push_back(t);
} }
// And add the function mask, if asked for // And add the function mask, if asked for
@@ -2956,7 +2967,8 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
// threads the tasks system has running. (Task arguments are // threads the tasks system has running. (Task arguments are
// marshalled in a struct so that it's easy to allocate space to // marshalled in a struct so that it's easy to allocate space to
// hold them until the task actually runs.) // hold them until the task actually runs.)
if (g->target->getISA() != Target::NVPTX64) // if (g->target->getISA() != Target::NVPTX64)
if (!g->target->isPTX())
{ {
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes); llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
callTypes.push_back(llvm::PointerType::getUnqual(st)); callTypes.push_back(llvm::PointerType::getUnqual(st));
@@ -2973,9 +2985,14 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
} }
else else
{ {
callTypes = llvmArgTypes; if (g->target->getISA() == Target::NVPTX64)
callTypes = llvmArgTypes;
else
{
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
callTypes.push_back(llvm::PointerType::getUnqual(st));
}
} }
} }
else else
// Otherwise we already have the types of the arguments // Otherwise we already have the types of the arguments