.. work in progress to embed PTX into host code ..
This commit is contained in:
@@ -1835,6 +1835,10 @@ declare void @ISPCLaunch(i8**, i8*, i8*, i32,i32,i32) nounwind
|
|||||||
declare void @ISPCSync(i8*) nounwind
|
declare void @ISPCSync(i8*) nounwind
|
||||||
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
|
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
|
||||||
|
|
||||||
|
declare i8* @CUDAAlloc(i8**, i64, i32) nounwind
|
||||||
|
declare void @CUDALaunch(i8**, i8*,i8*,i8*, i8**, i32, i32, i32) nounwind
|
||||||
|
declare void @CUDASync(i8*) nounwind
|
||||||
|
|
||||||
declare i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %mask)
|
declare i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %mask)
|
||||||
declare i1 @__is_compile_time_constant_uniform_int32(i32)
|
declare i1 @__is_compile_time_constant_uniform_int32(i32)
|
||||||
declare i1 @__is_compile_time_constant_varying_int32(<WIDTH x i32>)
|
declare i1 @__is_compile_time_constant_varying_int32(<WIDTH x i32>)
|
||||||
|
|||||||
167
ctx.cpp
167
ctx.cpp
@@ -3521,70 +3521,153 @@ llvm::Value *
|
|||||||
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||||
std::vector<llvm::Value *> &argVals,
|
std::vector<llvm::Value *> &argVals,
|
||||||
llvm::Value *launchCount[3]){
|
llvm::Value *launchCount[3]){
|
||||||
if (callee == NULL) {
|
|
||||||
|
if (!g->target->isPTX())
|
||||||
|
{
|
||||||
|
if (callee == NULL) {
|
||||||
AssertPos(currentPos, m->errorCount > 0);
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
launchedTasks = true;
|
launchedTasks = true;
|
||||||
|
|
||||||
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
|
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
|
||||||
llvm::Type *argType =
|
llvm::Type *argType =
|
||||||
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
|
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
|
||||||
AssertPos(currentPos, llvm::PointerType::classof(argType));
|
AssertPos(currentPos, llvm::PointerType::classof(argType));
|
||||||
llvm::PointerType *pt =
|
llvm::PointerType *pt =
|
||||||
llvm::dyn_cast<llvm::PointerType>(argType);
|
llvm::dyn_cast<llvm::PointerType>(argType);
|
||||||
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
|
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
|
||||||
llvm::StructType *argStructType =
|
llvm::StructType *argStructType =
|
||||||
static_cast<llvm::StructType *>(pt->getElementType());
|
static_cast<llvm::StructType *>(pt->getElementType());
|
||||||
|
|
||||||
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
|
|
||||||
AssertPos(currentPos, falloc != NULL);
|
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
|
||||||
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
|
AssertPos(currentPos, falloc != NULL);
|
||||||
if (structSize->getType() != LLVMTypes::Int64Type)
|
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
|
||||||
|
if (structSize->getType() != LLVMTypes::Int64Type)
|
||||||
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
|
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
|
||||||
// targets, SizeOf returns a 32-bit value
|
// targets, SizeOf returns a 32-bit value
|
||||||
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
|
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
|
||||||
"struct_size_to_64");
|
"struct_size_to_64");
|
||||||
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
|
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
|
||||||
|
|
||||||
std::vector<llvm::Value *> allocArgs;
|
std::vector<llvm::Value *> allocArgs;
|
||||||
allocArgs.push_back(launchGroupHandlePtr);
|
allocArgs.push_back(launchGroupHandlePtr);
|
||||||
allocArgs.push_back(structSize);
|
allocArgs.push_back(structSize);
|
||||||
allocArgs.push_back(LLVMInt32(align));
|
allocArgs.push_back(LLVMInt32(align));
|
||||||
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
|
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
|
||||||
llvm::Value *argmem = BitCastInst(voidmem, pt);
|
llvm::Value *argmem = BitCastInst(voidmem, pt);
|
||||||
|
|
||||||
// Copy the values of the parameters into the appropriate place in
|
// Copy the values of the parameters into the appropriate place in
|
||||||
// the argument block
|
// the argument block
|
||||||
for (unsigned int i = 0; i < argVals.size(); ++i) {
|
for (unsigned int i = 0; i < argVals.size(); ++i) {
|
||||||
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
|
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
|
||||||
// don't need to do masked store here, I think
|
// don't need to do masked store here, I think
|
||||||
StoreInst(argVals[i], ptr);
|
StoreInst(argVals[i], ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (argStructType->getNumElements() == argVals.size() + 1) {
|
if (argStructType->getNumElements() == argVals.size() + 1) {
|
||||||
// copy in the mask
|
// copy in the mask
|
||||||
llvm::Value *mask = GetFullMask();
|
llvm::Value *mask = GetFullMask();
|
||||||
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
|
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
|
||||||
"funarg_mask");
|
"funarg_mask");
|
||||||
StoreInst(mask, ptr);
|
StoreInst(mask, ptr);
|
||||||
}
|
}
|
||||||
|
|
||||||
// And emit the call to the user-supplied task launch function, passing
|
// And emit the call to the user-supplied task launch function, passing
|
||||||
// a pointer to the task function being called and a pointer to the
|
// a pointer to the task function being called and a pointer to the
|
||||||
// argument block we just filled in
|
// argument block we just filled in
|
||||||
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
|
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
|
||||||
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
|
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
|
||||||
AssertPos(currentPos, flaunch != NULL);
|
AssertPos(currentPos, flaunch != NULL);
|
||||||
std::vector<llvm::Value *> args;
|
std::vector<llvm::Value *> args;
|
||||||
args.push_back(launchGroupHandlePtr);
|
args.push_back(launchGroupHandlePtr);
|
||||||
args.push_back(fptr);
|
args.push_back(fptr);
|
||||||
args.push_back(voidmem);
|
args.push_back(voidmem);
|
||||||
args.push_back(launchCount[0]);
|
args.push_back(launchCount[0]);
|
||||||
args.push_back(launchCount[1]);
|
args.push_back(launchCount[1]);
|
||||||
args.push_back(launchCount[2]);
|
args.push_back(launchCount[2]);
|
||||||
return CallInst(flaunch, NULL, args, "");
|
return CallInst(flaunch, NULL, args, "");
|
||||||
|
}
|
||||||
|
else /* isPTX == true */
|
||||||
|
{
|
||||||
|
if (callee == NULL) {
|
||||||
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
launchedTasks = true;
|
||||||
|
|
||||||
|
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
|
||||||
|
llvm::Type *argType =
|
||||||
|
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
|
||||||
|
AssertPos(currentPos, llvm::PointerType::classof(argType));
|
||||||
|
llvm::PointerType *pt =
|
||||||
|
llvm::dyn_cast<llvm::PointerType>(argType);
|
||||||
|
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
|
||||||
|
llvm::StructType *argStructType =
|
||||||
|
static_cast<llvm::StructType *>(pt->getElementType());
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Function *falloc = m->module->getFunction("CUDAAlloc");
|
||||||
|
AssertPos(currentPos, falloc != NULL);
|
||||||
|
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
|
||||||
|
if (structSize->getType() != LLVMTypes::Int64Type)
|
||||||
|
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
|
||||||
|
// targets, SizeOf returns a 32-bit value
|
||||||
|
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
|
||||||
|
"struct_size_to_64");
|
||||||
|
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
|
||||||
|
|
||||||
|
std::vector<llvm::Value *> allocArgs;
|
||||||
|
allocArgs.push_back(launchGroupHandlePtr);
|
||||||
|
allocArgs.push_back(structSize);
|
||||||
|
allocArgs.push_back(LLVMInt32(align));
|
||||||
|
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
|
||||||
|
#if 0
|
||||||
|
llvm::Value *argmem = BitCastInst(voidmem, pt);
|
||||||
|
|
||||||
|
// Copy the values of the parameters into the appropriate place in
|
||||||
|
// the argument block
|
||||||
|
for (unsigned int i = 0; i < argVals.size(); ++i) {
|
||||||
|
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
|
||||||
|
// don't need to do masked store here, I think
|
||||||
|
StoreInst(argVals[i], ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argStructType->getNumElements() == argVals.size() + 1) {
|
||||||
|
// copy in the mask
|
||||||
|
llvm::Value *mask = GetFullMask();
|
||||||
|
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
|
||||||
|
"funarg_mask");
|
||||||
|
StoreInst(mask, ptr);
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
// And emit the call to the user-supplied task launch function, passing
|
||||||
|
// a pointer to the task function being called and a pointer to the
|
||||||
|
// argument block we just filled in
|
||||||
|
// llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
|
||||||
|
llvm::Function *flaunch = m->module->getFunction("CUDALaunch");
|
||||||
|
AssertPos(currentPos, flaunch != NULL);
|
||||||
|
std::vector<llvm::Value *> args;
|
||||||
|
args.push_back(launchGroupHandlePtr); /* void **handler */
|
||||||
|
args.push_back(voidmem); /* const char * module_name */
|
||||||
|
args.push_back(voidmem); /* const char * module */
|
||||||
|
#if 0
|
||||||
|
llvm::Value *fname = llvm::MDString::get(*g->ctx,
|
||||||
|
callee->getName().str().c_str());
|
||||||
|
llvm::Value *fnameptr = BitCastInst(fname, LLVMTypes::VoidPointerType);
|
||||||
|
args.push_back(fnameptr); /* const char * func_name */
|
||||||
|
#else
|
||||||
|
args.push_back(voidmem); /* const char * func_name */
|
||||||
|
#endif
|
||||||
|
args.push_back(launchGroupHandlePtr); /* const void ** args */
|
||||||
|
args.push_back(launchCount[0]);
|
||||||
|
args.push_back(launchCount[1]);
|
||||||
|
args.push_back(launchCount[2]);
|
||||||
|
return CallInst(flaunch, NULL, args, "");
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
44
module.cpp
44
module.cpp
@@ -2314,20 +2314,23 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
const char *hostStubFileName,
|
const char *hostStubFileName,
|
||||||
const char *devStubFileName)
|
const char *devStubFileName)
|
||||||
{
|
{
|
||||||
if (target != NULL && !strcmp(target,"nvptx64")) // NVPTX64
|
if (target != NULL && strcmp(target,"nvptx64") >= 0) // NVPTX64
|
||||||
{
|
{
|
||||||
|
std::vector<std::string> targets = lExtractTargets(target);
|
||||||
|
Assert(targets.size() > 1);
|
||||||
// We're only compiling to a single target
|
// We're only compiling to a single target
|
||||||
const char * target_list[] = {"nvptx64", "avx"};
|
|
||||||
int errorCount = 0;
|
int errorCount = 0;
|
||||||
|
|
||||||
const char *suffix_orig = strrchr(outFileName, '.');
|
const char *suffix_orig = strrchr(outFileName, '.');
|
||||||
++suffix_orig;
|
++suffix_orig;
|
||||||
assert(suffix_orig!=NULL);
|
assert(suffix_orig!=NULL);
|
||||||
|
|
||||||
|
std::string PtxString;
|
||||||
|
|
||||||
for (int itarget = 0; itarget < 2; itarget++)
|
for (int itarget = 0; itarget < 2; itarget++)
|
||||||
{
|
{
|
||||||
fprintf(stderr, "compiling nvptx64 : target= %s\n",target_list[itarget]);
|
fprintf(stderr, "compiling nvptx64 : target= %s\n",targets[itarget].c_str());
|
||||||
g->target = new Target(arch, cpu, target_list[itarget], generatePIC, /* isPTX= */ true);
|
g->target = new Target(arch, cpu, targets[itarget].c_str(), generatePIC, /* isPTX= */ true);
|
||||||
if (!g->target->isValid())
|
if (!g->target->isValid())
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
@@ -2352,7 +2355,7 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
assert(outFileName != NULL);
|
assert(outFileName != NULL);
|
||||||
|
|
||||||
std::string targetOutFileName =
|
std::string targetOutFileName =
|
||||||
lGetTargetFileName(outFileName, target_list[itarget]);
|
lGetTargetFileName(outFileName, targets[itarget].c_str());
|
||||||
if (outputType == Asm)
|
if (outputType == Asm)
|
||||||
{
|
{
|
||||||
const char * targetOutFileName_c = targetOutFileName.c_str();
|
const char * targetOutFileName_c = targetOutFileName.c_str();
|
||||||
@@ -2366,6 +2369,35 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
if (!m->writeOutput(outputType, targetOutFileName.c_str(), includeFileName))
|
if (!m->writeOutput(outputType, targetOutFileName.c_str(), includeFileName))
|
||||||
return 1;
|
return 1;
|
||||||
|
|
||||||
|
if (itarget == 0)
|
||||||
|
{ /* store ptx into memory */
|
||||||
|
llvm::PassManager pm;
|
||||||
|
#if defined(LLVM_3_1)
|
||||||
|
pm.add(new llvm::TargetData(*g->target->getDataLayout()));
|
||||||
|
#else
|
||||||
|
pm.add(new llvm::DataLayout(*g->target->getDataLayout()));
|
||||||
|
#endif
|
||||||
|
|
||||||
|
llvm::raw_string_ostream rso(PtxString);
|
||||||
|
llvm::formatted_raw_ostream fos(rso);
|
||||||
|
|
||||||
|
llvm::TargetMachine::CodeGenFileType fileType = llvm::TargetMachine::CGFT_AssemblyFile;
|
||||||
|
llvm::TargetMachine *targetMachine = g->target->GetTargetMachine();
|
||||||
|
if (targetMachine->addPassesToEmitFile(pm, fos, fileType)) {
|
||||||
|
fprintf(stderr, "Fatal error adding passes to emit object file!");
|
||||||
|
exit(1);
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::Module *module = m->module;
|
||||||
|
pm.run(*module);
|
||||||
|
fos.flush();
|
||||||
|
assert(!PtxString.empty());
|
||||||
|
#if 0
|
||||||
|
std::cout << PtxString << std::endl;
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
if (itarget > 0)
|
if (itarget > 0)
|
||||||
{
|
{
|
||||||
if (headerFileName != NULL)
|
if (headerFileName != NULL)
|
||||||
@@ -2463,6 +2495,8 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
// The user supplied multiple targets
|
// The user supplied multiple targets
|
||||||
std::vector<std::string> targets = lExtractTargets(target);
|
std::vector<std::string> targets = lExtractTargets(target);
|
||||||
Assert(targets.size() > 1);
|
Assert(targets.size() > 1);
|
||||||
|
for (unsigned int i = 0; i < targets.size(); ++i)
|
||||||
|
assert(strcmp(targets[i].c_str(), "nvptx64") < 0);
|
||||||
|
|
||||||
if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
|
if (outFileName != NULL && strcmp(outFileName, "-") == 0) {
|
||||||
Error(SourcePos(), "Multi-target compilation can't generate output "
|
Error(SourcePos(), "Multi-target compilation can't generate output "
|
||||||
|
|||||||
25
type.cpp
25
type.cpp
@@ -2942,7 +2942,18 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
|||||||
Assert(m->errorCount > 0);
|
Assert(m->errorCount > 0);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
llvmArgTypes.push_back(t);
|
if (g->target->isPTX() && g->target->getISA() != Target::NVPTX64 && isTask)
|
||||||
|
{
|
||||||
|
#if 0
|
||||||
|
llvmArgTypes.push_back(
|
||||||
|
llvm::BitCastInst(llvm::PointerType::getUnqual(t), LLVMTypes::VoidPointerType)
|
||||||
|
);
|
||||||
|
#endif
|
||||||
|
llvmArgTypes.push_back(llvm::PointerType::getUnqual(t));
|
||||||
|
//llvmArgTypes.push_back(t);
|
||||||
|
}
|
||||||
|
else
|
||||||
|
llvmArgTypes.push_back(t);
|
||||||
}
|
}
|
||||||
|
|
||||||
// And add the function mask, if asked for
|
// And add the function mask, if asked for
|
||||||
@@ -2956,7 +2967,8 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
|||||||
// threads the tasks system has running. (Task arguments are
|
// threads the tasks system has running. (Task arguments are
|
||||||
// marshalled in a struct so that it's easy to allocate space to
|
// marshalled in a struct so that it's easy to allocate space to
|
||||||
// hold them until the task actually runs.)
|
// hold them until the task actually runs.)
|
||||||
if (g->target->getISA() != Target::NVPTX64)
|
// if (g->target->getISA() != Target::NVPTX64)
|
||||||
|
if (!g->target->isPTX())
|
||||||
{
|
{
|
||||||
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
||||||
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
||||||
@@ -2973,9 +2985,14 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
|||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
callTypes = llvmArgTypes;
|
if (g->target->getISA() == Target::NVPTX64)
|
||||||
|
callTypes = llvmArgTypes;
|
||||||
|
else
|
||||||
|
{
|
||||||
|
llvm::Type *st = llvm::StructType::get(*ctx, llvmArgTypes);
|
||||||
|
callTypes.push_back(llvm::PointerType::getUnqual(st));
|
||||||
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
// Otherwise we already have the types of the arguments
|
// Otherwise we already have the types of the arguments
|
||||||
|
|||||||
Reference in New Issue
Block a user