now generates CUDALaunch call. A few tweaks are still necessary

This commit is contained in:
Evghenii
2013-10-31 16:01:34 +01:00
parent e7ddb9e642
commit 63917f8cc2
2 changed files with 85 additions and 35 deletions

111
ctx.cpp
View File

@@ -57,6 +57,8 @@
#include <llvm/IR/Instructions.h> #include <llvm/IR/Instructions.h>
#include <llvm/IR/DerivedTypes.h> #include <llvm/IR/DerivedTypes.h>
#endif #endif
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/FormattedStream.h>
/** This is a small utility structure that records information related to one /** This is a small utility structure that records information related to one
level of nested control flow. It's mostly used in correctly restoring level of nested control flow. It's mostly used in correctly restoring
@@ -3611,55 +3613,78 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
llvm::StructType *argStructType = llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType()); static_cast<llvm::StructType *>(pt->getElementType());
llvm::Function *falloc = m->module->getFunction("CUDAAlloc"); llvm::Function *falloc = m->module->getFunction("CUDAAlloc");
AssertPos(currentPos, falloc != NULL); AssertPos(currentPos, falloc != NULL);
#if 0
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock); llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type) if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit // ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value // targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type, structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64"); "struct_size_to_64");
#else
/* CUDALaunch takes array of argVals.size() of pointer to parameters */
/* code assumes sizeof(void*) pointer size */
llvm::Value *structSize = llvm::ConstantInt::get(*g->ctx, llvm::APInt(64, sizeof(void*)*argVals.size()));
#endif
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth()); int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs; std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr); allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize); allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align)); allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
#if 0
llvm::Value *argmem = BitCastInst(voidmem, pt);
// Copy the values of the parameters into the appropriate place in // Copy the values of the parameters into the appropriate place in
// the argument block // the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg"); /* allocate structure of pointer */
// don't need to do masked store here, I think llvm::ArrayType* ArrayTy_6 = llvm::ArrayType::get(LLVMTypes::VoidPointerType, argVals.size());
StoreInst(argVals[i], ptr); llvm::Value* ptrParam = AllocaInst(ArrayTy_6, "arrayStructPtr");
/* construct array of pointers to arguments */
for (unsigned int i = 0; i < argVals.size(); ++i)
{
llvm::Type* type = argStructType->getElementType(i);
llvm::Value* ptr_arg1_addr = AllocaInst(type, "argptr");
StoreInst(argVals[i], ptr_arg1_addr);
llvm::ConstantInt* const_int64_11 = llvm::ConstantInt::get(*g->ctx, llvm::APInt(64, i));
std::vector<llvm::Value*> ptr_arrayinit_begin_indices;
ptr_arrayinit_begin_indices.push_back(const_int64_11);
ptr_arrayinit_begin_indices.push_back(const_int64_11);
llvm::GetElementPtrInst* ptr_arrayinit_element =
llvm::GetElementPtrInst::Create(ptrParam, ptr_arrayinit_begin_indices, "el", bblock);
llvm::Value* ptr_15 = BitCastInst(ptr_arg1_addr, LLVMTypes::VoidPointerType, "voidptr");
#if 0
{
std::string str; llvm::raw_string_ostream rso(str); llvm::formatted_raw_ostream fos(rso);
ptr_arg1_addr->print(fos);
const_int64_11->print(fos);
ptr_arrayinit_element->print(fos);
ptr_15->print(fos);
fos.flush(); fprintf(stderr, ">>> %s\n", str.c_str());
}
#endif
StoreInst(ptr_15, ptr_arrayinit_element);
} }
if (argStructType->getNumElements() == argVals.size() + 1) { if (argStructType->getNumElements() == argVals.size() + 1)
// copy in the mask assert(0); /* must not happen as task function is unmasked for PTX target */
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}
#endif
// And emit the call to the user-supplied task launch function, passing // And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the // a pointer to the task function being called and a pointer to the
// argument block we just filled in // argument block we just filled in
// llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("CUDALaunch"); llvm::Function *flaunch = m->module->getFunction("CUDALaunch");
AssertPos(currentPos, flaunch != NULL); AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args; std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr); /* void **handler */ args.push_back(launchGroupHandlePtr); /* void **handler */
/* module name string to distinguish between different modules */
{ {
const std::string moduleNameStr("module_xyz"); const std::string moduleNameStr = m->module->getModuleIdentifier();
llvm::ArrayType* ArrayTyModuleName = llvm::ArrayType::get(llvm::IntegerType::get(*g->ctx, 8), moduleNameStr.size()+1); llvm::ArrayType* ArrayTyModuleName = llvm::ArrayType::get(llvm::IntegerType::get(*g->ctx, 8), moduleNameStr.size()+1);
llvm::GlobalVariable* gvarModuleNameStr = new llvm::GlobalVariable( llvm::GlobalVariable* gvarModuleNameStr = new llvm::GlobalVariable(
@@ -3668,8 +3693,8 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
/*isConstant=*/ true, /*isConstant=*/ true,
/*Linkage=*/ llvm::GlobalValue::PrivateLinkage, /*Linkage=*/ llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/ 0, // has initializer, specified below /*Initializer=*/ 0, // has initializer, specified below
/*Name=*/ ".str"); /*Name=*/ ".module_str");
gvarModuleNameStr->setAlignment(1); //gvarModuleNameStr->setAlignment(1);
llvm::Constant *constModuleName= llvm::ConstantDataArray::getString(*g->ctx, moduleNameStr.c_str(), true); llvm::Constant *constModuleName= llvm::ConstantDataArray::getString(*g->ctx, moduleNameStr.c_str(), true);
gvarModuleNameStr->setInitializer(constModuleName); gvarModuleNameStr->setInitializer(constModuleName);
@@ -3681,8 +3706,31 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
args.push_back(const_ptr_12); /* const char * module_name */ args.push_back(const_ptr_12); /* const char * module_name */
} }
args.push_back(voidmem); /* const char * module */ /* ptx string, must be created ones */
{
const std::string moduleNameStr = g->PtxString;
g->PtxString.clear();
llvm::ArrayType* ArrayTyModuleName = llvm::ArrayType::get(llvm::IntegerType::get(*g->ctx, 8), moduleNameStr.size()+1);
llvm::GlobalVariable* gvarModuleNameStr = new llvm::GlobalVariable(
/*Module=*/ *m->module,
/*Type=*/ ArrayTyModuleName,
/*isConstant=*/ true,
/*Linkage=*/ llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/ 0, // has initializer, specified below
/*Name=*/ ".ptx_str");
llvm::Constant *constModuleName= llvm::ConstantDataArray::getString(*g->ctx, moduleNameStr.c_str(), true);
gvarModuleNameStr->setInitializer(constModuleName);
std::vector<llvm::Constant*> const_ptr_12_indices;
const_ptr_12_indices.push_back(llvm::Constant::getNullValue(llvm::Type::getInt32Ty(*g->ctx)));
const_ptr_12_indices.push_back(llvm::ConstantInt::get(llvm::Type::getInt32Ty(*g->ctx),0));
llvm::Constant* const_ptr_12 = llvm::ConstantExpr::getGetElementPtr(gvarModuleNameStr, const_ptr_12_indices);
args.push_back(const_ptr_12); /* const char * module_name */
}
/* function name string */
{ {
const std::string funcNameStr = callee->getName().str(); const std::string funcNameStr = callee->getName().str();
llvm::ArrayType* ArrayTyFuncName = llvm::ArrayType::get(llvm::IntegerType::get(*g->ctx, 8), funcNameStr.size()+1); llvm::ArrayType* ArrayTyFuncName = llvm::ArrayType::get(llvm::IntegerType::get(*g->ctx, 8), funcNameStr.size()+1);
@@ -3693,8 +3741,7 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
/*isConstant=*/ true, /*isConstant=*/ true,
/*Linkage=*/ llvm::GlobalValue::PrivateLinkage, /*Linkage=*/ llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/ 0, // has initializer, specified below /*Initializer=*/ 0, // has initializer, specified below
/*Name=*/ ".str"); /*Name=*/ ".func_str");
gvarFuncNameStr->setAlignment(1);
llvm::Constant *constFuncName= llvm::ConstantDataArray::getString(*g->ctx, funcNameStr.c_str(), true); llvm::Constant *constFuncName= llvm::ConstantDataArray::getString(*g->ctx, funcNameStr.c_str(), true);
gvarFuncNameStr->setInitializer(constFuncName); gvarFuncNameStr->setInitializer(constFuncName);
@@ -3706,10 +3753,16 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
args.push_back(const_ptr_12); /* const char * func_name */ args.push_back(const_ptr_12); /* const char * func_name */
} }
args.push_back(launchGroupHandlePtr); /* const void ** args */ /* pass array of pointers to function arguments, this is how cuLaunchKernel accepts arguments */
//args.push_back( (llvm::dyn_cast<llvm::Function>(callee))->arg_begin() ); {
//llvm::PointerType *pt = std::vector<llvm::Value*> ptr_arraydecay_indices;
// llvm::dyn_cast<llvm::PointerType>(argType); llvm::ConstantInt* const_int32_14 = llvm::ConstantInt::get(*g->ctx, llvm::APInt(32, 0));
ptr_arraydecay_indices.push_back(const_int32_14);
ptr_arraydecay_indices.push_back(const_int32_14);
llvm::Instruction* ptr_arraydecay = llvm::GetElementPtrInst::Create(ptrParam, ptr_arraydecay_indices, "arraydecay", bblock);
args.push_back(ptr_arraydecay); /* const void ** params */
}
args.push_back(launchCount[0]); args.push_back(launchCount[0]);
args.push_back(launchCount[1]); args.push_back(launchCount[1]);
args.push_back(launchCount[2]); args.push_back(launchCount[2]);

View File

@@ -2942,17 +2942,14 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
Assert(m->errorCount > 0); Assert(m->errorCount > 0);
return NULL; return NULL;
} }
#if 0
if (g->target->isPTX() && g->target->getISA() != Target::NVPTX64 && isTask) if (g->target->isPTX() && g->target->getISA() != Target::NVPTX64 && isTask)
{ {
#if 0 /* we pass struct of pointers to CUDALaunch */
llvmArgTypes.push_back(
llvm::BitCastInst(llvm::PointerType::getUnqual(t), LLVMTypes::VoidPointerType)
);
#endif
llvmArgTypes.push_back(llvm::PointerType::getUnqual(t)); llvmArgTypes.push_back(llvm::PointerType::getUnqual(t));
//llvmArgTypes.push_back(t);
} }
else else
#endif
llvmArgTypes.push_back(t); llvmArgTypes.push_back(t);
} }