added #ifdef ISPC_NVPTX_ENABLED ... #endif guards

This commit is contained in:
evghenii
2014-07-09 12:32:18 +02:00
parent 44c74728bc
commit b3c5a9c4d6
16 changed files with 726 additions and 553 deletions

View File

@@ -73,6 +73,10 @@ endif
# To enable: make ARM_ENABLED=1
ARM_ENABLED=0
# Disable NVPTX by request
# To disable: make NVPTX_ENABLED=0
NVPTX_ENABLED=1
# Add llvm bin to the path so any scripts run will go to the right llvm-config
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
export PATH:=$(LLVM_BIN):$(PATH)
@@ -89,7 +93,7 @@ LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//')
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
# We check if it's available before adding it (to not break 3.2 and earlier).
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
@@ -98,6 +102,9 @@ endif
ifneq ($(ARM_ENABLED), 0)
LLVM_COMPONENTS+=arm
endif
ifneq ($(NVPTX_ENABLED), 0)
LLVM_COMPONENTS+=nvptx
endif
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
CLANG=clang
@@ -156,6 +163,9 @@ endif
ifneq ($(ARM_ENABLED), 0)
CXXFLAGS+=-DISPC_ARM_ENABLED
endif
ifneq ($(NVPTX_ENABLED), 0)
CXXFLAGS+=-DISPC_NVPTX_ENABLED
endif
LDFLAGS=
ifeq ($(ARCH_OS),Linux)
@@ -174,12 +184,15 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
type.cpp util.cpp
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
TARGETS=nvptx avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
ifneq ($(ARM_ENABLED), 0)
TARGETS+=neon-32 neon-16 neon-8
endif
ifneq ($(NVPTX_ENABLED), 0)
TARGETS+=nvptx
endif
# These files need to be compiled in two versions - 32 and 64 bits.
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
# These are files to be compiled in single version.

View File

@@ -342,13 +342,17 @@ lSetInternalFunctions(llvm::Module *module) {
"__all",
"__any",
"__aos_to_soa3_float",
//#ifdef ISPC_NVPTX_ENABLED
"__aos_to_soa3_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__aos_to_soa3_float16",
"__aos_to_soa3_float4",
"__aos_to_soa3_float8",
"__aos_to_soa3_int32",
"__aos_to_soa4_float",
//#ifdef ISPC_NVPTX_ENABLED
"__aos_to_soa4_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__aos_to_soa4_float16",
"__aos_to_soa4_float4",
"__aos_to_soa4_float8",
@@ -357,14 +361,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_add_int64_global",
"__atomic_add_uniform_int32_global",
"__atomic_add_uniform_int64_global",
"__atomic_add_varying_int32_global",
"__atomic_add_varying_int64_global",
"__atomic_and_int32_global",
"__atomic_and_int64_global",
"__atomic_and_uniform_int32_global",
"__atomic_and_uniform_int64_global",
"__atomic_and_varying_int32_global",
"__atomic_and_varying_int64_global",
"__atomic_compare_exchange_double_global",
"__atomic_compare_exchange_float_global",
"__atomic_compare_exchange_int32_global",
@@ -373,30 +373,18 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_compare_exchange_uniform_float_global",
"__atomic_compare_exchange_uniform_int32_global",
"__atomic_compare_exchange_uniform_int64_global",
"__atomic_compare_exchange_varying_double_global",
"__atomic_compare_exchange_varying_float_global",
"__atomic_compare_exchange_varying_int32_global",
"__atomic_compare_exchange_varying_int64_global",
"__atomic_max_uniform_int32_global",
"__atomic_max_uniform_int64_global",
"__atomic_min_uniform_int32_global",
"__atomic_min_uniform_int64_global",
"__atomic_max_varying_int32_global",
"__atomic_max_varying_int64_global",
"__atomic_min_varying_int32_global",
"__atomic_min_varying_int64_global",
"__atomic_or_int32_global",
"__atomic_or_int64_global",
"__atomic_or_uniform_int32_global",
"__atomic_or_uniform_int64_global",
"__atomic_or_varying_int32_global",
"__atomic_or_varying_int64_global",
"__atomic_sub_int32_global",
"__atomic_sub_int64_global",
"__atomic_sub_uniform_int32_global",
"__atomic_sub_uniform_int64_global",
"__atomic_sub_varying_int32_global",
"__atomic_sub_varying_int64_global",
"__atomic_swap_double_global",
"__atomic_swap_float_global",
"__atomic_swap_int32_global",
@@ -405,28 +393,46 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_swap_uniform_float_global",
"__atomic_swap_uniform_int32_global",
"__atomic_swap_uniform_int64_global",
"__atomic_swap_varying_double_global",
"__atomic_swap_varying_float_global",
"__atomic_swap_varying_int32_global",
"__atomic_swap_varying_int64_global",
"__atomic_umax_uniform_uint32_global",
"__atomic_umax_uniform_uint64_global",
"__atomic_umin_uniform_uint32_global",
"__atomic_umin_uniform_uint64_global",
"__atomic_umax_varying_uint32_global",
"__atomic_umax_varying_uint64_global",
"__atomic_umin_varying_uint32_global",
"__atomic_umin_varying_uint64_global",
"__atomic_xor_int32_global",
"__atomic_xor_int64_global",
"__atomic_xor_uniform_int32_global",
"__atomic_xor_uniform_int64_global",
//#ifdef ISPC_NVPTX_ENABLED
"__atomic_add_varying_int32_global",
"__atomic_add_varying_int64_global",
"__atomic_and_varying_int32_global",
"__atomic_and_varying_int64_global",
"__atomic_compare_exchange_varying_double_global",
"__atomic_compare_exchange_varying_float_global",
"__atomic_compare_exchange_varying_int32_global",
"__atomic_compare_exchange_varying_int64_global",
"__atomic_max_varying_int32_global",
"__atomic_max_varying_int64_global",
"__atomic_min_varying_int32_global",
"__atomic_min_varying_int64_global",
"__atomic_or_varying_int32_global",
"__atomic_or_varying_int64_global",
"__atomic_sub_varying_int32_global",
"__atomic_sub_varying_int64_global",
"__atomic_swap_varying_double_global",
"__atomic_swap_varying_float_global",
"__atomic_swap_varying_int32_global",
"__atomic_swap_varying_int64_global",
"__atomic_umax_varying_uint32_global",
"__atomic_umax_varying_uint64_global",
"__atomic_umin_varying_uint32_global",
"__atomic_umin_varying_uint64_global",
"__atomic_xor_uniform_int32_global",
"__atomic_xor_uniform_int64_global",
"__atomic_xor_varying_int32_global",
"__atomic_xor_varying_int64_global",
"__atomic_xor_varying_int32_global",
"__atomic_xor_varying_int64_global",
//#endif /* ISPC_NVPTX_ENABLED */
"__broadcast_double",
"__broadcast_float",
"__broadcast_i16",
@@ -449,7 +455,9 @@ lSetInternalFunctions(llvm::Module *module) {
"__do_assert_uniform",
"__do_assert_varying",
"__do_print",
//#ifdef ISPC_NVPTX_ENABLED
"__do_print_nvptx",
//#endif /* ISPC_NVPTX_ENABLED */
"__doublebits_uniform_int64",
"__doublebits_varying_int64",
"__exclusive_scan_add_double",
@@ -464,8 +472,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__extract_int32",
"__extract_int64",
"__extract_int8",
//#ifdef ISPC_NVPTX_ENABLED
"__extract_float",
"__extract_double",
//#endif /* ISPC_NVPTX_ENABLED */
"__fastmath",
"__float_to_half_uniform",
"__float_to_half_varying",
@@ -482,8 +492,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__insert_int32",
"__insert_int64",
"__insert_int8",
//#ifdef ISPC_NVPTX_ENABLED
"__insert_float",
"__insert_double",
//#endif /* ISPC_NVPTX_ENABLED */
"__intbits_uniform_double",
"__intbits_uniform_float",
"__intbits_varying_double",
@@ -520,7 +532,9 @@ lSetInternalFunctions(llvm::Module *module) {
"__min_varying_uint32",
"__min_varying_uint64",
"__movmsk",
//#ifdef ISPC_NVPTX_ENABLED
"__movmsk_ptx",
//#endif /* ISPC_NVPTX_ENABLED */
"__new_uniform_32rt",
"__new_uniform_64rt",
"__new_varying32_32rt",
@@ -610,13 +624,15 @@ lSetInternalFunctions(llvm::Module *module) {
"__shuffle_i64",
"__shuffle_i8",
"__soa_to_aos3_float",
"__soa_to_aos3_float1",
"__soa_to_aos3_float16",
"__soa_to_aos3_float4",
"__soa_to_aos3_float8",
"__soa_to_aos3_int32",
"__soa_to_aos4_float",
//#ifdef ISPC_NVPTX_ENABLED
"__soa_to_aos3_float1",
"__soa_to_aos4_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__soa_to_aos4_float16",
"__soa_to_aos4_float4",
"__soa_to_aos4_float8",
@@ -717,7 +733,7 @@ lSetInternalFunctions(llvm::Module *module) {
"__vec4_add_float",
"__vec4_add_int32",
"__vselect_float",
"__vselect_i32",
//#ifdef ISPC_NVPTX_ENABLED
"__program_index",
"__program_count",
"__warp_index",
@@ -736,6 +752,8 @@ lSetInternalFunctions(llvm::Module *module) {
"ISPCAlloc",
"ISPCLaunch",
"ISPCSync",
//#endif /* ISPC_NVPTX_ENABLED */
"__vselect_i32"
};
int count = sizeof(names) / sizeof(names[0]);
@@ -808,7 +826,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
g->target->getISA() != Target::NEON16 &&
g->target->getISA() != Target::NEON8)
#endif // !__arm__
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
{
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
mTriple.getArch() == bcTriple.getArch());
@@ -982,6 +1002,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// Next, add the target's custom implementations of the various needed
// builtin functions (e.g. __masked_store_32(), etc).
switch (g->target->getISA()) {
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX:
{
if (runtime32) {
@@ -993,6 +1014,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
};
#endif /* ISPC_NVPTX_ENABLED */
#ifdef ISPC_ARM_ENABLED
case Target::NEON8: {
if (runtime32) {
@@ -1262,14 +1285,18 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
// define the 'programCount' builtin variable
if (g->target->getISA() != Target::NVPTX)
{
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
}
else
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
lDefineConstantInt("programCount", 32, module, symbolTable);
}
else
{
#endif /* ISPC_NVPTX_ENABLED */
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
#ifdef ISPC_NVPTX_ENABLED
}
#endif /* ISPC_NVPTX_ENABLED */
// define the 'programIndex' builtin
lDefineProgramIndex(module, symbolTable);
@@ -1301,9 +1328,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
module, symbolTable);
#ifdef ISPC_NVPTX_ENABLED
lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX),
module, symbolTable);
#else
lDefineConstantInt("__is_nvptx_target", (int)0, module, symbolTable);
#endif /* ISPC_NVPTX_ENABLED */
if (g->forceAlignment != -1) {
llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
alignment->setInitializer(LLVMInt32(g->forceAlignment));

257
ctx.cpp
View File

@@ -57,8 +57,10 @@
#include <llvm/IR/Instructions.h>
#include <llvm/IR/DerivedTypes.h>
#endif
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/FormattedStream.h>
#endif /* ISPC_NVPTX_ENABLED */
/** This is a small utility structure that records information related to one
level of nested control flow. It's mostly used in correctly restoring
@@ -1373,28 +1375,30 @@ FunctionEmitContext::None(llvm::Value *mask) {
llvm::Value *
FunctionEmitContext::LaneMask(llvm::Value *v)
{
#if 1 /* this makes mandelbrot example slower with "nvptx" target. Need further investigation. */
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
FunctionEmitContext::LaneMask(llvm::Value *v) {
#ifdef ISPC_NVPTX_ENABLED
/* this makes mandelbrot example slower with "nvptx" target.
* Needs further investigation. */
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
#else
const char *__movmsk = "__movmsk";
const char *__movmsk = "__movmsk";
#endif
// Call the target-dependent movmsk function to turn the vector mask
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction(__movmsk, &mm);
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
AssertPos(currentPos, mm.size() == 2);
// We can actually call either one, since both are i32s as far as
// LLVM's type system is concerned...
llvm::Function *fmm = mm[0]->function;
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
// Call the target-dependent movmsk function to turn the vector mask
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction(__movmsk, &mm);
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
AssertPos(currentPos, mm.size() == 2);
// We can actually call either one, since both are i32s as far as
// LLVM's type system is concerned...
llvm::Function *fmm = mm[0]->function;
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
}
#ifdef ISPC_NVPTX_ENABLED
bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
{
llvm::Type *type = vector->getType();
@@ -1447,19 +1451,21 @@ FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
return ret;
}
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
if (g->target->getISA() == Target::NVPTX)
{
// Compare the two masks to get a vector of i1s
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
v1, v2, "v1==v2");
return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
}
else
{
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
// Compare the two masks to get a vector of i1s
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
v1, v2, "v1==v2");
return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
}
#endif /* ISPC_NVPTX_ENABLED */
#if 0
// Compare the two masks to get a vector of i1s
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
@@ -1474,7 +1480,6 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
LLVMGetName("equal", v1, v2));
#endif
}
}
llvm::Value *
@@ -1489,6 +1494,8 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
return index;
}
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
llvm::Function *func_program_index = m->module->getFunction("__program_index");
@@ -1500,6 +1507,7 @@ FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
#endif
return index;
}
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *
@@ -1919,7 +1927,6 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
if (name == NULL)
name = LLVMGetName(value, "_ptr2int");
llvm::Type *type = LLVMTypes::PointerIntType;
llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
AddDebugPos(inst);
@@ -3613,75 +3620,8 @@ llvm::Value *
FunctionEmitContext::LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount[3]){
if (g->target->getISA() != Target::NVPTX)
{
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
}
launchedTasks = true;
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType());
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
llvm::Value *argmem = BitCastInst(voidmem, pt);
// Copy the values of the parameters into the appropriate place in
// the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think
StoreInst(argVals[i], ptr);
}
if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}
// And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the
// argument block we just filled in
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr);
args.push_back(fptr);
args.push_back(voidmem);
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
}
else /* NVPTX */
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
@@ -3764,38 +3704,79 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
llvm::Value *ret = CallInst(flaunch, NULL, args, "");
return ret;
}
#endif /* ISPC_NVPTX_ENABLED */
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
}
launchedTasks = true;
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType());
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
llvm::Value *argmem = BitCastInst(voidmem, pt);
// Copy the values of the parameters into the appropriate place in
// the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think
StoreInst(argVals[i], ptr);
}
if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}
// And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the
// argument block we just filled in
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr);
args.push_back(fptr);
args.push_back(voidmem);
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
}
void
FunctionEmitContext::SyncInst() {
if (g->target->getISA() != Target::NVPTX)
{
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
llvm::CmpInst::ICMP_NE,
launchGroupHandle, nullPtrValue);
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
BranchInst(bSync, bPostSync, nonNull);
SetCurrentBasicBlock(bSync);
llvm::Function *fsync = m->module->getFunction("ISPCSync");
if (fsync == NULL)
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");
// zero out the handle so that if ISPCLaunch is called again in this
// function, it knows it's starting out from scratch
StoreInst(nullPtrValue, launchGroupHandlePtr);
BranchInst(bPostSync);
SetCurrentBasicBlock(bPostSync);
}
else /* NVPTX: don't do test, just call sync */
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
@@ -3805,7 +3786,33 @@ FunctionEmitContext::SyncInst() {
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");
StoreInst(nullPtrValue, launchGroupHandlePtr);
return;
}
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
llvm::CmpInst::ICMP_NE,
launchGroupHandle, nullPtrValue);
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
BranchInst(bSync, bPostSync, nonNull);
SetCurrentBasicBlock(bSync);
llvm::Function *fsync = m->module->getFunction("ISPCSync");
if (fsync == NULL)
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");
// zero out the handle so that if ISPCLaunch is called again in this
// function, it knows it's starting out from scratch
StoreInst(nullPtrValue, launchGroupHandlePtr);
BranchInst(bPostSync);
SetCurrentBasicBlock(bPostSync);
}

20
ctx.h
View File

@@ -291,21 +291,21 @@ public:
of the mask is on. */
llvm::Value *LaneMask(llvm::Value *mask);
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
that indicates whether the two masks are equal. */
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
/** generate constantvector, which contains programindex, i.e.
< i32 0, i32 1, i32 2, i32 3> */
llvm::Value *ProgramIndexVector(bool is32bits = true);
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
/** Issues a call to __insert_int8/int16/int32/int64/float/double */
llvm::Value* Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar);
/** Issues a call to __extract_int8/int16/int32/int64/float/double */
llvm::Value* Extract(llvm::Value *vector, llvm::Value *lane);
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
that indicates whether the two masks are equal. */
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
/** Generate ConstantVector, which contains ProgramIndex, i.e.
< i32 0, i32 1, i32 2, i32 3> */
llvm::Value *ProgramIndexVector(bool is32bits = true);
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
#endif
/** Given a string, create an anonymous global variable to hold its
value and return the pointer to the string. */

View File

@@ -168,6 +168,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);
if (soaWidth > 0) {
#ifdef ISPC_NVPTX_ENABLED
#if 0 /* see stmt.cpp in DeclStmt::EmitCode for work-around of SOAType Declaration */
if (g->target->getISA() == Target::NVPTX)
{
@@ -175,6 +176,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
return NULL;
}
#endif
#endif /* ISPC_NVPTX_ENABLED */
const StructType *st = CastType<StructType>(retType);
if (st == NULL) {
@@ -409,6 +411,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
return;
}
#ifdef ISPC_NVPTX_ENABLED
#if 0 /* NVPTX */
if (baseType->IsUniformType())
{
@@ -416,6 +419,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
baseType->IsArrayType() ? " true " : " false ");
}
#endif
#endif /* ISPC_NVPTX_ENABLED */
const Type *arrayType = new ArrayType(baseType, arraySize);
if (child != NULL) {
child->InitFromType(arrayType, ds);
@@ -544,9 +548,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
bool isUnmasked = ds && ((ds->typeQualifiers & TYPEQUAL_UNMASKED) != 0);
if (isExported && isTask) {
@@ -555,9 +559,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
return;
}
if (isExternC && isTask) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
"qualifiers");
return;
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
"qualifiers");
return;
}
if (isExternC && isExported) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "

View File

@@ -7880,12 +7880,14 @@ SizeOfExpr::TypeCheck() {
"struct type \"%s\".", type->GetString().c_str());
return NULL;
}
#ifdef ISPC_NVPTX_ENABLED
if (type != NULL)
if (g->target->getISA() == Target::NVPTX && type->IsVaryingType())
{
Error(pos, "\"sizeof\" with varying data types is not yet supported with \"nvptx\" target.");
return NULL;
}
#endif /* ISPC_NVPTX_ENABLED */
return this;
}
@@ -8718,11 +8720,13 @@ NewExpr::TypeCheck() {
AssertPos(pos, m->errorCount > 0);
return NULL;
}
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && allocType->IsVaryingType())
{
Error(pos, "\"new\" with varying data types is not yet supported with \"nvptx\" target.");
return NULL;
}
#endif /* ISPC_NVPTX_ENABLED */
if (CastType<UndefinedStructType>(allocType) != NULL) {
Error(pos, "Can't dynamically allocate storage for declared "
"but not defined type \"%s\".", allocType->GetString().c_str());

View File

@@ -47,7 +47,9 @@
#include <stdio.h>
#if defined(LLVM_3_1) || defined(LLVM_3_2)
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/Metadata.h>
#endif /* ISPC_NVPTX_ENABLED */
#include <llvm/LLVMContext.h>
#include <llvm/Module.h>
#include <llvm/Type.h>
@@ -55,7 +57,9 @@
#include <llvm/Intrinsics.h>
#include <llvm/DerivedTypes.h>
#else
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/IR/Metadata.h>
#endif /* ISPC_NVPTX_ENABLED */
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
@@ -131,7 +135,11 @@ Function::Function(Symbol *s, Stmt *c) {
sym->parentFunction = this;
}
if (type->isTask && g->target->getISA() != Target::NVPTX) {
if (type->isTask
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
Assert(threadIndexSym);
threadCountSym = m->symbolTable->LookupVariable("threadCount");
@@ -242,7 +250,11 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
#endif
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
if (type->isTask == true && g->target->getISA() != Target::NVPTX) {
if (type->isTask == true
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
// For tasks, there should always be three parameters: the
// pointer to the structure that holds all of the arguments, the
// thread index, and the thread count variables.
@@ -340,6 +352,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
ctx->SetFunctionMask(argIter);
Assert(++argIter == function->arg_end());
}
#ifdef ISPC_NVPTX_ENABLED
if (type->isTask == true && g->target->getISA() == Target::NVPTX)
{
llvm::NamedMDNode* annotations =
@@ -350,6 +363,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
av.push_back(LLVMInt32(1));
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
}
#endif /* ISPC_NVPTX_ENABLED */
}
// Finally, we can generate code for the function
@@ -505,15 +519,14 @@ Function::GenerateIR() {
// the application can call it
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
if (type->isExported) {
if (type->isExported) {
if (!type->isTask) {
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
std::string functionName = sym->name;
if (g->mangleFunctionsWithTarget)
functionName += std::string("_") + g->target->GetISAString();
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */
@@ -527,6 +540,7 @@ Function::GenerateIR() {
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
#endif
}
#endif /* ISPC_NVPTX_ENABLED */
llvm::Function *appFunction =
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
#if defined(LLVM_3_1)
@@ -566,6 +580,7 @@ Function::GenerateIR() {
FATAL("Function verificication failed");
}
}
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
llvm::NamedMDNode* annotations =
@@ -576,6 +591,7 @@ Function::GenerateIR() {
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
}
#endif /* ISPC_NVPTX_ENABLED */
}
}
}

View File

@@ -247,9 +247,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
arch = "arm";
else
#endif
#ifdef ISPC_NVPTX_ENABLED
if(!strncmp(isa, "nvptx", 5))
arch = "nvptx64";
else
#endif /* ISPC_NVPTX_ENABLED */
arch = "x86-64";
}
@@ -587,6 +589,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 32;
}
#endif
#ifdef ISPC_NVPTX_ENABLED
else if (!strcasecmp(isa, "nvptx"))
{
this->m_isa = Target::NVPTX;
@@ -602,6 +605,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasGather = this->m_hasScatter = false;
cpuFromIsa = "sm_35";
}
#endif /* ISPC_NVPTX_ENABLED */
else {
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
isa, SupportedTargets());
@@ -720,8 +724,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
// Initialize target-specific "target-feature" attribute.
if (!m_attributes.empty()) {
llvm::AttrBuilder attrBuilder;
#ifdef ISPC_NVPTX_ENABLED
if (m_isa != Target::NVPTX)
attrBuilder.addAttribute("target-cpu", this->m_cpu);
#endif
attrBuilder.addAttribute("target-cpu", this->m_cpu);
attrBuilder.addAttribute("target-features", this->m_attributes);
this->m_tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get(
@@ -768,6 +774,9 @@ Target::SupportedTargets() {
return
#ifdef ISPC_ARM_ENABLED
"neon-i8x16, neon-i16x8, neon-i32x4, "
#endif
#ifdef ISPC_NVPTX_ENABLED
"nvptx, "
#endif
"sse2-i32x4, sse2-i32x8, "
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
@@ -776,7 +785,7 @@ Target::SupportedTargets() {
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
"generic-x1, generic-x4, generic-x8, generic-x16, "
"generic-x32, generic-x64, nvptx";
"generic-x32, generic-x64";
}
@@ -803,8 +812,10 @@ Target::GetTripleString() const {
triple.setArchName("i386");
else if (m_arch == "x86-64")
triple.setArchName("x86_64");
#ifdef ISPC_NVPTX_ENABLED
else if (m_arch == "nvptx64")
triple = llvm::Triple("nvptx64", "nvidia", "cuda");
#endif /* ISPC_NVPTX_ENABLED */
else
triple.setArchName(m_arch);
}
@@ -837,8 +848,10 @@ Target::ISAToString(ISA isa) {
return "avx2";
case Target::GENERIC:
return "generic";
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX:
return "nvptx";
#endif /* ISPC_NVPTX_ENABLED */
default:
FATAL("Unhandled target in ISAToString()");
}
@@ -877,8 +890,10 @@ Target::ISAToTargetString(ISA isa) {
return "avx2-i32x8";
case Target::GENERIC:
return "generic-4";
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX:
return "nvptx";
#endif /* ISPC_NVPTX_ENABLED */
default:
FATAL("Unhandled target in ISAToTargetString()");
}

6
ispc.h
View File

@@ -179,7 +179,10 @@ public:
flexible/performant of them will appear last in the enumerant. Note
also that __best_available_isa() needs to be updated if ISAs are
added or the enumerant values are reordered. */
enum ISA { NVPTX,
enum ISA {
#ifdef ISPC_NVPTX_ENABLED
NVPTX,
#endif
#ifdef ISPC_ARM_ENABLED
NEON32, NEON16, NEON8,
#endif
@@ -611,7 +614,6 @@ struct Globals {
/** Indicates that alignment in memory allocation routines should be
forced to have the given value. A value of -1 means natural alignment for the platform. */
int forceAlignment;
std::string PtxString;
};
enum {

View File

@@ -320,10 +320,12 @@ int main(int Argc, char *Argv[]) {
LLVMInitializeARMTargetMC();
#endif
#ifdef ISPC_NVPTX_ENABLED
LLVMInitializeNVPTXTargetInfo();
LLVMInitializeNVPTXTarget();
LLVMInitializeNVPTXAsmPrinter();
LLVMInitializeNVPTXTargetMC();
#endif /* ISPC_NVPTX_ENABLED */
char *file = NULL;
const char *headerFileName = NULL;

View File

@@ -58,7 +58,9 @@
#include <set>
#include <sstream>
#include <iostream>
#ifdef ISPC_NVPTX_ENABLED
#include <map>
#endif /* ISPC_NVPTX_ENABLED */
#ifdef ISPC_IS_WINDOWS
#include <windows.h>
#include <io.h>
@@ -72,7 +74,9 @@
#include <llvm/Instructions.h>
#include <llvm/Intrinsics.h>
#include <llvm/DerivedTypes.h>
#ifdef ISPC_NVPTX_ENABLED
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#endif /* ISPC_NVPTX_ENABLED */
#else
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
@@ -80,7 +84,9 @@
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/DerivedTypes.h>
#ifdef ISPC_NVPTX_ENABLED
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#endif /* ISPC_NVPTX_ENABLED */
#endif
#include <llvm/PassManager.h>
#include <llvm/PassRegistry.h>
@@ -446,6 +452,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
return;
}
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX &&
#if 0
!type->IsConstType() &&
@@ -476,7 +483,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
type = new ArrayType(type->GetAsUniformType(), nel);
#endif
}
#endif /* ISPC_NVPTX_ENABLED */
llvm::Type *llvmType = type->LLVMType(g->ctx);
if (llvmType == NULL)
@@ -677,6 +684,7 @@ lCheckExportedParameterTypes(const Type *type, const std::string &name,
}
}
#ifdef ISPC_NVPTX_ENABLED
static void
lCheckTaskParameterTypes(const Type *type, const std::string &name,
SourcePos pos) {
@@ -691,7 +699,7 @@ lCheckTaskParameterTypes(const Type *type, const std::string &name,
name.c_str());
}
}
#endif /* ISPC_NVPTX_ENABLED */
/** Given a function type, loop through the function parameters and see if
any are StructTypes. If so, issue an error; this is currently broken
@@ -849,8 +857,12 @@ Module::AddFunctionDeclaration(const std::string &name,
#else // LLVM 3.1 and 3.3+
function->addFnAttr(llvm::Attribute::AlwaysInline);
#endif
/* evghenii: fails function verification when "if" executed in nvptx target */
if (functionType->isTask && g->target->getISA() != Target::NVPTX)
if (functionType->isTask)
#ifdef ISPC_NVPTX_ENABLED
/* evghenii: fails function verification when "if" executed in nvptx target */
if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
// This also applies transitively to members I think?
#if defined(LLVM_3_1)
function->setDoesNotAlias(1, true);
@@ -871,12 +883,14 @@ Module::AddFunctionDeclaration(const std::string &name,
functionType->GetReturnType()->IsVoidType() == false)
Error(pos, "Task-qualified functions must have void return type.");
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX &&
Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false &&
functionType->isExported)
{
Error(pos, "Export-qualified functions must have void return type with \"nvptx\" target.");
}
#endif /* ISPC_NVPTX_ENABLED */
if (functionType->isExported || functionType->isExternC)
lCheckForStructParameters(functionType, pos);
@@ -897,9 +911,12 @@ Module::AddFunctionDeclaration(const std::string &name,
if (functionType->isExported) {
lCheckExportedParameterTypes(argType, argName, argPos);
}
#ifdef ISPC_NVPTX_ENABLED
if (functionType->isTask) {
lCheckTaskParameterTypes(argType, argName, argPos);
}
#endif /* ISPC_NVPTX_ENABLED */
// ISPC assumes that no pointers alias. (It should be possible to
// specify when this is not the case, but this should be the
@@ -1027,24 +1044,28 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
const char *fileType = NULL;
switch (outputType) {
case Asm:
if (g->target->getISA() != Target::NVPTX)
{
if (strcasecmp(suffix, "s"))
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
if (strcasecmp(suffix, "ptx"))
fileType = "assembly";
}
else
if (strcasecmp(suffix, "ptx"))
}
else
#endif /* ISPC_NVPTX_ENABLED */
if (strcasecmp(suffix, "s"))
fileType = "assembly";
break;
case Bitcode:
if (g->target->getISA() != Target::NVPTX)
{
if (strcasecmp(suffix, "bc"))
fileType = "LLVM bitcode";
}
else
if (strcasecmp(suffix, "ll"))
fileType = "LLVM assembly";
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
if (strcasecmp(suffix, "ll"))
fileType = "LLVM assembly";
}
else
#endif /* ISPC_NVPTX_ENABLED */
if (strcasecmp(suffix, "bc"))
fileType = "LLVM bitcode";
break;
case Object:
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
@@ -1113,6 +1134,7 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
return writeObjectFileOrAssembly(outputType, outFileName);
}
#ifdef ISPC_NVPTX_ENABLED
typedef std::vector<std::string> vecString_t;
static vecString_t
lSplitString(const std::string &s)
@@ -1180,6 +1202,7 @@ lFixAttributes(const vecString_t &src, vecString_t &dst)
dst.push_back(s);
}
}
#endif /* ISPC_NVPTX_ENABLED */
bool
Module::writeBitcode(llvm::Module *module, const char *outFileName) {
@@ -1204,11 +1227,8 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
}
llvm::raw_fd_ostream fos(fd, (fd != 1), false);
if (g->target->getISA() != Target::NVPTX)
{
llvm::WriteBitcodeToFile(module, fos);
}
else
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
/* when using "nvptx" target, emit patched/hacked assembly
* NVPTX only accepts 3.2-style LLVM assembly, where attributes
@@ -1240,7 +1260,9 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
fos << *it;
}
}
else
#endif /* ISPC_NVPTX_ENABLED */
llvm::WriteBitcodeToFile(module, fos);
return true;
}
@@ -2275,6 +2297,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
opts.addMacroDef(g->cppArgs[i].substr(2));
}
}
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
opts.addMacroDef("__NVPTX__");
@@ -2295,6 +2318,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
opts.addMacroDef("taskCount2=__taskCount2()");
opts.addMacroDef("taskCount=__taskCount()");
}
#endif /* ISPC_NVPTX_ENABLED */
inst.getLangOpts().LineComment = 1;
#if defined(LLVM_3_5)
@@ -2740,6 +2764,7 @@ lCreateDispatchModule(std::map<std::string, FunctionTargetVariants> &functions)
return module;
}
#ifdef ISPC_NVPTX_ENABLED
static std::string lCBEMangle(const std::string &S) {
std::string Result;
@@ -2762,7 +2787,7 @@ static std::string lCBEMangle(const std::string &S) {
}
return Result;
}
#endif /* ISPC_NVPTX_ENABLED */
int
Module::CompileAndOutput(const char *srcFile,
@@ -2778,7 +2803,7 @@ Module::CompileAndOutput(const char *srcFile,
const char *hostStubFileName,
const char *devStubFileName)
{
if (target == NULL || strchr(target, ',') == NULL) {
if (target == NULL || strchr(target, ',') == NULL) {
// We're only compiling to a single target
g->target = new Target(arch, cpu, target, generatePIC);
if (!g->target->isValid())
@@ -2786,7 +2811,7 @@ Module::CompileAndOutput(const char *srcFile,
m = new Module(srcFile);
if (m->CompileFile() == 0) {
#ifdef ISPC_NVPTX_ENABLED
/* NVPTX:
* for PTX target replace '.' with '_' in all global variables
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
@@ -2811,7 +2836,7 @@ Module::CompileAndOutput(const char *srcFile,
}
}
}
#endif /* ISPC_NVPTX_ENABLED */
if (outputType == CXX) {
if (target == NULL || strncmp(target, "generic-", 8) != 0) {
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
@@ -3014,5 +3039,4 @@ Module::CompileAndOutput(const char *srcFile,
return errorCount > 0;
}
return true;
}

17
opt.cpp
View File

@@ -55,7 +55,9 @@
#include <llvm/Function.h>
#include <llvm/BasicBlock.h>
#include <llvm/Constants.h>
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/InlineAsm.h>
#endif /* ISPC_NVPTX_ENABLED */
#else
#include <llvm/IR/Module.h>
#include <llvm/IR/Instructions.h>
@@ -63,7 +65,9 @@
#include <llvm/IR/Function.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h>
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/IR/InlineAsm.h>
#endif /* ISPC_NVPTX_ENABLED */
#endif
#if defined (LLVM_3_4) || defined(LLVM_3_5)
#include <llvm/Transforms/Instrumentation.h>
@@ -131,7 +135,9 @@ static llvm::Pass *CreateDebugPass(char * output);
static llvm::Pass *CreateReplaceStdlibShiftPass();
static llvm::Pass *CreateFixBooleanSelectPass();
#ifdef ISPC_NVPTX_ENABLED
static llvm::Pass *CreatePromoteLocalToPrivatePass();
#endif /* ISPC_NVPTX_ENABLED */
#define DEBUG_START_PASS(NAME) \
if (g->debugPrint && \
@@ -495,9 +501,11 @@ Optimize(llvm::Module *module, int optLevel) {
// run absolutely no optimizations, since the front-end needs us to
// take the various __pseudo_* functions it has emitted and turn
// them into something that can actually execute.
optPM.add(CreateImproveMemoryOpsPass(), 100);
#ifdef ISPC_NVPTX_ENABLED
if (g->opt.disableGatherScatterOptimizations == false &&
g->target->getVectorWidth() > 1)
#endif /* ISPC_NVPTX_ENABLED */
optPM.add(CreateImproveMemoryOpsPass(), 100);
if (g->opt.disableHandlePseudoMemoryOps == false)
@@ -579,7 +587,9 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createReassociatePass());
optPM.add(llvm::createIPConstantPropagationPass());
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
optPM.add(CreateReplaceStdlibShiftPass(),229);
optPM.add(llvm::createDeadArgEliminationPass(),230);
@@ -693,7 +703,7 @@ Optimize(llvm::Module *module, int optLevel) {
// Should be the last
optPM.add(CreateFixBooleanSelectPass(), 400);
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
optPM.add(CreatePromoteLocalToPrivatePass());
@@ -799,6 +809,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createConstantMergePass());
#endif
}
#endif /* ISPC_NVPTX_ENABLED */
}
// Finish up by making sure we didn't mess anything up in the IR along
@@ -5410,6 +5421,7 @@ CreateFixBooleanSelectPass() {
return new FixBooleanSelectPass();
}
#ifdef ISPC_NVPTX_ENABLED
///////////////////////////////////////////////////////////////////////////////
// Detect addrspace(3)
///////////////////////////////////////////////////////////////////////////////
@@ -5498,4 +5510,5 @@ CreatePromoteLocalToPrivatePass() {
#endif /* ISPC_NVPTX_ENABLED */

View File

@@ -45,11 +45,13 @@ then
# $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
$($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \
$($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
$(/bin/rm -rf $TMPDIR/*$fbname*);
else
$(sed 's/\.b0/\.b32/g' $1 > $TMPDIR/$fbname) && \
$($PTXCC $TMPDIR/$fbname -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
$(/bin/rm -rf $TMPDIR/*$fbname*);
fi

View File

@@ -233,7 +233,7 @@ def run_test(testname):
elif is_nvptx_target:
if os.environ.get("NVVM") == "1":
is_nvptx_nvvm = True
obj_name = "%s.bc" % testname
obj_name = "%s.ll" % testname
else:
obj_name = "%s.ptx" % testname
is_nvptx_nvvm = False

700
stmt.cpp
View File

@@ -142,6 +142,7 @@ lHasUnsizedArrays(const Type *type) {
return lHasUnsizedArrays(at->GetElementType());
}
#ifdef ISPC_NVPTX_ENABLED
static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
{
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
@@ -198,6 +199,7 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
return value;
}
#endif /* ISPC_NVPTX_ENABLED */
void
DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
@@ -261,9 +263,8 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
return;
}
if (sym->storageClass == SC_STATIC) {
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
{
Error(sym->pos,
@@ -279,7 +280,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
PerformanceWarning(sym->pos,
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
sym->name.c_str());
#endif /* ISPC_NVPTX_ENABLED */
// For static variables, we need a compile-time constant value
// for its initializer; if there's no initializer, we use a
// zero value.
@@ -307,28 +308,38 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
if (cinit == NULL)
cinit = llvm::Constant::getNullValue(llvmType);
// Allocate space for the static variable in global scope, so
// that it persists across function calls
#ifdef ISPC_NVPTX_ENABLED
int addressSpace = 0;
if (g->target->getISA() == Target::NVPTX &&
sym->type->IsConstType() &&
sym->type->IsUniformType())
addressSpace = 4;
// Allocate space for the static variable in global scope, so
// that it persists across function calls
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static_") +
llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(),
llvm::Twine(".") + sym->name.c_str(),
NULL,
llvm::GlobalVariable::NotThreadLocal,
addressSpace);
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
#else /* ISPC_NVPTX_ENABLED */
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine(".") + sym->name.c_str());
#endif /* ISPC_NVPTX_ENABLED */
// Tell the FunctionEmitContext about the variable
ctx->EmitVariableDebugInfo(sym);
}
#ifdef ISPC_NVPTX_ENABLED
else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
/* NVPTX:
* only non-constant uniform data types are stored in shared memory
@@ -396,6 +407,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
}
#endif /* ISPC_NVPTX_ENABLED */
else
{
// For non-static variables, allocate storage on the stack
@@ -404,7 +416,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// Tell the FunctionEmitContext about the variable; must do
// this before the initializer stuff.
ctx->EmitVariableDebugInfo(sym);
if (initExpr == 0 && sym->type->IsConstType())
Error(sym->pos, "Missing initializer for const variable "
"\"%s\".", sym->name.c_str());
@@ -412,7 +423,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// And then get it initialized...
sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
}
}
}
}
@@ -571,7 +582,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
if (testValue == NULL)
return;
#ifdef ISPC_NVPTX_ENABLED
#if 0
if (!isUniform && g->target->getISA() == Target::NVPTX)
{
@@ -582,7 +593,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
isUniform = true;
}
#endif
#endif /* ISPC_NVPTX_ENABLED */
if (isUniform) {
ctx->StartUniformIf();
@@ -865,11 +876,17 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Do any of the program instances want to run the 'true'
// block? If not, jump ahead to bNext.
#if 1
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#else
#ifdef ISPC_NVPTX_ENABLED
#if 0
llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
// Emit statements for true
@@ -886,11 +903,16 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Similarly, check to see if any of the instances want to
// run the 'false' block...
#if 1
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#else
#ifdef ISPC_NVPTX_ENABLED
#if 0
llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
// Emit code for false
@@ -1450,10 +1472,96 @@ static llvm::Value *
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
llvm::Value *uniformCounterPtr,
llvm::Value *varyingCounterPtr,
const std::vector<int> &spans)
{
if (g->target->getISA() != Target::NVPTX)
{
const std::vector<int> &spans) {
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
#endif /* ISPC_NVPTX_ENABLED */
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
@@ -1486,93 +1594,6 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
LLVMInt32Vector(delta), "iter_val");
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
else /* NVPTX == true */
{
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
}
@@ -1650,7 +1671,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// This should be caught during typechecking
AssertPos(pos, startExprs.size() == dimVariables.size() &&
endExprs.size() == dimVariables.size());
endExprs.size() == dimVariables.size());
int nDims = (int)dimVariables.size();
///////////////////////////////////////////////////////////////////////
@@ -1661,66 +1682,70 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
std::vector<int> span(nDims, 0);
#ifdef ISPC_NVPTX_ENABLED
const int vectorWidth =
g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
#else /* ISPC_NVPTX_ENABLED */
lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
#endif /* ISPC_NVPTX_ENABLED */
for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for
// this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
if (i < nDims-1)
// stepping for the innermost dimension is handled specially
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Basic blocks that we'll fill in later with the looping logic for
// this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
if (i < nDims-1)
// stepping for the innermost dimension is handled specially
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx);
if (sv == NULL || ev == NULL)
return;
startVals.push_back(sv);
endVals.push_back(ev);
// Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx);
if (sv == NULL || ev == NULL)
return;
startVals.push_back(sv);
endVals.push_back(ev);
// nItems = endVal - startVal
llvm::Value *nItems =
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
// nItems = endVal - startVal
llvm::Value *nItems =
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
// nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly
// into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
LLVMInt32(span[i]), "nextras"));
// nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly
// into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
LLVMInt32(span[i]), "nextras"));
// alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end"));
// alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end"));
///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
// There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is
// the value that is program-visible.
dimVariables[i]->storagePtr =
ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]);
// There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is
// the value that is program-visible.
dimVariables[i]->storagePtr =
ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]);
// Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
// Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
}
ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
@@ -1733,14 +1758,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// a given dimension in preparation for running through its loop again,
// after the enclosing level advances its counter.
for (int i = 0; i < nDims; ++i) {
ctx->SetCurrentBasicBlock(bbReset[i]);
if (i == 0)
ctx->BranchInst(bbExit);
else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]);
}
ctx->SetCurrentBasicBlock(bbReset[i]);
if (i == 0)
ctx->BranchInst(bbExit);
else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]);
}
}
///////////////////////////////////////////////////////////////////////////
@@ -1750,67 +1775,67 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// this for the innermost dimension, which has a more complex stepping
// structure..
for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]);
ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]);
}
///////////////////////////////////////////////////////////////////////////
// foreach_test (for all dimensions other than the innermost...)
std::vector<llvm::Value *> inExtras;
for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbTest[i]);
ctx->SetCurrentBasicBlock(bbTest[i]);
llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras");
llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras");
if (i == 0)
inExtras.push_back(inEx);
else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all"));
if (i == 0)
inExtras.push_back(inEx);
else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all"));
llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span);
llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]);
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
}
if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]);
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
}
llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]);
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]);
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
}
///////////////////////////////////////////////////////////////////////////
@@ -1847,18 +1872,18 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// (i.e. processing extra elements that don't exactly fit into a
// vector).
llvm::BasicBlock *bbOuterInExtras =
ctx->CreateBasicBlock("outer_in_extras");
ctx->CreateBasicBlock("outer_in_extras");
llvm::BasicBlock *bbOuterNotInExtras =
ctx->CreateBasicBlock("outer_not_in_extras");
ctx->CreateBasicBlock("outer_not_in_extras");
ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
if (inExtras.size())
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
inExtras.back());
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
inExtras.back());
else
// for a 1D iteration domain, we certainly don't have any enclosing
// dimensions that are processing extra elements.
ctx->BranchInst(bbOuterNotInExtras);
// for a 1D iteration domain, we certainly don't have any enclosing
// dimensions that are processing extra elements.
ctx->BranchInst(bbOuterNotInExtras);
///////////////////////////////////////////////////////////////////////////
// One or more outer dimensions in extras, so we need to mask for the loop
@@ -1873,21 +1898,21 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask
// }
llvm::BasicBlock *bbAllInnerPartialOuter =
ctx->CreateBasicBlock("all_inner_partial_outer");
ctx->CreateBasicBlock("all_inner_partial_outer");
llvm::BasicBlock *bbPartial =
ctx->CreateBasicBlock("both_partial");
ctx->CreateBasicBlock("both_partial");
ctx->SetCurrentBasicBlock(bbOuterInExtras); {
// Update the varying counter value here, since all subsequent
// blocks along this path need it.
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
// Update the varying counter value here, since all subsequent
// blocks along this path need it.
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
// here we just check to see if counter < alignedEnd
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
// here we just check to see if counter < alignedEnd
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
}
// Below we have a basic block that runs the loop body code for the
@@ -1906,53 +1931,53 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// should step the loop counter for the next enclosing dimension
// instead.
llvm::Value *stepIndexAfterMaskedBodyPtr =
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
///////////////////////////////////////////////////////////////////////////
// We're in the inner loop part where the only masking is due to outer
// dimensions but the innermost dimension fits fully into a vector's
// width. Set the mask and jump to the masked loop body.
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
llvm::Value *mask;
if (nDims == 1)
// 1D loop; we shouldn't ever get here anyway
mask = LLVMMaskAllOff;
else
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *mask;
if (nDims == 1)
// 1D loop; we shouldn't ever get here anyway
mask = LLVMMaskAllOff;
else
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
ctx->SetInternalMask(mask);
ctx->SetInternalMask(mask);
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
// We need to include the effect of the innermost dimension in the mask
// for the final bits here
ctx->SetCurrentBasicBlock(bbPartial); {
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
if (nDims == 1) {
ctx->SetInternalMask(emask);
}
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->SetInternalMask(newMask);
}
if (nDims == 1) {
ctx->SetInternalMask(emask);
}
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->SetInternalMask(newMask);
}
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
@@ -1968,14 +1993,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask
// }
llvm::BasicBlock *bbPartialInnerAllOuter =
ctx->CreateBasicBlock("partial_inner_all_outer");
ctx->CreateBasicBlock("partial_inner_all_outer");
ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
beforeAlignedEnd);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
beforeAlignedEnd);
}
///////////////////////////////////////////////////////////////////////////
@@ -1985,26 +2010,26 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// value of the varying loop counter and have the statements in the
// loop body emit their code.
llvm::BasicBlock *bbFullBodyContinue =
ctx->CreateBasicBlock("foreach_full_continue");
ctx->CreateBasicBlock("foreach_full_continue");
ctx->SetCurrentBasicBlock(bbFullBody); {
ctx->SetInternalMask(LLVMMaskAllOn);
ctx->SetBlockEntryMask(LLVMMaskAllOn);
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue);
ctx->SetInternalMask(LLVMMaskAllOn);
ctx->SetBlockEntryMask(LLVMMaskAllOn);
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue);
}
ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
ctx->RestoreContinuedLanes();
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterNotInExtras);
ctx->RestoreContinuedLanes();
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterNotInExtras);
}
///////////////////////////////////////////////////////////////////////////
@@ -2012,33 +2037,33 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// less than the end value, in which case we need to run the body one
// more time to get the extra bits.
llvm::BasicBlock *bbSetInnerMask =
ctx->CreateBasicBlock("partial_inner_only");
ctx->CreateBasicBlock("partial_inner_only");
ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeFullEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[nDims-1], "before_full_end");
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeFullEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[nDims-1], "before_full_end");
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
}
///////////////////////////////////////////////////////////////////////////
// The outer dimensions are all on, so the mask is just given by the
// mask for the innermost dimension
ctx->SetCurrentBasicBlock(bbSetInnerMask); {
llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
ctx->SetInternalMask(emask);
ctx->SetBlockEntryMask(emask);
llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
ctx->SetInternalMask(emask);
ctx->SetBlockEntryMask(emask);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
@@ -2048,34 +2073,34 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// mask known to be all-on, which in turn leads to more efficient code
// for that case.
llvm::BasicBlock *bbStepInnerIndex =
ctx->CreateBasicBlock("step_inner_index");
ctx->CreateBasicBlock("step_inner_index");
llvm::BasicBlock *bbMaskedBodyContinue =
ctx->CreateBasicBlock("foreach_masked_continue");
ctx->CreateBasicBlock("foreach_masked_continue");
ctx->SetCurrentBasicBlock(bbMaskedBody); {
ctx->AddInstrumentationPoint("foreach loop body (masked)");
ctx->SetContinueTarget(bbMaskedBodyContinue);
ctx->DisableGatherScatterWarnings();
ctx->SetBlockEntryMask(ctx->GetFullMask());
stmts->EmitCode(ctx);
ctx->EnableGatherScatterWarnings();
ctx->BranchInst(bbMaskedBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (masked)");
ctx->SetContinueTarget(bbMaskedBodyContinue);
ctx->DisableGatherScatterWarnings();
ctx->SetBlockEntryMask(ctx->GetFullMask());
stmts->EmitCode(ctx);
ctx->EnableGatherScatterWarnings();
ctx->BranchInst(bbMaskedBodyContinue);
}
ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
ctx->RestoreContinuedLanes();
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
ctx->RestoreContinuedLanes();
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
}
///////////////////////////////////////////////////////////////////////////
// step the innermost index, for the case where we're doing the
// innermost for loop over full vectors.
ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterInExtras);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterInExtras);
}
///////////////////////////////////////////////////////////////////////////
@@ -2262,8 +2287,12 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
// math...)
// Get the "program index" vector value
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *programIndex = ctx->ProgramIndexVector();
#endif /* ISPC_NVPTX_ENABLED */
// And smear the current lane out to a vector
llvm::Value *firstSet32 =
@@ -2460,19 +2489,22 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
// And load the corresponding element value from the temporary
// memory storing the value of the varying expr.
llvm::Value *uniqueValue;
if (g->target->getISA() != Target::NVPTX)
{
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
}
else /* in case of PTX target, use __shfl PTX intrinsics via __insert/__extract function */
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
uniqueValue = ctx->Extract(exprValue, firstSet32);
}
else
{
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
#ifdef ISPC_NVPTX_ENABLED
}
#endif /* ISPC_NVPTX_ENABLED */
// If it's a varying pointer type, need to convert from the int
// type we store in the vector to the actual pointer type
if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
@@ -3379,8 +3411,12 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
}
// Now we can emit code to call __do_print()
#ifdef ISPC_NVPTX_ENABLED
llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
#else /* ISPC_NVPTX_ENABLED */
llvm::Function *printFunc = m->module->getFunction("__do_print");
#endif /* ISPC_NVPTX_ENABLED */
AssertPos(pos, printFunc);
llvm::Value *mask = ctx->GetFullMask();

View File

@@ -751,7 +751,7 @@ EnumType::Mangle() const {
std::string ret;
if (isConst) ret += "C";
ret += variability.MangleString();
ret += std::string("enum_5B_") + name + std::string("_5C_");
ret += std::string("enum[") + name + std::string("]");
return ret;
}
@@ -1433,7 +1433,7 @@ ArrayType::Mangle() const {
sprintf(buf, "%d", numElements);
else
buf[0] = '\0';
return s + "_5B_" + buf + "_5C_";
return s + "[" + buf + "]";
}
@@ -2106,12 +2106,12 @@ lMangleStruct(Variability variability, bool isConst, const std::string &name) {
Assert(variability != Variability::Unbound);
std::string ret;
ret += "s_5B_";
ret += "s[";
if (isConst)
ret += "_c_";
ret += variability.MangleString();
ret += name + std::string("_5C_");
ret += name + std::string("]");
return ret;
}
@@ -3057,7 +3057,11 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
llvmArgTypes.push_back(LLVMTypes::MaskType);
std::vector<llvm::Type *> callTypes;
if (isTask && g->target->getISA() != Target::NVPTX) {
if (isTask
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
// Tasks take three arguments: a pointer to a struct that holds the
// actual task arguments, the thread index, and the total number of
// threads the tasks system has running. (Task arguments are