added #ifdef ISPC_NVPTX_ENABLED ... #endif guards
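The guards follow one pattern throughout: NVPTX-only code is wrapped in a compile-time #ifdef, and where both code paths live in the same function the NVPTX branch is additionally gated by a run-time ISA check. A minimal sketch of the pattern (illustrative only, not copied from any one file):

    #ifdef ISPC_NVPTX_ENABLED
        if (g->target->getISA() == Target::NVPTX) {
            // NVPTX-specific path
        }
        else
    #endif /* ISPC_NVPTX_ENABLED */
        {
            // default path, always compiled
        }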
Makefile | 17
@@ -73,6 +73,10 @@ endif
# To enable: make ARM_ENABLED=1
ARM_ENABLED=0

# Disable NVPTX by request
# To disable: make NVPTX_ENABLED=0
NVPTX_ENABLED=1

# Add llvm bin to the path so any scripts run will go to the right llvm-config
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
export PATH:=$(LLVM_BIN):$(PATH)
@@ -89,7 +93,7 @@ LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//')
LLVM_VERSION_DEF=-D$(LLVM_VERSION)

LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
# We check if it's available before adding it (to not break 3.2 and earlier).
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
@@ -98,6 +102,9 @@ endif
ifneq ($(ARM_ENABLED), 0)
LLVM_COMPONENTS+=arm
endif
ifneq ($(NVPTX_ENABLED), 0)
LLVM_COMPONENTS+=nvptx
endif
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))

CLANG=clang
@@ -156,6 +163,9 @@ endif
ifneq ($(ARM_ENABLED), 0)
CXXFLAGS+=-DISPC_ARM_ENABLED
endif
ifneq ($(NVPTX_ENABLED), 0)
CXXFLAGS+=-DISPC_NVPTX_ENABLED
endif

LDFLAGS=
ifeq ($(ARCH_OS),Linux)
@@ -174,12 +184,15 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
type.cpp util.cpp
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
TARGETS=nvptx avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
ifneq ($(ARM_ENABLED), 0)
TARGETS+=neon-32 neon-16 neon-8
endif
ifneq ($(NVPTX_ENABLED), 0)
TARGETS+=nvptx
endif
# These files need to be compiled in two versions - 32 and 64 bits.
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
# These are files to be compiled in single version.
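Note on the Makefile hunks above: NVPTX support is on by default (NVPTX_ENABLED=1). Building with make NVPTX_ENABLED=0 drops the nvptx LLVM component, removes the nvptx entry from TARGETS, and leaves -DISPC_NVPTX_ENABLED out of CXXFLAGS, so every #ifdef ISPC_NVPTX_ENABLED block in the sources below compiles away.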
builtins.cpp | 95
@@ -342,13 +342,17 @@ lSetInternalFunctions(llvm::Module *module) {
"__all",
"__any",
"__aos_to_soa3_float",
//#ifdef ISPC_NVPTX_ENABLED
"__aos_to_soa3_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__aos_to_soa3_float16",
"__aos_to_soa3_float4",
"__aos_to_soa3_float8",
"__aos_to_soa3_int32",
"__aos_to_soa4_float",
//#ifdef ISPC_NVPTX_ENABLED
"__aos_to_soa4_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__aos_to_soa4_float16",
"__aos_to_soa4_float4",
"__aos_to_soa4_float8",
@@ -357,14 +361,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_add_int64_global",
"__atomic_add_uniform_int32_global",
"__atomic_add_uniform_int64_global",
"__atomic_add_varying_int32_global",
"__atomic_add_varying_int64_global",
"__atomic_and_int32_global",
"__atomic_and_int64_global",
"__atomic_and_uniform_int32_global",
"__atomic_and_uniform_int64_global",
"__atomic_and_varying_int32_global",
"__atomic_and_varying_int64_global",
"__atomic_compare_exchange_double_global",
"__atomic_compare_exchange_float_global",
"__atomic_compare_exchange_int32_global",
@@ -373,30 +373,18 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_compare_exchange_uniform_float_global",
"__atomic_compare_exchange_uniform_int32_global",
"__atomic_compare_exchange_uniform_int64_global",
"__atomic_compare_exchange_varying_double_global",
"__atomic_compare_exchange_varying_float_global",
"__atomic_compare_exchange_varying_int32_global",
"__atomic_compare_exchange_varying_int64_global",
"__atomic_max_uniform_int32_global",
"__atomic_max_uniform_int64_global",
"__atomic_min_uniform_int32_global",
"__atomic_min_uniform_int64_global",
"__atomic_max_varying_int32_global",
"__atomic_max_varying_int64_global",
"__atomic_min_varying_int32_global",
"__atomic_min_varying_int64_global",
"__atomic_or_int32_global",
"__atomic_or_int64_global",
"__atomic_or_uniform_int32_global",
"__atomic_or_uniform_int64_global",
"__atomic_or_varying_int32_global",
"__atomic_or_varying_int64_global",
"__atomic_sub_int32_global",
"__atomic_sub_int64_global",
"__atomic_sub_uniform_int32_global",
"__atomic_sub_uniform_int64_global",
"__atomic_sub_varying_int32_global",
"__atomic_sub_varying_int64_global",
"__atomic_swap_double_global",
"__atomic_swap_float_global",
"__atomic_swap_int32_global",
@@ -405,28 +393,46 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_swap_uniform_float_global",
"__atomic_swap_uniform_int32_global",
"__atomic_swap_uniform_int64_global",
"__atomic_swap_varying_double_global",
"__atomic_swap_varying_float_global",
"__atomic_swap_varying_int32_global",
"__atomic_swap_varying_int64_global",
"__atomic_umax_uniform_uint32_global",
"__atomic_umax_uniform_uint64_global",
"__atomic_umin_uniform_uint32_global",
"__atomic_umin_uniform_uint64_global",
"__atomic_umax_varying_uint32_global",
"__atomic_umax_varying_uint64_global",
"__atomic_umin_varying_uint32_global",
"__atomic_umin_varying_uint64_global",
"__atomic_xor_int32_global",
"__atomic_xor_int64_global",
"__atomic_xor_uniform_int32_global",
"__atomic_xor_uniform_int64_global",
//#ifdef ISPC_NVPTX_ENABLED
"__atomic_add_varying_int32_global",
"__atomic_add_varying_int64_global",
"__atomic_and_varying_int32_global",
"__atomic_and_varying_int64_global",
"__atomic_compare_exchange_varying_double_global",
"__atomic_compare_exchange_varying_float_global",
"__atomic_compare_exchange_varying_int32_global",
"__atomic_compare_exchange_varying_int64_global",
"__atomic_max_varying_int32_global",
"__atomic_max_varying_int64_global",
"__atomic_min_varying_int32_global",
"__atomic_min_varying_int64_global",
"__atomic_or_varying_int32_global",
"__atomic_or_varying_int64_global",
"__atomic_sub_varying_int32_global",
"__atomic_sub_varying_int64_global",
"__atomic_swap_varying_double_global",
"__atomic_swap_varying_float_global",
"__atomic_swap_varying_int32_global",
"__atomic_swap_varying_int64_global",
"__atomic_umax_varying_uint32_global",
"__atomic_umax_varying_uint64_global",
"__atomic_umin_varying_uint32_global",
"__atomic_umin_varying_uint64_global",
"__atomic_xor_uniform_int32_global",
"__atomic_xor_uniform_int64_global",
"__atomic_xor_varying_int32_global",
"__atomic_xor_varying_int64_global",
"__atomic_xor_varying_int32_global",
"__atomic_xor_varying_int64_global",
//#endif /* ISPC_NVPTX_ENABLED */
"__broadcast_double",
"__broadcast_float",
"__broadcast_i16",
@@ -449,7 +455,9 @@ lSetInternalFunctions(llvm::Module *module) {
"__do_assert_uniform",
"__do_assert_varying",
"__do_print",
//#ifdef ISPC_NVPTX_ENABLED
"__do_print_nvptx",
//#endif /* ISPC_NVPTX_ENABLED */
"__doublebits_uniform_int64",
"__doublebits_varying_int64",
"__exclusive_scan_add_double",
@@ -464,8 +472,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__extract_int32",
"__extract_int64",
"__extract_int8",
//#ifdef ISPC_NVPTX_ENABLED
"__extract_float",
"__extract_double",
//#endif /* ISPC_NVPTX_ENABLED */
"__fastmath",
"__float_to_half_uniform",
"__float_to_half_varying",
@@ -482,8 +492,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__insert_int32",
"__insert_int64",
"__insert_int8",
//#ifdef ISPC_NVPTX_ENABLED
"__insert_float",
"__insert_double",
//#endif /* ISPC_NVPTX_ENABLED */
"__intbits_uniform_double",
"__intbits_uniform_float",
"__intbits_varying_double",
@@ -520,7 +532,9 @@ lSetInternalFunctions(llvm::Module *module) {
"__min_varying_uint32",
"__min_varying_uint64",
"__movmsk",
//#ifdef ISPC_NVPTX_ENABLED
"__movmsk_ptx",
//#endif /* ISPC_NVPTX_ENABLED */
"__new_uniform_32rt",
"__new_uniform_64rt",
"__new_varying32_32rt",
@@ -610,13 +624,15 @@ lSetInternalFunctions(llvm::Module *module) {
"__shuffle_i64",
"__shuffle_i8",
"__soa_to_aos3_float",
"__soa_to_aos3_float1",
"__soa_to_aos3_float16",
"__soa_to_aos3_float4",
"__soa_to_aos3_float8",
"__soa_to_aos3_int32",
"__soa_to_aos4_float",
//#ifdef ISPC_NVPTX_ENABLED
"__soa_to_aos3_float1",
"__soa_to_aos4_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__soa_to_aos4_float16",
"__soa_to_aos4_float4",
"__soa_to_aos4_float8",
@@ -717,7 +733,7 @@ lSetInternalFunctions(llvm::Module *module) {
"__vec4_add_float",
"__vec4_add_int32",
"__vselect_float",
"__vselect_i32",
//#ifdef ISPC_NVPTX_ENABLED
"__program_index",
"__program_count",
"__warp_index",
@@ -736,6 +752,8 @@ lSetInternalFunctions(llvm::Module *module) {
"ISPCAlloc",
"ISPCLaunch",
"ISPCSync",
//#endif /* ISPC_NVPTX_ENABLED */
"__vselect_i32"
};

int count = sizeof(names) / sizeof(names[0]);
@@ -808,7 +826,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
g->target->getISA() != Target::NEON16 &&
g->target->getISA() != Target::NEON8)
#endif // !__arm__
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
{
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
mTriple.getArch() == bcTriple.getArch());
@@ -982,6 +1002,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// Next, add the target's custom implementations of the various needed
// builtin functions (e.g. __masked_store_32(), etc).
switch (g->target->getISA()) {
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX:
{
if (runtime32) {
@@ -993,6 +1014,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
};
#endif /* ISPC_NVPTX_ENABLED */

#ifdef ISPC_ARM_ENABLED
case Target::NEON8: {
if (runtime32) {
@@ -1262,14 +1285,18 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}

// define the 'programCount' builtin variable
if (g->target->getISA() != Target::NVPTX)
{
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
}
else
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
lDefineConstantInt("programCount", 32, module, symbolTable);
}
else
{
#endif /* ISPC_NVPTX_ENABLED */
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
#ifdef ISPC_NVPTX_ENABLED
}
#endif /* ISPC_NVPTX_ENABLED */

// define the 'programIndex' builtin
lDefineProgramIndex(module, symbolTable);
@@ -1301,9 +1328,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
module, symbolTable);

#ifdef ISPC_NVPTX_ENABLED
lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX),
module, symbolTable);

#else
lDefineConstantInt("__is_nvptx_target", (int)0, module, symbolTable);
#endif /* ISPC_NVPTX_ENABLED */

if (g->forceAlignment != -1) {
llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
alignment->setInitializer(LLVMInt32(g->forceAlignment));
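For reference, the guarded programCount logic in DefineStdlib() above reduces to the following sketch; the constant 32 matches the CUDA warp size that the nvptx target uses as its program count:

    #ifdef ISPC_NVPTX_ENABLED
        if (g->target->getISA() == Target::NVPTX)
            lDefineConstantInt("programCount", 32, module, symbolTable);   // one warp
        else
    #endif /* ISPC_NVPTX_ENABLED */
            lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);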
ctx.cpp | 257
@@ -57,8 +57,10 @@
#include <llvm/IR/Instructions.h>
#include <llvm/IR/DerivedTypes.h>
#endif
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/Support/raw_ostream.h>
#include <llvm/Support/FormattedStream.h>
#endif /* ISPC_NVPTX_ENABLED */

/** This is a small utility structure that records information related to one
level of nested control flow. It's mostly used in correctly restoring
@@ -1373,28 +1375,30 @@ FunctionEmitContext::None(llvm::Value *mask) {

llvm::Value *
FunctionEmitContext::LaneMask(llvm::Value *v)
{
#if 1 /* this makes mandelbrot example slower with "nvptx" target. Need further investigation. */
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
FunctionEmitContext::LaneMask(llvm::Value *v) {
#ifdef ISPC_NVPTX_ENABLED
/* this makes mandelbrot example slower with "nvptx" target.
* Needs further investigation. */
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
#else
const char *__movmsk = "__movmsk";
const char *__movmsk = "__movmsk";
#endif
// Call the target-dependent movmsk function to turn the vector mask
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction(__movmsk, &mm);
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
AssertPos(currentPos, mm.size() == 2);
// We can actually call either one, since both are i32s as far as
// LLVM's type system is concerned...
llvm::Function *fmm = mm[0]->function;
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
// Call the target-dependent movmsk function to turn the vector mask
// into an i64 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction(__movmsk, &mm);
if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
AssertPos(currentPos, mm.size() == 2);
// We can actually call either one, since both are i32s as far as
// LLVM's type system is concerned...
llvm::Function *fmm = mm[0]->function;
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
}

#ifdef ISPC_NVPTX_ENABLED
bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
{
llvm::Type *type = vector->getType();
@@ -1447,19 +1451,21 @@ FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
return ret;
}
#endif /* ISPC_NVPTX_ENABLED */

llvm::Value *
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
if (g->target->getISA() == Target::NVPTX)
{
// Compare the two masks to get a vector of i1s
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
v1, v2, "v1==v2");
return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
}
else
{
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
// Compare the two masks to get a vector of i1s
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
v1, v2, "v1==v2");
return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
}
#endif /* ISPC_NVPTX_ENABLED */

#if 0
// Compare the two masks to get a vector of i1s
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
@@ -1474,7 +1480,6 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
LLVMGetName("equal", v1, v2));
#endif
}
}

llvm::Value *
@@ -1489,6 +1494,8 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {

return index;
}

#ifdef ISPC_NVPTX_ENABLED
llvm::Value *
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
llvm::Function *func_program_index = m->module->getFunction("__program_index");
@@ -1500,6 +1507,7 @@ FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
#endif
return index;
}
#endif /* ISPC_NVPTX_ENABLED */

llvm::Value *
@@ -1919,7 +1927,6 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {

if (name == NULL)
name = LLVMGetName(value, "_ptr2int");

llvm::Type *type = LLVMTypes::PointerIntType;
llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
AddDebugPos(inst);
@@ -3613,75 +3620,8 @@ llvm::Value *
FunctionEmitContext::LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount[3]){

if (g->target->getISA() != Target::NVPTX)
{
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
}

launchedTasks = true;

AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType());

llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());

std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
llvm::Value *argmem = BitCastInst(voidmem, pt);

// Copy the values of the parameters into the appropriate place in
// the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think
StoreInst(argVals[i], ptr);
}

if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}

// And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the
// argument block we just filled in
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr);
args.push_back(fptr);
args.push_back(voidmem);
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
}
else /* NVPTX */
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
@@ -3764,38 +3704,79 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
llvm::Value *ret = CallInst(flaunch, NULL, args, "");
return ret;
}
#endif /* ISPC_NVPTX_ENABLED */

if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
}

launchedTasks = true;

AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType());

llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());

std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
llvm::Value *argmem = BitCastInst(voidmem, pt);

// Copy the values of the parameters into the appropriate place in
// the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think
StoreInst(argVals[i], ptr);
}

if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}

// And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the
// argument block we just filled in
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr);
args.push_back(fptr);
args.push_back(voidmem);
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
}

void
FunctionEmitContext::SyncInst() {
if (g->target->getISA() != Target::NVPTX)
{
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
llvm::CmpInst::ICMP_NE,
launchGroupHandle, nullPtrValue);
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
BranchInst(bSync, bPostSync, nonNull);

SetCurrentBasicBlock(bSync);
llvm::Function *fsync = m->module->getFunction("ISPCSync");
if (fsync == NULL)
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");

// zero out the handle so that if ISPCLaunch is called again in this
// function, it knows it's starting out from scratch
StoreInst(nullPtrValue, launchGroupHandlePtr);

BranchInst(bPostSync);

SetCurrentBasicBlock(bPostSync);
}
else /* NVPTX: don't do test, just call sync */
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
@@ -3805,7 +3786,33 @@ FunctionEmitContext::SyncInst() {
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");
StoreInst(nullPtrValue, launchGroupHandlePtr);
return;
}
#endif /* ISPC_NVPTX_ENABLED */

llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
llvm::CmpInst::ICMP_NE,
launchGroupHandle, nullPtrValue);
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
BranchInst(bSync, bPostSync, nonNull);

SetCurrentBasicBlock(bSync);
llvm::Function *fsync = m->module->getFunction("ISPCSync");
if (fsync == NULL)
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");

// zero out the handle so that if ISPCLaunch is called again in this
// function, it knows it's starting out from scratch
StoreInst(nullPtrValue, launchGroupHandlePtr);

BranchInst(bPostSync);

SetCurrentBasicBlock(bPostSync);
}

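The LaunchInst()/SyncInst() rewrites above replace the old if/else on the ISA with an early-returning block under the guard, so the generic ISPCAlloc/ISPCLaunch/ISPCSync path is compiled unconditionally. Roughly:

    #ifdef ISPC_NVPTX_ENABLED
        if (g->target->getISA() == Target::NVPTX) {
            // NVPTX-specific launch/sync
            return /* ... */;
        }
    #endif /* ISPC_NVPTX_ENABLED */
        // generic task-launch path follows, unguarded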
ctx.h | 20
@@ -291,21 +291,21 @@ public:
of the mask is on. */
llvm::Value *LaneMask(llvm::Value *mask);

/** Given two masks of type LLVMTypes::MaskType, return an i1 value
that indicates whether the two masks are equal. */
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);

/** generate constantvector, which contains programindex, i.e.
< i32 0, i32 1, i32 2, i32 3> */
llvm::Value *ProgramIndexVector(bool is32bits = true);
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);

/** Issues a call to __insert_int8/int16/int32/int64/float/double */
llvm::Value* Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar);
/** Issues a call to __extract_int8/int16/int32/int64/float/double */
llvm::Value* Extract(llvm::Value *vector, llvm::Value *lane);

/** Given two masks of type LLVMTypes::MaskType, return an i1 value
that indicates whether the two masks are equal. */
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);

/** Generate ConstantVector, which contains ProgramIndex, i.e.
< i32 0, i32 1, i32 2, i32 3> */
llvm::Value *ProgramIndexVector(bool is32bits = true);
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
#endif

/** Given a string, create an anonymous global variable to hold its
value and return the pointer to the string. */
decl.cpp | 12
@@ -168,6 +168,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);

if (soaWidth > 0) {
#ifdef ISPC_NVPTX_ENABLED
#if 0 /* see stmt.cpp in DeclStmt::EmitCode for work-around of SOAType Declaration */
if (g->target->getISA() == Target::NVPTX)
{
@@ -175,6 +176,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
return NULL;
}
#endif
#endif /* ISPC_NVPTX_ENABLED */
const StructType *st = CastType<StructType>(retType);

if (st == NULL) {
@@ -409,6 +411,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
return;
}

#ifdef ISPC_NVPTX_ENABLED
#if 0 /* NVPTX */
if (baseType->IsUniformType())
{
@@ -416,6 +419,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
baseType->IsArrayType() ? " true " : " false ");
}
#endif
#endif /* ISPC_NVPTX_ENABLED */
const Type *arrayType = new ArrayType(baseType, arraySize);
if (child != NULL) {
child->InitFromType(arrayType, ds);
@@ -544,9 +548,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {

returnType = returnType->ResolveUnboundVariability(Variability::Varying);

bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
bool isUnmasked = ds && ((ds->typeQualifiers & TYPEQUAL_UNMASKED) != 0);

if (isExported && isTask) {
@@ -555,9 +559,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
return;
}
if (isExternC && isTask) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
"qualifiers");
return;
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
"qualifiers");
return;
}
if (isExternC && isExported) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
expr.cpp | 4
@@ -7880,12 +7880,14 @@ SizeOfExpr::TypeCheck() {
"struct type \"%s\".", type->GetString().c_str());
return NULL;
}
#ifdef ISPC_NVPTX_ENABLED
if (type != NULL)
if (g->target->getISA() == Target::NVPTX && type->IsVaryingType())
{
Error(pos, "\"sizeof\" with varying data types is not yet supported with \"nvptx\" target.");
return NULL;
}
#endif /* ISPC_NVPTX_ENABLED */

return this;
}
@@ -8718,11 +8720,13 @@ NewExpr::TypeCheck() {
AssertPos(pos, m->errorCount > 0);
return NULL;
}
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && allocType->IsVaryingType())
{
Error(pos, "\"new\" with varying data types is not yet supported with \"nvptx\" target.");
return NULL;
}
#endif /* ISPC_NVPTX_ENABLED */
if (CastType<UndefinedStructType>(allocType) != NULL) {
Error(pos, "Can't dynamically allocate storage for declared "
"but not defined type \"%s\".", allocType->GetString().c_str());
func.cpp | 26
@@ -47,7 +47,9 @@
#include <stdio.h>

#if defined(LLVM_3_1) || defined(LLVM_3_2)
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/Metadata.h>
#endif /* ISPC_NVPTX_ENABLED */
#include <llvm/LLVMContext.h>
#include <llvm/Module.h>
#include <llvm/Type.h>
@@ -55,7 +57,9 @@
#include <llvm/Intrinsics.h>
#include <llvm/DerivedTypes.h>
#else
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/IR/Metadata.h>
#endif /* ISPC_NVPTX_ENABLED */
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
#include <llvm/IR/Type.h>
@@ -131,7 +135,11 @@ Function::Function(Symbol *s, Stmt *c) {
sym->parentFunction = this;
}

if (type->isTask && g->target->getISA() != Target::NVPTX) {
if (type->isTask
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
Assert(threadIndexSym);
threadCountSym = m->symbolTable->LookupVariable("threadCount");
@@ -242,7 +250,11 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
#endif
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
if (type->isTask == true && g->target->getISA() != Target::NVPTX) {
if (type->isTask == true
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
// For tasks, there should always be three parameters: the
// pointer to the structure that holds all of the arguments, the
// thread index, and the thread count variables.
@@ -340,6 +352,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
ctx->SetFunctionMask(argIter);
Assert(++argIter == function->arg_end());
}
#ifdef ISPC_NVPTX_ENABLED
if (type->isTask == true && g->target->getISA() == Target::NVPTX)
{
llvm::NamedMDNode* annotations =
@@ -350,6 +363,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
av.push_back(LLVMInt32(1));
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
}
#endif /* ISPC_NVPTX_ENABLED */
}

// Finally, we can generate code for the function
@@ -505,15 +519,14 @@ Function::GenerateIR() {
// the application can call it
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
if (type->isExported) {
if (type->isExported) {
if (!type->isTask) {
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
std::string functionName = sym->name;

if (g->mangleFunctionsWithTarget)
functionName += std::string("_") + g->target->GetISAString();

#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */
@@ -527,6 +540,7 @@ Function::GenerateIR() {
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
#endif
}
#endif /* ISPC_NVPTX_ENABLED */
llvm::Function *appFunction =
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
#if defined(LLVM_3_1)
@@ -566,6 +580,7 @@ Function::GenerateIR() {
FATAL("Function verificication failed");
}
}
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
llvm::NamedMDNode* annotations =
@@ -576,6 +591,7 @@ Function::GenerateIR() {
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
}
#endif /* ISPC_NVPTX_ENABLED */
}
}
}
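Two NVPTX-specific behaviors in the func.cpp hunks above are worth noting: task handling skips the threadIndex/threadCount lookup when the ISA is NVPTX, and exported functions get "___export" appended to their emitted name so the later ptxcc step can recognize them. A condensed sketch of the latter (the surrounding braces and metadata emission are omitted here):

    #ifdef ISPC_NVPTX_ENABLED
        if (g->target->getISA() == Target::NVPTX)
            functionName += std::string("___export");   // marker consumed by ptxcc
    #endif /* ISPC_NVPTX_ENABLED */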
ispc.cpp | 19
@@ -247,9 +247,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
arch = "arm";
else
#endif
#ifdef ISPC_NVPTX_ENABLED
if(!strncmp(isa, "nvptx", 5))
arch = "nvptx64";
else
#endif /* ISPC_NVPTX_ENABLED */
arch = "x86-64";
}

@@ -587,6 +589,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 32;
}
#endif
#ifdef ISPC_NVPTX_ENABLED
else if (!strcasecmp(isa, "nvptx"))
{
this->m_isa = Target::NVPTX;
@@ -602,6 +605,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasGather = this->m_hasScatter = false;
cpuFromIsa = "sm_35";
}
#endif /* ISPC_NVPTX_ENABLED */
else {
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
isa, SupportedTargets());
@@ -720,8 +724,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
// Initialize target-specific "target-feature" attribute.
if (!m_attributes.empty()) {
llvm::AttrBuilder attrBuilder;
#ifdef ISPC_NVPTX_ENABLED
if (m_isa != Target::NVPTX)
attrBuilder.addAttribute("target-cpu", this->m_cpu);
#endif
attrBuilder.addAttribute("target-cpu", this->m_cpu);
attrBuilder.addAttribute("target-features", this->m_attributes);
this->m_tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get(
@@ -768,6 +774,9 @@ Target::SupportedTargets() {
return
#ifdef ISPC_ARM_ENABLED
"neon-i8x16, neon-i16x8, neon-i32x4, "
#endif
#ifdef ISPC_NVPTX_ENABLED
"nvptx, "
#endif
"sse2-i32x4, sse2-i32x8, "
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
@@ -776,7 +785,7 @@ Target::SupportedTargets() {
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
"generic-x1, generic-x4, generic-x8, generic-x16, "
"generic-x32, generic-x64, nvptx";
"generic-x32, generic-x64";
}

@@ -803,8 +812,10 @@ Target::GetTripleString() const {
triple.setArchName("i386");
else if (m_arch == "x86-64")
triple.setArchName("x86_64");
#ifdef ISPC_NVPTX_ENABLED
else if (m_arch == "nvptx64")
triple = llvm::Triple("nvptx64", "nvidia", "cuda");
#endif /* ISPC_NVPTX_ENABLED */
else
triple.setArchName(m_arch);
}
@@ -837,8 +848,10 @@ Target::ISAToString(ISA isa) {
return "avx2";
case Target::GENERIC:
return "generic";
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX:
return "nvptx";
#endif /* ISPC_NVPTX_ENABLED */
default:
FATAL("Unhandled target in ISAToString()");
}
@@ -877,8 +890,10 @@ Target::ISAToTargetString(ISA isa) {
return "avx2-i32x8";
case Target::GENERIC:
return "generic-4";
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX:
return "nvptx";
#endif /* ISPC_NVPTX_ENABLED */
default:
FATAL("Unhandled target in ISAToTargetString()");
}
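Taken together, the ispc.cpp hunks make "nvptx" a selectable ISA only when the backend is compiled in. The values set on that path, as shown above, are roughly:

    // inside Target::Target(), under #ifdef ISPC_NVPTX_ENABLED
    this->m_isa = Target::NVPTX;
    arch        = "nvptx64";        // GetTripleString() maps this to nvptx64-nvidia-cuda
    cpuFromIsa  = "sm_35";
    this->m_hasGather = this->m_hasScatter = false;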
ispc.h | 6
@@ -179,7 +179,10 @@ public:
flexible/performant of them will apear last in the enumerant. Note
also that __best_available_isa() needs to be updated if ISAs are
added or the enumerant values are reordered. */
enum ISA { NVPTX,
enum ISA {
#ifdef ISPC_NVPTX_ENABLED
NVPTX,
#endif
#ifdef ISPC_ARM_ENABLED
NEON32, NEON16, NEON8,
#endif
@@ -611,7 +614,6 @@ struct Globals {
/** Indicates that alignment in memory allocation routines should be
forced to have given value. -1 value means natural alignment for the platforms. */
int forceAlignment;
std::string PtxString;
};

enum {
main.cpp | 2
@@ -320,10 +320,12 @@ int main(int Argc, char *Argv[]) {
LLVMInitializeARMTargetMC();
#endif

#ifdef ISPC_NVPTX_ENABLED
LLVMInitializeNVPTXTargetInfo();
LLVMInitializeNVPTXTarget();
LLVMInitializeNVPTXAsmPrinter();
LLVMInitializeNVPTXTargetMC();
#endif /* ISPC_NVPTX_ENABLED */

char *file = NULL;
const char *headerFileName = NULL;
module.cpp | 82
@@ -58,7 +58,9 @@
#include <set>
#include <sstream>
#include <iostream>
#ifdef ISPC_NVPTX_ENABLED
#include <map>
#endif /* ISPC_NVPTX_ENABLED */
#ifdef ISPC_IS_WINDOWS
#include <windows.h>
#include <io.h>
@@ -72,7 +74,9 @@
#include <llvm/Instructions.h>
#include <llvm/Intrinsics.h>
#include <llvm/DerivedTypes.h>
#ifdef ISPC_NVPTX_ENABLED
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#endif /* ISPC_NVPTX_ENABLED */
#else
#include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h>
@@ -80,7 +84,9 @@
#include <llvm/IR/Instructions.h>
#include <llvm/IR/Intrinsics.h>
#include <llvm/IR/DerivedTypes.h>
#ifdef ISPC_NVPTX_ENABLED
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
#endif /* ISPC_NVPTX_ENABLED */
#endif
#include <llvm/PassManager.h>
#include <llvm/PassRegistry.h>
@@ -446,6 +452,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
return;
}

#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX &&
#if 0
!type->IsConstType() &&
@@ -476,7 +483,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
type = new ArrayType(type->GetAsUniformType(), nel);
#endif
}

#endif /* ISPC_NVPTX_ENABLED */

llvm::Type *llvmType = type->LLVMType(g->ctx);
if (llvmType == NULL)
@@ -677,6 +684,7 @@ lCheckExportedParameterTypes(const Type *type, const std::string &name,
}
}

#ifdef ISPC_NVPTX_ENABLED
static void
lCheckTaskParameterTypes(const Type *type, const std::string &name,
SourcePos pos) {
@@ -691,7 +699,7 @@ lCheckTaskParameterTypes(const Type *type, const std::string &name,
name.c_str());
}
}

#endif /* ISPC_NVPTX_ENABLED */

/** Given a function type, loop through the function parameters and see if
any are StructTypes. If so, issue an error; this is currently broken
@@ -849,8 +857,12 @@ Module::AddFunctionDeclaration(const std::string &name,
#else // LLVM 3.1 and 3.3+
function->addFnAttr(llvm::Attribute::AlwaysInline);
#endif
/* evghenii: fails function verification when "if" executed in nvptx target */
if (functionType->isTask && g->target->getISA() != Target::NVPTX)

if (functionType->isTask)
#ifdef ISPC_NVPTX_ENABLED
/* evghenii: fails function verification when "if" executed in nvptx target */
if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
// This also applies transitively to members I think?
#if defined(LLVM_3_1)
function->setDoesNotAlias(1, true);
@@ -871,12 +883,14 @@ Module::AddFunctionDeclaration(const std::string &name,
functionType->GetReturnType()->IsVoidType() == false)
Error(pos, "Task-qualified functions must have void return type.");

#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX &&
Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false &&
functionType->isExported)
{
Error(pos, "Export-qualified functions must have void return type with \"nvptx\" target.");
}
#endif /* ISPC_NVPTX_ENABLED */

if (functionType->isExported || functionType->isExternC)
lCheckForStructParameters(functionType, pos);
@@ -897,9 +911,12 @@ Module::AddFunctionDeclaration(const std::string &name,
if (functionType->isExported) {
lCheckExportedParameterTypes(argType, argName, argPos);
}

#ifdef ISPC_NVPTX_ENABLED
if (functionType->isTask) {
lCheckTaskParameterTypes(argType, argName, argPos);
}
#endif /* ISPC_NVPTX_ENABLED */

// ISPC assumes that no pointers alias. (It should be possible to
// specify when this is not the case, but this should be the
@@ -1027,24 +1044,28 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
const char *fileType = NULL;
switch (outputType) {
case Asm:
if (g->target->getISA() != Target::NVPTX)
{
if (strcasecmp(suffix, "s"))
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
if (strcasecmp(suffix, "ptx"))
fileType = "assembly";
}
else
if (strcasecmp(suffix, "ptx"))
}
else
#endif /* ISPC_NVPTX_ENABLED */
if (strcasecmp(suffix, "s"))
fileType = "assembly";
break;
case Bitcode:
if (g->target->getISA() != Target::NVPTX)
{
if (strcasecmp(suffix, "bc"))
fileType = "LLVM bitcode";
}
else
if (strcasecmp(suffix, "ll"))
fileType = "LLVM assembly";
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
if (strcasecmp(suffix, "ll"))
fileType = "LLVM assembly";
}
else
#endif /* ISPC_NVPTX_ENABLED */
if (strcasecmp(suffix, "bc"))
fileType = "LLVM bitcode";
break;
case Object:
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
@@ -1113,6 +1134,7 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
return writeObjectFileOrAssembly(outputType, outFileName);
}

#ifdef ISPC_NVPTX_ENABLED
typedef std::vector<std::string> vecString_t;
static vecString_t
lSplitString(const std::string &s)
@@ -1180,6 +1202,7 @@ lFixAttributes(const vecString_t &src, vecString_t &dst)
dst.push_back(s);
}
}
#endif /* ISPC_NVPTX_ENABLED */

bool
Module::writeBitcode(llvm::Module *module, const char *outFileName) {
@@ -1204,11 +1227,8 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
}

llvm::raw_fd_ostream fos(fd, (fd != 1), false);
if (g->target->getISA() != Target::NVPTX)
{
llvm::WriteBitcodeToFile(module, fos);
}
else
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
/* when using "nvptx" target, emit patched/hacked assembly
* NVPTX only accepts 3.2-style LLVM assembly, where attributes
@@ -1240,7 +1260,9 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
fos << *it;
}
}

else
#endif /* ISPC_NVPTX_ENABLED */
llvm::WriteBitcodeToFile(module, fos);

return true;
}
@@ -2275,6 +2297,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
opts.addMacroDef(g->cppArgs[i].substr(2));
}
}
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
opts.addMacroDef("__NVPTX__");
@@ -2295,6 +2318,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
opts.addMacroDef("taskCount2=__taskCount2()");
opts.addMacroDef("taskCount=__taskCount()");
}
#endif /* ISPC_NVPTX_ENABLED */

inst.getLangOpts().LineComment = 1;
#if defined(LLVM_3_5)
@@ -2740,6 +2764,7 @@ lCreateDispatchModule(std::map<std::string, FunctionTargetVariants> &functions)
return module;
}

#ifdef ISPC_NVPTX_ENABLED
static std::string lCBEMangle(const std::string &S) {
std::string Result;

@@ -2762,7 +2787,7 @@ static std::string lCBEMangle(const std::string &S) {
}
return Result;
}

#endif /* ISPC_NVPTX_ENABLED */

int
Module::CompileAndOutput(const char *srcFile,
@@ -2778,7 +2803,7 @@ Module::CompileAndOutput(const char *srcFile,
const char *hostStubFileName,
const char *devStubFileName)
{
if (target == NULL || strchr(target, ',') == NULL) {
if (target == NULL || strchr(target, ',') == NULL) {
// We're only compiling to a single target
g->target = new Target(arch, cpu, target, generatePIC);
if (!g->target->isValid())
@@ -2786,7 +2811,7 @@ Module::CompileAndOutput(const char *srcFile,

m = new Module(srcFile);
if (m->CompileFile() == 0) {

#ifdef ISPC_NVPTX_ENABLED
/* NVPTX:
* for PTX target replace '.' with '_' in all global variables
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
@@ -2811,7 +2836,7 @@ Module::CompileAndOutput(const char *srcFile,
}
}
}

#endif /* ISPC_NVPTX_ENABLED */
if (outputType == CXX) {
if (target == NULL || strncmp(target, "generic-", 8) != 0) {
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
@@ -3014,5 +3039,4 @@ Module::CompileAndOutput(const char *srcFile,

return errorCount > 0;
}
return true;
}
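The writeOutput()/writeBitcode() changes above amount to different expected outputs for NVPTX: assembly output is PTX (a ".ptx" suffix), and "bitcode" output is patched textual LLVM assembly (".ll") rather than a ".bc" file, since the 3.2-style attribute fix-up in lFixAttributes() operates on textual IR. A condensed sketch of the suffix check:

    #ifdef ISPC_NVPTX_ENABLED
        if (g->target->getISA() == Target::NVPTX) {
            if (strcasecmp(suffix, "ptx"))
                fileType = "assembly";
        } else
    #endif /* ISPC_NVPTX_ENABLED */
        if (strcasecmp(suffix, "s"))
            fileType = "assembly";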
opt.cpp | 17
@@ -55,7 +55,9 @@
#include <llvm/Function.h>
#include <llvm/BasicBlock.h>
#include <llvm/Constants.h>
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/InlineAsm.h>
#endif /* ISPC_NVPTX_ENABLED */
#else
#include <llvm/IR/Module.h>
#include <llvm/IR/Instructions.h>
@@ -63,7 +65,9 @@
#include <llvm/IR/Function.h>
#include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h>
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/IR/InlineAsm.h>
#endif /* ISPC_NVPTX_ENABLED */
#endif
#if defined (LLVM_3_4) || defined(LLVM_3_5)
#include <llvm/Transforms/Instrumentation.h>
@@ -131,7 +135,9 @@ static llvm::Pass *CreateDebugPass(char * output);
static llvm::Pass *CreateReplaceStdlibShiftPass();

static llvm::Pass *CreateFixBooleanSelectPass();
#ifdef ISPC_NVPTX_ENABLED
static llvm::Pass *CreatePromoteLocalToPrivatePass();
#endif /* ISPC_NVPTX_ENABLED */

#define DEBUG_START_PASS(NAME) \
if (g->debugPrint && \
@@ -495,9 +501,11 @@ Optimize(llvm::Module *module, int optLevel) {
// run absolutely no optimizations, since the front-end needs us to
// take the various __pseudo_* functions it has emitted and turn
// them into something that can actually execute.

optPM.add(CreateImproveMemoryOpsPass(), 100);
#ifdef ISPC_NVPTX_ENABLED
if (g->opt.disableGatherScatterOptimizations == false &&
g->target->getVectorWidth() > 1)
#endif /* ISPC_NVPTX_ENABLED */
optPM.add(CreateImproveMemoryOpsPass(), 100);

if (g->opt.disableHandlePseudoMemoryOps == false)
@@ -579,7 +587,9 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createReassociatePass());
optPM.add(llvm::createIPConstantPropagationPass());

#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
optPM.add(CreateReplaceStdlibShiftPass(),229);

optPM.add(llvm::createDeadArgEliminationPass(),230);
@@ -693,7 +703,7 @@ Optimize(llvm::Module *module, int optLevel) {

// Should be the last
optPM.add(CreateFixBooleanSelectPass(), 400);

#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
optPM.add(CreatePromoteLocalToPrivatePass());
@@ -799,6 +809,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createConstantMergePass());
#endif
}
#endif /* ISPC_NVPTX_ENABLED */
}

// Finish up by making sure we didn't mess anything up in the IR along
@@ -5410,6 +5421,7 @@ CreateFixBooleanSelectPass() {
return new FixBooleanSelectPass();
}

#ifdef ISPC_NVPTX_ENABLED
///////////////////////////////////////////////////////////////////////////////
// Detect addrspace(3)
///////////////////////////////////////////////////////////////////////////////
@@ -5498,4 +5510,5 @@ CreatePromoteLocalToPrivatePass() {

#endif /* ISPC_NVPTX_ENABLED */

@@ -45,11 +45,13 @@ then
# $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
$($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \
$($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
$(/bin/rm -rf $TMPDIR/*$fbname*);
else
$(sed 's/\.b0/\.b32/g' $1 > $TMPDIR/$fbname) && \
$($PTXCC $TMPDIR/$fbname -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
$(/bin/rm -rf $TMPDIR/*$fbname*);
fi

@@ -233,7 +233,7 @@ def run_test(testname):
elif is_nvptx_target:
if os.environ.get("NVVM") == "1":
is_nvptx_nvvm = True
obj_name = "%s.bc" % testname
obj_name = "%s.ll" % testname
else:
obj_name = "%s.ptx" % testname
is_nvptx_nvvm = False
stmt.cpp | 700
@@ -142,6 +142,7 @@ lHasUnsizedArrays(const Type *type) {
return lHasUnsizedArrays(at->GetElementType());
}

#ifdef ISPC_NVPTX_ENABLED
static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
{
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
@@ -198,6 +199,7 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *

return value;
}
#endif /* ISPC_NVPTX_ENABLED */

void
DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
@@ -261,9 +263,8 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
return;
}

if (sym->storageClass == SC_STATIC) {

#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
{
Error(sym->pos,
@@ -279,7 +280,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
PerformanceWarning(sym->pos,
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
sym->name.c_str());

#endif /* ISPC_NVPTX_ENABLED */
// For static variables, we need a compile-time constant value
// for its initializer; if there's no initializer, we use a
// zero value.
@@ -307,28 +308,38 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
if (cinit == NULL)
cinit = llvm::Constant::getNullValue(llvmType);

// Allocate space for the static variable in global scope, so
// that it persists across function calls
#ifdef ISPC_NVPTX_ENABLED
int addressSpace = 0;
if (g->target->getISA() == Target::NVPTX &&
sym->type->IsConstType() &&
sym->type->IsUniformType())
addressSpace = 4;

// Allocate space for the static variable in global scope, so
// that it persists across function calls
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static_") +
llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(),
llvm::Twine(".") + sym->name.c_str(),
NULL,
llvm::GlobalVariable::NotThreadLocal,
addressSpace);
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
#else /* ISPC_NVPTX_ENABLED */
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine(".") + sym->name.c_str());
#endif /* ISPC_NVPTX_ENABLED */
// Tell the FunctionEmitContext about the variable
ctx->EmitVariableDebugInfo(sym);
}
#ifdef ISPC_NVPTX_ENABLED
else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
/* NVPTX:
* only non-constant uniform data types are stored in shared memory
@@ -396,6 +407,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
sym->parentFunction = ctx->GetFunction();
|
||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||
}
|
||||
#endif /* ISPC_NVPTX_ENABLED */
|
||||
else
|
||||
{
|
||||
// For non-static variables, allocate storage on the stack
|
||||
@@ -404,7 +416,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// Tell the FunctionEmitContext about the variable; must do
|
||||
// this before the initializer stuff.
|
||||
ctx->EmitVariableDebugInfo(sym);
|
||||
|
||||
if (initExpr == 0 && sym->type->IsConstType())
|
||||
Error(sym->pos, "Missing initializer for const variable "
|
||||
"\"%s\".", sym->name.c_str());
|
||||
@@ -412,7 +423,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// And then get it initialized...
|
||||
sym->parentFunction = ctx->GetFunction();
|
||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
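A side note on what the NVPTX branch above amounts to: address space 4 in PTX is __constant__ memory, and after the global is created there, lConvertToGenericPtr() casts the addrspace(4) pointer back to a generic pointer so the rest of the code generator can treat it like any other storage pointer. Below is a minimal standalone sketch of that same pattern against the plain LLVM C++ API; it is not ispc code, and the module and symbol names are invented for illustration.

// Minimal sketch, assuming a reasonably recent LLVM C++ API; not taken from ispc.
#include "llvm/IR/Constants.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/GlobalVariable.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"
#include "llvm/Support/raw_ostream.h"

int main() {
    llvm::LLVMContext ctx;
    llvm::Module mod("nvptx_const_demo", ctx);           // hypothetical module name

    // A "const static uniform" value, placed in PTX __constant__ memory (addrspace 4).
    llvm::Type *i32 = llvm::Type::getInt32Ty(ctx);
    auto *gv = new llvm::GlobalVariable(
        mod, i32, /*isConstant=*/true, llvm::GlobalValue::InternalLinkage,
        llvm::ConstantInt::get(i32, 42), "static.demo", /*InsertBefore=*/nullptr,
        llvm::GlobalVariable::NotThreadLocal, /*AddressSpace=*/4);

    // Consumers expect a generic pointer, so cast addrspace(4)* back to
    // addrspace(0)* -- roughly the job lConvertToGenericPtr() does above.
    llvm::Type *genericPtrTy = llvm::PointerType::get(i32, /*AddressSpace=*/0);
    new llvm::GlobalVariable(
        mod, genericPtrTy, /*isConstant=*/true, llvm::GlobalValue::ExternalLinkage,
        llvm::ConstantExpr::getAddrSpaceCast(gv, genericPtrTy), "static.demo.generic");

    mod.print(llvm::outs(), nullptr);                     // dump the generated IR
    return 0;
}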
@@ -571,7 +582,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
if (testValue == NULL)
return;
#ifdef ISPC_NVPTX_ENABLED
#if 0
if (!isUniform && g->target->getISA() == Target::NVPTX)
{
@@ -582,7 +593,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
isUniform = true;
}
#endif
#endif /* ISPC_NVPTX_ENABLED */
if (isUniform) {
ctx->StartUniformIf();
@@ -865,11 +876,17 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Do any of the program instances want to run the 'true'
// block? If not, jump ahead to bNext.
#if 1
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#else
#ifdef ISPC_NVPTX_ENABLED
#if 0
llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
// Emit statements for true
@@ -886,11 +903,16 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Similarly, check to see if any of the instances want to
// run the 'false' block...
#if 1
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#else
#ifdef ISPC_NVPTX_ENABLED
#if 0
llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
// Emit code for false
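The choice these two hunks keep (and the disabled alternative they leave behind) is between a genuine "any lane active" reduction and simply inspecting lane 0, which is only safe when the mask is known to be uniform across the warp. A purely illustrative host-side sketch of the difference, with a made-up Mask type standing in for ispc's mask value:

#include <cstdint>
#include <cstdio>

// Model the per-lane execution mask of a 32-wide gang/warp as one bit per lane.
using Mask = uint32_t;

// What ctx->Any(mask) computes: is at least one lane still active?
bool anyActive(Mask m) { return m != 0u; }

// What the disabled NVPTX branch (ExtractInst(mask, 0)) would compute instead:
// look only at lane 0, which is valid only if the mask is warp-uniform.
bool lane0Active(Mask m) { return (m & 1u) != 0u; }

int main() {
    Mask m = 0b0100;   // only lane 2 active
    std::printf("any=%d lane0=%d\n", anyActive(m), lane0Active(m));  // any=1 lane0=0
    return 0;
}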
@@ -1450,10 +1472,96 @@ static llvm::Value *
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
llvm::Value *uniformCounterPtr,
llvm::Value *varyingCounterPtr,
const std::vector<int> &spans)
{
if (g->target->getISA() != Target::NVPTX)
{
const std::vector<int> &spans) {
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
#endif /* ISPC_NVPTX_ENABLED */
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
@@ -1486,93 +1594,6 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
LLVMInt32Vector(delta), "iter_val");
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
else /* NVPTX == true */
{
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
}
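The delta table described in the comments above ("(0,1,2,3,0,1,2,3)" for the inner dimension and "(0,0,0,0,1,1,1,1)" for the outer one) is easy to sanity-check on the host. The snippet below is a standalone sketch of just that arithmetic for a 2D tiled foreach with spans {2, 4}; it is illustrative only and shares no code with the compiler.

#include <cstdio>
#include <vector>

// Per-lane offset of dimension `dim` inside the tile, exactly as in the loop
// above: divide out the spans of deeper dimensions, then take the remainder
// with this dimension's span.
static int laneDelta(int dim, int nDims, int lane, const std::vector<int> &spans) {
    int prevDimSpanCount = 1;
    for (int j = dim; j < nDims - 1; ++j)
        prevDimSpanCount *= spans[j + 1];
    return (lane / prevDimSpanCount) % spans[dim];
}

int main() {
    const std::vector<int> spans = {2, 4};   // outer stride 2, inner stride 4, 8-wide
    for (int dim = 0; dim < 2; ++dim) {
        std::printf("dim %d:", dim);
        for (int lane = 0; lane < 8; ++lane)
            std::printf(" %d", laneDelta(dim, 2, lane, spans));
        std::printf("\n");
    }
    // Prints: dim 0: 0 0 0 0 1 1 1 1
    //         dim 1: 0 1 2 3 0 1 2 3
    return 0;
}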
@@ -1650,7 +1671,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// This should be caught during typechecking
AssertPos(pos, startExprs.size() == dimVariables.size() &&
endExprs.size() == dimVariables.size());
endExprs.size() == dimVariables.size());
int nDims = (int)dimVariables.size();
///////////////////////////////////////////////////////////////////////
@@ -1661,66 +1682,70 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
std::vector<int> span(nDims, 0);
#ifdef ISPC_NVPTX_ENABLED
const int vectorWidth =
g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
#else /* ISPC_NVPTX_ENABLED */
lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
#endif /* ISPC_NVPTX_ENABLED */
for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for
// this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
if (i < nDims-1)
// stepping for the innermost dimension is handled specially
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Basic blocks that we'll fill in later with the looping logic for
// this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
if (i < nDims-1)
// stepping for the innermost dimension is handled specially
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx);
if (sv == NULL || ev == NULL)
return;
startVals.push_back(sv);
endVals.push_back(ev);
// Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx);
if (sv == NULL || ev == NULL)
return;
startVals.push_back(sv);
endVals.push_back(ev);
// nItems = endVal - startVal
llvm::Value *nItems =
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
// nItems = endVal - startVal
llvm::Value *nItems =
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
// nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly
// into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
LLVMInt32(span[i]), "nextras"));
// nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly
// into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
LLVMInt32(span[i]), "nextras"));
// alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end"));
// alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end"));
///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
// There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is
// the value that is program-visible.
dimVariables[i]->storagePtr =
ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]);
// There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is
// the value that is program-visible.
dimVariables[i]->storagePtr =
ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]);
// Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
// Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
}
ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
@@ -1733,14 +1758,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// a given dimension in preparation for running through its loop again,
// after the enclosing level advances its counter.
for (int i = 0; i < nDims; ++i) {
ctx->SetCurrentBasicBlock(bbReset[i]);
if (i == 0)
ctx->BranchInst(bbExit);
else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]);
}
ctx->SetCurrentBasicBlock(bbReset[i]);
if (i == 0)
ctx->BranchInst(bbExit);
else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]);
}
}
///////////////////////////////////////////////////////////////////////////
@@ -1750,67 +1775,67 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// this for the innermost dimension, which has a more complex stepping
// structure..
for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]);
ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]);
}
///////////////////////////////////////////////////////////////////////////
// foreach_test (for all dimensions other than the innermost...)
std::vector<llvm::Value *> inExtras;
for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbTest[i]);
ctx->SetCurrentBasicBlock(bbTest[i]);
llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras");
llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras");
if (i == 0)
inExtras.push_back(inEx);
else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all"));
if (i == 0)
inExtras.push_back(inEx);
else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all"));
llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span);
llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
// Do a vector compare of its value to the end value to generate a
// mask for this last bit of work.
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]);
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
}
if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]);
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
}
llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]);
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]);
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
}
///////////////////////////////////////////////////////////////////////////
@@ -1847,18 +1872,18 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// (i.e. processing extra elements that don't exactly fit into a
// vector).
llvm::BasicBlock *bbOuterInExtras =
ctx->CreateBasicBlock("outer_in_extras");
ctx->CreateBasicBlock("outer_in_extras");
llvm::BasicBlock *bbOuterNotInExtras =
ctx->CreateBasicBlock("outer_not_in_extras");
ctx->CreateBasicBlock("outer_not_in_extras");
ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
if (inExtras.size())
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
inExtras.back());
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
inExtras.back());
else
// for a 1D iteration domain, we certainly don't have any enclosing
// dimensions that are processing extra elements.
ctx->BranchInst(bbOuterNotInExtras);
// for a 1D iteration domain, we certainly don't have any enclosing
// dimensions that are processing extra elements.
ctx->BranchInst(bbOuterNotInExtras);
///////////////////////////////////////////////////////////////////////////
// One or more outer dimensions in extras, so we need to mask for the loop
@@ -1873,21 +1898,21 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask
// }
llvm::BasicBlock *bbAllInnerPartialOuter =
ctx->CreateBasicBlock("all_inner_partial_outer");
ctx->CreateBasicBlock("all_inner_partial_outer");
llvm::BasicBlock *bbPartial =
ctx->CreateBasicBlock("both_partial");
ctx->CreateBasicBlock("both_partial");
ctx->SetCurrentBasicBlock(bbOuterInExtras); {
// Update the varying counter value here, since all subsequent
// blocks along this path need it.
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
// Update the varying counter value here, since all subsequent
// blocks along this path need it.
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
// here we just check to see if counter < alignedEnd
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
// here we just check to see if counter < alignedEnd
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
}
// Below we have a basic block that runs the loop body code for the
@@ -1906,53 +1931,53 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// should step the loop counter for the next enclosing dimension
// instead.
llvm::Value *stepIndexAfterMaskedBodyPtr =
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
///////////////////////////////////////////////////////////////////////////
// We're in the inner loop part where the only masking is due to outer
// dimensions but the innermost dimension fits fully into a vector's
// width. Set the mask and jump to the masked loop body.
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
llvm::Value *mask;
if (nDims == 1)
// 1D loop; we shouldn't ever get here anyway
mask = LLVMMaskAllOff;
else
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *mask;
if (nDims == 1)
// 1D loop; we shouldn't ever get here anyway
mask = LLVMMaskAllOff;
else
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
ctx->SetInternalMask(mask);
ctx->SetInternalMask(mask);
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
// We need to include the effect of the innermost dimension in the mask
// for the final bits here
ctx->SetCurrentBasicBlock(bbPartial); {
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
if (nDims == 1) {
ctx->SetInternalMask(emask);
}
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->SetInternalMask(newMask);
}
if (nDims == 1) {
ctx->SetInternalMask(emask);
}
else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask");
ctx->SetInternalMask(newMask);
}
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
@@ -1968,14 +1993,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask
// }
llvm::BasicBlock *bbPartialInnerAllOuter =
ctx->CreateBasicBlock("partial_inner_all_outer");
ctx->CreateBasicBlock("partial_inner_all_outer");
ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
beforeAlignedEnd);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
beforeAlignedEnd);
}
///////////////////////////////////////////////////////////////////////////
@@ -1985,26 +2010,26 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// value of the varying loop counter and have the statements in the
// loop body emit their code.
llvm::BasicBlock *bbFullBodyContinue =
ctx->CreateBasicBlock("foreach_full_continue");
ctx->CreateBasicBlock("foreach_full_continue");
ctx->SetCurrentBasicBlock(bbFullBody); {
ctx->SetInternalMask(LLVMMaskAllOn);
ctx->SetBlockEntryMask(LLVMMaskAllOn);
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue);
ctx->SetInternalMask(LLVMMaskAllOn);
ctx->SetBlockEntryMask(LLVMMaskAllOn);
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue);
}
ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
ctx->RestoreContinuedLanes();
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterNotInExtras);
ctx->RestoreContinuedLanes();
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterNotInExtras);
}
///////////////////////////////////////////////////////////////////////////
@@ -2012,33 +2037,33 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// less than the end value, in which case we need to run the body one
// more time to get the extra bits.
llvm::BasicBlock *bbSetInnerMask =
ctx->CreateBasicBlock("partial_inner_only");
ctx->CreateBasicBlock("partial_inner_only");
ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeFullEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[nDims-1], "before_full_end");
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeFullEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[nDims-1], "before_full_end");
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
}
///////////////////////////////////////////////////////////////////////////
// The outer dimensions are all on, so the mask is just given by the
// mask for the innermost dimension
ctx->SetCurrentBasicBlock(bbSetInnerMask); {
llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
ctx->SetInternalMask(emask);
ctx->SetBlockEntryMask(emask);
llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask);
ctx->SetInternalMask(emask);
ctx->SetBlockEntryMask(emask);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody);
}
///////////////////////////////////////////////////////////////////////////
@@ -2048,34 +2073,34 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// mask known to be all-on, which in turn leads to more efficient code
// for that case.
llvm::BasicBlock *bbStepInnerIndex =
ctx->CreateBasicBlock("step_inner_index");
ctx->CreateBasicBlock("step_inner_index");
llvm::BasicBlock *bbMaskedBodyContinue =
ctx->CreateBasicBlock("foreach_masked_continue");
ctx->CreateBasicBlock("foreach_masked_continue");
ctx->SetCurrentBasicBlock(bbMaskedBody); {
ctx->AddInstrumentationPoint("foreach loop body (masked)");
ctx->SetContinueTarget(bbMaskedBodyContinue);
ctx->DisableGatherScatterWarnings();
ctx->SetBlockEntryMask(ctx->GetFullMask());
stmts->EmitCode(ctx);
ctx->EnableGatherScatterWarnings();
ctx->BranchInst(bbMaskedBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (masked)");
ctx->SetContinueTarget(bbMaskedBodyContinue);
ctx->DisableGatherScatterWarnings();
ctx->SetBlockEntryMask(ctx->GetFullMask());
stmts->EmitCode(ctx);
ctx->EnableGatherScatterWarnings();
ctx->BranchInst(bbMaskedBodyContinue);
}
ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
ctx->RestoreContinuedLanes();
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
ctx->RestoreContinuedLanes();
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
}
///////////////////////////////////////////////////////////////////////////
// step the innermost index, for the case where we're doing the
// innermost for loop over full vectors.
ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterInExtras);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterInExtras);
}
///////////////////////////////////////////////////////////////////////////
@@ -2262,8 +2287,12 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
// math...)
// Get the "program index" vector value
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *programIndex = ctx->ProgramIndexVector();
#endif /* ISPC_NVPTX_ENABLED */
// And smear the current lane out to a vector
llvm::Value *firstSet32 =
@@ -2460,19 +2489,22 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
// And load the corresponding element value from the temporary
// memory storing the value of the varying expr.
llvm::Value *uniqueValue;
if (g->target->getISA() != Target::NVPTX)
{
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
}
else /* in case of PTX target, use __shfl PTX intrinsics via __insert/__extract function */
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX)
{
llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
uniqueValue = ctx->Extract(exprValue, firstSet32);
}
else
{
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
#ifdef ISPC_NVPTX_ENABLED
}
#endif /* ISPC_NVPTX_ENABLED */
// If it's a varying pointer type, need to convert from the int
// type we store in the vector to the actual pointer type
if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
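Both branches of this hunk boil down to "read the element at the index of the first active lane"; the NVPTX path just does it through a register shuffle instead of going through memory. A host-side sketch of that selection logic, illustrative only, with made-up names and a plain array standing in for the varying value:

#include <cassert>
#include <cstdint>
#include <cstdio>

// Index of the first active lane in a 32-wide execution mask (one bit per lane).
static int firstActiveLane(uint32_t mask) {
    assert(mask != 0 && "caller must guarantee at least one active lane");
    return __builtin_ctz(mask);   // count trailing zeros = lowest set bit (GCC/Clang builtin)
}

int main() {
    const int32_t exprValue[8] = {10, 11, 12, 13, 14, 15, 16, 17};  // stand-in for the varying expr
    uint32_t mask = 0b00101000;              // lanes 3 and 5 active
    int firstSet = firstActiveLane(mask);    // -> 3
    int32_t uniqueValue = exprValue[firstSet];
    std::printf("lane %d, value %d\n", firstSet, uniqueValue);      // lane 3, value 13
    return 0;
}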
@@ -3379,8 +3411,12 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
}
// Now we can emit code to call __do_print()
#ifdef ISPC_NVPTX_ENABLED
llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
#else /* ISPC_NVPTX_ENABLED */
llvm::Function *printFunc = m->module->getFunction("__do_print");
#endif /* ISPC_NVPTX_ENABLED */
AssertPos(pos, printFunc);
llvm::Value *mask = ctx->GetFullMask();
14
type.cpp
@@ -751,7 +751,7 @@ EnumType::Mangle() const {
std::string ret;
if (isConst) ret += "C";
ret += variability.MangleString();
ret += std::string("enum_5B_") + name + std::string("_5C_");
ret += std::string("enum[") + name + std::string("]");
return ret;
}
@@ -1433,7 +1433,7 @@ ArrayType::Mangle() const {
sprintf(buf, "%d", numElements);
else
buf[0] = '\0';
return s + "_5B_" + buf + "_5C_";
return s + "[" + buf + "]";
}
@@ -2106,12 +2106,12 @@ lMangleStruct(Variability variability, bool isConst, const std::string &name) {
Assert(variability != Variability::Unbound);
std::string ret;
ret += "s_5B_";
ret += "s[";
if (isConst)
ret += "_c_";
ret += variability.MangleString();
ret += name + std::string("_5C_");
ret += name + std::string("]");
return ret;
}
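For readers unfamiliar with ispc's name mangling, the two spellings these hunks toggle between differ only in how the brackets are written: literal "[" / "]" versus the escaped "_5B_" / "_5C_" tokens (0x5B is '['), which matters for targets whose assembler-level identifiers cannot contain bracket characters. A standalone illustration, with a made-up type name, of the kind of string each spelling produces; it does not reproduce the full mangling (const marker, variability) and is not ispc code:

#include <cstdio>
#include <string>

// Illustrative only: build a struct mangling in both spellings seen in the hunk.
static std::string mangleStruct(const std::string &name, bool escaped) {
    std::string open = escaped ? "_5B_" : "[";    // 0x5B == '['
    std::string close = escaped ? "_5C_" : "]";   // the code uses _5C_ as the closing token
    return "s" + open + name + close;
}

int main() {
    std::printf("%s\n", mangleStruct("Foo", false).c_str());  // s[Foo]
    std::printf("%s\n", mangleStruct("Foo", true).c_str());   // s_5B_Foo_5C_
    return 0;
}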
@@ -3057,7 +3057,11 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
llvmArgTypes.push_back(LLVMTypes::MaskType);
std::vector<llvm::Type *> callTypes;
if (isTask && g->target->getISA() != Target::NVPTX) {
if (isTask
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
// Tasks take three arguments: a pointer to a struct that holds the
// actual task arguments, the thread index, and the total number of
// threads the tasks system has running. (Task arguments are