added #ifdef ISPC_NVPTX_ENABLED ... #endif guards

This commit is contained in:
evghenii
2014-07-09 12:32:18 +02:00
parent 44c74728bc
commit b3c5a9c4d6
16 changed files with 726 additions and 553 deletions

View File

@@ -73,6 +73,10 @@ endif
# To enable: make ARM_ENABLED=1 # To enable: make ARM_ENABLED=1
ARM_ENABLED=0 ARM_ENABLED=0
# NVPTX is enabled by default; disable it by request
# To disable: make NVPTX_ENABLED=0
NVPTX_ENABLED=1
# Add llvm bin to the path so any scripts run will go to the right llvm-config # Add llvm bin to the path so any scripts run will go to the right llvm-config
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir) LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
export PATH:=$(LLVM_BIN):$(PATH) export PATH:=$(LLVM_BIN):$(PATH)
@@ -89,7 +93,7 @@ LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//') LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//')
LLVM_VERSION_DEF=-D$(LLVM_VERSION) LLVM_VERSION_DEF=-D$(LLVM_VERSION)
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step. # Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
# We check if it's available before adding it (to not break 3.2 and earlier). # We check if it's available before adding it (to not break 3.2 and earlier).
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1) ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
@@ -98,6 +102,9 @@ endif
ifneq ($(ARM_ENABLED), 0) ifneq ($(ARM_ENABLED), 0)
LLVM_COMPONENTS+=arm LLVM_COMPONENTS+=arm
endif endif
ifneq ($(NVPTX_ENABLED), 0)
LLVM_COMPONENTS+=nvptx
endif
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS)) LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
CLANG=clang CLANG=clang
@@ -156,6 +163,9 @@ endif
ifneq ($(ARM_ENABLED), 0) ifneq ($(ARM_ENABLED), 0)
CXXFLAGS+=-DISPC_ARM_ENABLED CXXFLAGS+=-DISPC_ARM_ENABLED
endif endif
ifneq ($(NVPTX_ENABLED), 0)
CXXFLAGS+=-DISPC_NVPTX_ENABLED
endif
LDFLAGS= LDFLAGS=
ifeq ($(ARCH_OS),Linux) ifeq ($(ARCH_OS),Linux)
@@ -174,12 +184,15 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
type.cpp util.cpp type.cpp util.cpp
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h opt.h stmt.h sym.h type.h util.h
TARGETS=nvptx avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \ TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \ sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1 generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
ifneq ($(ARM_ENABLED), 0) ifneq ($(ARM_ENABLED), 0)
TARGETS+=neon-32 neon-16 neon-8 TARGETS+=neon-32 neon-16 neon-8
endif endif
ifneq ($(NVPTX_ENABLED), 0)
TARGETS+=nvptx
endif
# These files need to be compiled in two versions - 32 and 64 bits. # These files need to be compiled in two versions - 32 and 64 bits.
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
# These are files to be compiled in single version. # These are files to be compiled in single version.

View File

@@ -342,13 +342,17 @@ lSetInternalFunctions(llvm::Module *module) {
"__all", "__all",
"__any", "__any",
"__aos_to_soa3_float", "__aos_to_soa3_float",
//#ifdef ISPC_NVPTX_ENABLED
"__aos_to_soa3_float1", "__aos_to_soa3_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__aos_to_soa3_float16", "__aos_to_soa3_float16",
"__aos_to_soa3_float4", "__aos_to_soa3_float4",
"__aos_to_soa3_float8", "__aos_to_soa3_float8",
"__aos_to_soa3_int32", "__aos_to_soa3_int32",
"__aos_to_soa4_float", "__aos_to_soa4_float",
//#ifdef ISPC_NVPTX_ENABLED
"__aos_to_soa4_float1", "__aos_to_soa4_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__aos_to_soa4_float16", "__aos_to_soa4_float16",
"__aos_to_soa4_float4", "__aos_to_soa4_float4",
"__aos_to_soa4_float8", "__aos_to_soa4_float8",
@@ -357,14 +361,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_add_int64_global", "__atomic_add_int64_global",
"__atomic_add_uniform_int32_global", "__atomic_add_uniform_int32_global",
"__atomic_add_uniform_int64_global", "__atomic_add_uniform_int64_global",
"__atomic_add_varying_int32_global",
"__atomic_add_varying_int64_global",
"__atomic_and_int32_global", "__atomic_and_int32_global",
"__atomic_and_int64_global", "__atomic_and_int64_global",
"__atomic_and_uniform_int32_global", "__atomic_and_uniform_int32_global",
"__atomic_and_uniform_int64_global", "__atomic_and_uniform_int64_global",
"__atomic_and_varying_int32_global",
"__atomic_and_varying_int64_global",
"__atomic_compare_exchange_double_global", "__atomic_compare_exchange_double_global",
"__atomic_compare_exchange_float_global", "__atomic_compare_exchange_float_global",
"__atomic_compare_exchange_int32_global", "__atomic_compare_exchange_int32_global",
@@ -373,30 +373,18 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_compare_exchange_uniform_float_global", "__atomic_compare_exchange_uniform_float_global",
"__atomic_compare_exchange_uniform_int32_global", "__atomic_compare_exchange_uniform_int32_global",
"__atomic_compare_exchange_uniform_int64_global", "__atomic_compare_exchange_uniform_int64_global",
"__atomic_compare_exchange_varying_double_global",
"__atomic_compare_exchange_varying_float_global",
"__atomic_compare_exchange_varying_int32_global",
"__atomic_compare_exchange_varying_int64_global",
"__atomic_max_uniform_int32_global", "__atomic_max_uniform_int32_global",
"__atomic_max_uniform_int64_global", "__atomic_max_uniform_int64_global",
"__atomic_min_uniform_int32_global", "__atomic_min_uniform_int32_global",
"__atomic_min_uniform_int64_global", "__atomic_min_uniform_int64_global",
"__atomic_max_varying_int32_global",
"__atomic_max_varying_int64_global",
"__atomic_min_varying_int32_global",
"__atomic_min_varying_int64_global",
"__atomic_or_int32_global", "__atomic_or_int32_global",
"__atomic_or_int64_global", "__atomic_or_int64_global",
"__atomic_or_uniform_int32_global", "__atomic_or_uniform_int32_global",
"__atomic_or_uniform_int64_global", "__atomic_or_uniform_int64_global",
"__atomic_or_varying_int32_global",
"__atomic_or_varying_int64_global",
"__atomic_sub_int32_global", "__atomic_sub_int32_global",
"__atomic_sub_int64_global", "__atomic_sub_int64_global",
"__atomic_sub_uniform_int32_global", "__atomic_sub_uniform_int32_global",
"__atomic_sub_uniform_int64_global", "__atomic_sub_uniform_int64_global",
"__atomic_sub_varying_int32_global",
"__atomic_sub_varying_int64_global",
"__atomic_swap_double_global", "__atomic_swap_double_global",
"__atomic_swap_float_global", "__atomic_swap_float_global",
"__atomic_swap_int32_global", "__atomic_swap_int32_global",
@@ -405,28 +393,46 @@ lSetInternalFunctions(llvm::Module *module) {
"__atomic_swap_uniform_float_global", "__atomic_swap_uniform_float_global",
"__atomic_swap_uniform_int32_global", "__atomic_swap_uniform_int32_global",
"__atomic_swap_uniform_int64_global", "__atomic_swap_uniform_int64_global",
"__atomic_swap_varying_double_global",
"__atomic_swap_varying_float_global",
"__atomic_swap_varying_int32_global",
"__atomic_swap_varying_int64_global",
"__atomic_umax_uniform_uint32_global", "__atomic_umax_uniform_uint32_global",
"__atomic_umax_uniform_uint64_global", "__atomic_umax_uniform_uint64_global",
"__atomic_umin_uniform_uint32_global", "__atomic_umin_uniform_uint32_global",
"__atomic_umin_uniform_uint64_global", "__atomic_umin_uniform_uint64_global",
"__atomic_umax_varying_uint32_global",
"__atomic_umax_varying_uint64_global",
"__atomic_umin_varying_uint32_global",
"__atomic_umin_varying_uint64_global",
"__atomic_xor_int32_global", "__atomic_xor_int32_global",
"__atomic_xor_int64_global", "__atomic_xor_int64_global",
"__atomic_xor_uniform_int32_global", "__atomic_xor_uniform_int32_global",
"__atomic_xor_uniform_int64_global", "__atomic_xor_uniform_int64_global",
//#ifdef ISPC_NVPTX_ENABLED
"__atomic_add_varying_int32_global",
"__atomic_add_varying_int64_global",
"__atomic_and_varying_int32_global",
"__atomic_and_varying_int64_global",
"__atomic_compare_exchange_varying_double_global",
"__atomic_compare_exchange_varying_float_global",
"__atomic_compare_exchange_varying_int32_global",
"__atomic_compare_exchange_varying_int64_global",
"__atomic_max_varying_int32_global",
"__atomic_max_varying_int64_global",
"__atomic_min_varying_int32_global",
"__atomic_min_varying_int64_global",
"__atomic_or_varying_int32_global",
"__atomic_or_varying_int64_global",
"__atomic_sub_varying_int32_global",
"__atomic_sub_varying_int64_global",
"__atomic_swap_varying_double_global",
"__atomic_swap_varying_float_global",
"__atomic_swap_varying_int32_global",
"__atomic_swap_varying_int64_global",
"__atomic_umax_varying_uint32_global",
"__atomic_umax_varying_uint64_global",
"__atomic_umin_varying_uint32_global",
"__atomic_umin_varying_uint64_global",
"__atomic_xor_uniform_int32_global", "__atomic_xor_uniform_int32_global",
"__atomic_xor_uniform_int64_global", "__atomic_xor_uniform_int64_global",
"__atomic_xor_varying_int32_global", "__atomic_xor_varying_int32_global",
"__atomic_xor_varying_int64_global", "__atomic_xor_varying_int64_global",
"__atomic_xor_varying_int32_global", "__atomic_xor_varying_int32_global",
"__atomic_xor_varying_int64_global", "__atomic_xor_varying_int64_global",
//#endif /* ISPC_NVPTX_ENABLED */
"__broadcast_double", "__broadcast_double",
"__broadcast_float", "__broadcast_float",
"__broadcast_i16", "__broadcast_i16",
@@ -449,7 +455,9 @@ lSetInternalFunctions(llvm::Module *module) {
"__do_assert_uniform", "__do_assert_uniform",
"__do_assert_varying", "__do_assert_varying",
"__do_print", "__do_print",
//#ifdef ISPC_NVPTX_ENABLED
"__do_print_nvptx", "__do_print_nvptx",
//#endif /* ISPC_NVPTX_ENABLED */
"__doublebits_uniform_int64", "__doublebits_uniform_int64",
"__doublebits_varying_int64", "__doublebits_varying_int64",
"__exclusive_scan_add_double", "__exclusive_scan_add_double",
@@ -464,8 +472,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__extract_int32", "__extract_int32",
"__extract_int64", "__extract_int64",
"__extract_int8", "__extract_int8",
//#ifdef ISPC_NVPTX_ENABLED
"__extract_float", "__extract_float",
"__extract_double", "__extract_double",
//#endif /* ISPC_NVPTX_ENABLED */
"__fastmath", "__fastmath",
"__float_to_half_uniform", "__float_to_half_uniform",
"__float_to_half_varying", "__float_to_half_varying",
@@ -482,8 +492,10 @@ lSetInternalFunctions(llvm::Module *module) {
"__insert_int32", "__insert_int32",
"__insert_int64", "__insert_int64",
"__insert_int8", "__insert_int8",
//#ifdef ISPC_NVPTX_ENABLED
"__insert_float", "__insert_float",
"__insert_double", "__insert_double",
//#endif /* ISPC_NVPTX_ENABLED */
"__intbits_uniform_double", "__intbits_uniform_double",
"__intbits_uniform_float", "__intbits_uniform_float",
"__intbits_varying_double", "__intbits_varying_double",
@@ -520,7 +532,9 @@ lSetInternalFunctions(llvm::Module *module) {
"__min_varying_uint32", "__min_varying_uint32",
"__min_varying_uint64", "__min_varying_uint64",
"__movmsk", "__movmsk",
//#ifdef ISPC_NVPTX_ENABLED
"__movmsk_ptx", "__movmsk_ptx",
//#endif /* ISPC_NVPTX_ENABLED */
"__new_uniform_32rt", "__new_uniform_32rt",
"__new_uniform_64rt", "__new_uniform_64rt",
"__new_varying32_32rt", "__new_varying32_32rt",
@@ -610,13 +624,15 @@ lSetInternalFunctions(llvm::Module *module) {
"__shuffle_i64", "__shuffle_i64",
"__shuffle_i8", "__shuffle_i8",
"__soa_to_aos3_float", "__soa_to_aos3_float",
"__soa_to_aos3_float1",
"__soa_to_aos3_float16", "__soa_to_aos3_float16",
"__soa_to_aos3_float4", "__soa_to_aos3_float4",
"__soa_to_aos3_float8", "__soa_to_aos3_float8",
"__soa_to_aos3_int32", "__soa_to_aos3_int32",
"__soa_to_aos4_float", "__soa_to_aos4_float",
//#ifdef ISPC_NVPTX_ENABLED
"__soa_to_aos3_float1",
"__soa_to_aos4_float1", "__soa_to_aos4_float1",
//#endif /* ISPC_NVPTX_ENABLED */
"__soa_to_aos4_float16", "__soa_to_aos4_float16",
"__soa_to_aos4_float4", "__soa_to_aos4_float4",
"__soa_to_aos4_float8", "__soa_to_aos4_float8",
@@ -717,7 +733,7 @@ lSetInternalFunctions(llvm::Module *module) {
"__vec4_add_float", "__vec4_add_float",
"__vec4_add_int32", "__vec4_add_int32",
"__vselect_float", "__vselect_float",
"__vselect_i32", //#ifdef ISPC_NVPTX_ENABLED
"__program_index", "__program_index",
"__program_count", "__program_count",
"__warp_index", "__warp_index",
@@ -736,6 +752,8 @@ lSetInternalFunctions(llvm::Module *module) {
"ISPCAlloc", "ISPCAlloc",
"ISPCLaunch", "ISPCLaunch",
"ISPCSync", "ISPCSync",
//#endif /* ISPC_NVPTX_ENABLED */
"__vselect_i32"
}; };
int count = sizeof(names) / sizeof(names[0]); int count = sizeof(names) / sizeof(names[0]);
@@ -808,7 +826,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
g->target->getISA() != Target::NEON16 && g->target->getISA() != Target::NEON16 &&
g->target->getISA() != Target::NEON8) g->target->getISA() != Target::NEON8)
#endif // !__arm__ #endif // !__arm__
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX) if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
{ {
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch || Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
mTriple.getArch() == bcTriple.getArch()); mTriple.getArch() == bcTriple.getArch());
@@ -982,6 +1002,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// Next, add the target's custom implementations of the various needed // Next, add the target's custom implementations of the various needed
// builtin functions (e.g. __masked_store_32(), etc). // builtin functions (e.g. __masked_store_32(), etc).
switch (g->target->getISA()) { switch (g->target->getISA()) {
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX: case Target::NVPTX:
{ {
if (runtime32) { if (runtime32) {
@@ -993,6 +1014,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
} }
break; break;
}; };
#endif /* ISPC_NVPTX_ENABLED */
#ifdef ISPC_ARM_ENABLED #ifdef ISPC_ARM_ENABLED
case Target::NEON8: { case Target::NEON8: {
if (runtime32) { if (runtime32) {
@@ -1262,14 +1285,18 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
} }
// define the 'programCount' builtin variable // define the 'programCount' builtin variable
if (g->target->getISA() != Target::NVPTX) #ifdef ISPC_NVPTX_ENABLED
{ if (g->target->getISA() == Target::NVPTX)
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
}
else
{ {
lDefineConstantInt("programCount", 32, module, symbolTable); lDefineConstantInt("programCount", 32, module, symbolTable);
} }
else
{
#endif /* ISPC_NVPTX_ENABLED */
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
#ifdef ISPC_NVPTX_ENABLED
}
#endif /* ISPC_NVPTX_ENABLED */
// define the 'programIndex' builtin // define the 'programIndex' builtin
lDefineProgramIndex(module, symbolTable); lDefineProgramIndex(module, symbolTable);
@@ -1301,9 +1328,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(), lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
module, symbolTable); module, symbolTable);
#ifdef ISPC_NVPTX_ENABLED
lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX), lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX),
module, symbolTable); module, symbolTable);
#else
lDefineConstantInt("__is_nvptx_target", (int)0, module, symbolTable);
#endif /* ISPC_NVPTX_ENABLED */
if (g->forceAlignment != -1) { if (g->forceAlignment != -1) {
llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true); llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
alignment->setInitializer(LLVMInt32(g->forceAlignment)); alignment->setInitializer(LLVMInt32(g->forceAlignment));

257
ctx.cpp
View File

@@ -57,8 +57,10 @@
#include <llvm/IR/Instructions.h> #include <llvm/IR/Instructions.h>
#include <llvm/IR/DerivedTypes.h> #include <llvm/IR/DerivedTypes.h>
#endif #endif
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/Support/raw_ostream.h> #include <llvm/Support/raw_ostream.h>
#include <llvm/Support/FormattedStream.h> #include <llvm/Support/FormattedStream.h>
#endif /* ISPC_NVPTX_ENABLED */
/** This is a small utility structure that records information related to one /** This is a small utility structure that records information related to one
level of nested control flow. It's mostly used in correctly restoring level of nested control flow. It's mostly used in correctly restoring
@@ -1373,28 +1375,30 @@ FunctionEmitContext::None(llvm::Value *mask) {
llvm::Value * llvm::Value *
FunctionEmitContext::LaneMask(llvm::Value *v) FunctionEmitContext::LaneMask(llvm::Value *v) {
{ #ifdef ISPC_NVPTX_ENABLED
#if 1 /* this makes mandelbrot example slower with "nvptx" target. Need further investigation. */ /* this makes mandelbrot example slower with "nvptx" target.
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk"; * Needs further investigation. */
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
#else #else
const char *__movmsk = "__movmsk"; const char *__movmsk = "__movmsk";
#endif #endif
// Call the target-dependent movmsk function to turn the vector mask // Call the target-dependent movmsk function to turn the vector mask
// into an i64 value // into an i64 value
std::vector<Symbol *> mm; std::vector<Symbol *> mm;
m->symbolTable->LookupFunction(__movmsk, &mm); m->symbolTable->LookupFunction(__movmsk, &mm);
if (g->target->getMaskBitCount() == 1) if (g->target->getMaskBitCount() == 1)
AssertPos(currentPos, mm.size() == 1); AssertPos(currentPos, mm.size() == 1);
else else
// There should be one with signed int signature, one unsigned int. // There should be one with signed int signature, one unsigned int.
AssertPos(currentPos, mm.size() == 2); AssertPos(currentPos, mm.size() == 2);
// We can actually call either one, since both are i32s as far as // We can actually call either one, since both are i32s as far as
// LLVM's type system is concerned... // LLVM's type system is concerned...
llvm::Function *fmm = mm[0]->function; llvm::Function *fmm = mm[0]->function;
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk")); return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
} }
#ifdef ISPC_NVPTX_ENABLED
bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName) bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
{ {
llvm::Type *type = vector->getType(); llvm::Type *type = vector->getType();
@@ -1447,19 +1451,21 @@ FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock()); llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
return ret; return ret;
} }
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value * llvm::Value *
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) { FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
if (g->target->getISA() == Target::NVPTX) #ifdef ISPC_NVPTX_ENABLED
{ if (g->target->getISA() == Target::NVPTX)
// Compare the two masks to get a vector of i1s {
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, // Compare the two masks to get a vector of i1s
v1, v2, "v1==v2"); llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */ v1, v2, "v1==v2");
} return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
else }
{ #endif /* ISPC_NVPTX_ENABLED */
#if 0 #if 0
// Compare the two masks to get a vector of i1s // Compare the two masks to get a vector of i1s
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
@@ -1474,7 +1480,6 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2, return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
LLVMGetName("equal", v1, v2)); LLVMGetName("equal", v1, v2));
#endif #endif
}
} }
llvm::Value * llvm::Value *
@@ -1489,6 +1494,8 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
return index; return index;
} }
#ifdef ISPC_NVPTX_ENABLED
llvm::Value * llvm::Value *
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) { FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
llvm::Function *func_program_index = m->module->getFunction("__program_index"); llvm::Function *func_program_index = m->module->getFunction("__program_index");
@@ -1500,6 +1507,7 @@ FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
#endif #endif
return index; return index;
} }
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value * llvm::Value *
@@ -1919,7 +1927,6 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
if (name == NULL) if (name == NULL)
name = LLVMGetName(value, "_ptr2int"); name = LLVMGetName(value, "_ptr2int");
llvm::Type *type = LLVMTypes::PointerIntType; llvm::Type *type = LLVMTypes::PointerIntType;
llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock); llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
AddDebugPos(inst); AddDebugPos(inst);
@@ -3613,75 +3620,8 @@ llvm::Value *
FunctionEmitContext::LaunchInst(llvm::Value *callee, FunctionEmitContext::LaunchInst(llvm::Value *callee,
std::vector<llvm::Value *> &argVals, std::vector<llvm::Value *> &argVals,
llvm::Value *launchCount[3]){ llvm::Value *launchCount[3]){
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX) if (g->target->getISA() == Target::NVPTX)
{
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
}
launchedTasks = true;
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType());
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
llvm::Value *argmem = BitCastInst(voidmem, pt);
// Copy the values of the parameters into the appropriate place in
// the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think
StoreInst(argVals[i], ptr);
}
if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}
// And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the
// argument block we just filled in
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr);
args.push_back(fptr);
args.push_back(voidmem);
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
}
else /* NVPTX */
{ {
if (callee == NULL) { if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0); AssertPos(currentPos, m->errorCount > 0);
@@ -3764,38 +3704,79 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
llvm::Value *ret = CallInst(flaunch, NULL, args, ""); llvm::Value *ret = CallInst(flaunch, NULL, args, "");
return ret; return ret;
} }
#endif /* ISPC_NVPTX_ENABLED */
if (callee == NULL) {
AssertPos(currentPos, m->errorCount > 0);
return NULL;
}
launchedTasks = true;
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
llvm::Type *argType =
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
AssertPos(currentPos, llvm::PointerType::classof(argType));
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(argType);
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
llvm::StructType *argStructType =
static_cast<llvm::StructType *>(pt->getElementType());
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
AssertPos(currentPos, falloc != NULL);
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
if (structSize->getType() != LLVMTypes::Int64Type)
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
// targets, SizeOf returns a 32-bit value
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
"struct_size_to_64");
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
std::vector<llvm::Value *> allocArgs;
allocArgs.push_back(launchGroupHandlePtr);
allocArgs.push_back(structSize);
allocArgs.push_back(LLVMInt32(align));
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
llvm::Value *argmem = BitCastInst(voidmem, pt);
// Copy the values of the parameters into the appropriate place in
// the argument block
for (unsigned int i = 0; i < argVals.size(); ++i) {
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
// don't need to do masked store here, I think
StoreInst(argVals[i], ptr);
}
if (argStructType->getNumElements() == argVals.size() + 1) {
// copy in the mask
llvm::Value *mask = GetFullMask();
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
"funarg_mask");
StoreInst(mask, ptr);
}
// And emit the call to the user-supplied task launch function, passing
// a pointer to the task function being called and a pointer to the
// argument block we just filled in
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
AssertPos(currentPos, flaunch != NULL);
std::vector<llvm::Value *> args;
args.push_back(launchGroupHandlePtr);
args.push_back(fptr);
args.push_back(voidmem);
args.push_back(launchCount[0]);
args.push_back(launchCount[1]);
args.push_back(launchCount[2]);
return CallInst(flaunch, NULL, args, "");
} }
void void
FunctionEmitContext::SyncInst() { FunctionEmitContext::SyncInst() {
if (g->target->getISA() != Target::NVPTX) #ifdef ISPC_NVPTX_ENABLED
{ if (g->target->getISA() == Target::NVPTX)
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
llvm::CmpInst::ICMP_NE,
launchGroupHandle, nullPtrValue);
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
BranchInst(bSync, bPostSync, nonNull);
SetCurrentBasicBlock(bSync);
llvm::Function *fsync = m->module->getFunction("ISPCSync");
if (fsync == NULL)
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");
// zero out the handle so that if ISPCLaunch is called again in this
// function, it knows it's starting out from scratch
StoreInst(nullPtrValue, launchGroupHandlePtr);
BranchInst(bPostSync);
SetCurrentBasicBlock(bPostSync);
}
else /* NVPTX: don't do test, just call sync */
{ {
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr); llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue = llvm::Value *nullPtrValue =
@@ -3805,7 +3786,33 @@ FunctionEmitContext::SyncInst() {
FATAL("Couldn't find ISPCSync declaration?!"); FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, ""); CallInst(fsync, NULL, launchGroupHandle, "");
StoreInst(nullPtrValue, launchGroupHandlePtr); StoreInst(nullPtrValue, launchGroupHandlePtr);
return;
} }
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
llvm::Value *nullPtrValue =
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
llvm::CmpInst::ICMP_NE,
launchGroupHandle, nullPtrValue);
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
BranchInst(bSync, bPostSync, nonNull);
SetCurrentBasicBlock(bSync);
llvm::Function *fsync = m->module->getFunction("ISPCSync");
if (fsync == NULL)
FATAL("Couldn't find ISPCSync declaration?!");
CallInst(fsync, NULL, launchGroupHandle, "");
// zero out the handle so that if ISPCLaunch is called again in this
// function, it knows it's starting out from scratch
StoreInst(nullPtrValue, launchGroupHandlePtr);
BranchInst(bPostSync);
SetCurrentBasicBlock(bPostSync);
} }

20
ctx.h
View File

@@ -291,21 +291,21 @@ public:
of the mask is on. */ of the mask is on. */
llvm::Value *LaneMask(llvm::Value *mask); llvm::Value *LaneMask(llvm::Value *mask);
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
that indicates whether the two masks are equal. */
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
/** generate constantvector, which contains programindex, i.e.
< i32 0, i32 1, i32 2, i32 3> */
llvm::Value *ProgramIndexVector(bool is32bits = true);
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
/** Issues a call to __insert_int8/int16/int32/int64/float/double */ /** Issues a call to __insert_int8/int16/int32/int64/float/double */
llvm::Value* Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar); llvm::Value* Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar);
/** Issues a call to __extract_int8/int16/int32/int64/float/double */ /** Issues a call to __extract_int8/int16/int32/int64/float/double */
llvm::Value* Extract(llvm::Value *vector, llvm::Value *lane); llvm::Value* Extract(llvm::Value *vector, llvm::Value *lane);
#endif
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
that indicates whether the two masks are equal. */
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
/** Generate ConstantVector, which contains ProgramIndex, i.e.
< i32 0, i32 1, i32 2, i32 3> */
llvm::Value *ProgramIndexVector(bool is32bits = true);
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
/** Given a string, create an anonymous global variable to hold its /** Given a string, create an anonymous global variable to hold its
value and return the pointer to the string. */ value and return the pointer to the string. */

View File

@@ -168,6 +168,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos); retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);
if (soaWidth > 0) { if (soaWidth > 0) {
#ifdef ISPC_NVPTX_ENABLED
#if 0 /* see stmt.cpp in DeclStmt::EmitCode for work-around of SOAType Declaration */ #if 0 /* see stmt.cpp in DeclStmt::EmitCode for work-around of SOAType Declaration */
if (g->target->getISA() == Target::NVPTX) if (g->target->getISA() == Target::NVPTX)
{ {
@@ -175,6 +176,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
return NULL; return NULL;
} }
#endif #endif
#endif /* ISPC_NVPTX_ENABLED */
const StructType *st = CastType<StructType>(retType); const StructType *st = CastType<StructType>(retType);
if (st == NULL) { if (st == NULL) {
@@ -409,6 +411,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
return; return;
} }
#ifdef ISPC_NVPTX_ENABLED
#if 0 /* NVPTX */ #if 0 /* NVPTX */
if (baseType->IsUniformType()) if (baseType->IsUniformType())
{ {
@@ -416,6 +419,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
baseType->IsArrayType() ? " true " : " false "); baseType->IsArrayType() ? " true " : " false ");
} }
#endif #endif
#endif /* ISPC_NVPTX_ENABLED */
const Type *arrayType = new ArrayType(baseType, arraySize); const Type *arrayType = new ArrayType(baseType, arraySize);
if (child != NULL) { if (child != NULL) {
child->InitFromType(arrayType, ds); child->InitFromType(arrayType, ds);
@@ -544,9 +548,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
returnType = returnType->ResolveUnboundVariability(Variability::Varying); returnType = returnType->ResolveUnboundVariability(Variability::Varying);
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C); bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0); bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
bool isUnmasked = ds && ((ds->typeQualifiers & TYPEQUAL_UNMASKED) != 0); bool isUnmasked = ds && ((ds->typeQualifiers & TYPEQUAL_UNMASKED) != 0);
if (isExported && isTask) { if (isExported && isTask) {
@@ -555,9 +559,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
return; return;
} }
if (isExternC && isTask) { if (isExternC && isTask) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" " Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
"qualifiers"); "qualifiers");
return; return;
} }
if (isExternC && isExported) { if (isExternC && isExported) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" " Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "

View File

@@ -7880,12 +7880,14 @@ SizeOfExpr::TypeCheck() {
"struct type \"%s\".", type->GetString().c_str()); "struct type \"%s\".", type->GetString().c_str());
return NULL; return NULL;
} }
#ifdef ISPC_NVPTX_ENABLED
if (type != NULL) if (type != NULL)
if (g->target->getISA() == Target::NVPTX && type->IsVaryingType()) if (g->target->getISA() == Target::NVPTX && type->IsVaryingType())
{ {
Error(pos, "\"sizeof\" with varying data types is not yet supported with \"nvptx\" target."); Error(pos, "\"sizeof\" with varying data types is not yet supported with \"nvptx\" target.");
return NULL; return NULL;
} }
#endif /* ISPC_NVPTX_ENABLED */
return this; return this;
} }
@@ -8718,11 +8720,13 @@ NewExpr::TypeCheck() {
AssertPos(pos, m->errorCount > 0); AssertPos(pos, m->errorCount > 0);
return NULL; return NULL;
} }
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && allocType->IsVaryingType()) if (g->target->getISA() == Target::NVPTX && allocType->IsVaryingType())
{ {
Error(pos, "\"new\" with varying data types is not yet supported with \"nvptx\" target."); Error(pos, "\"new\" with varying data types is not yet supported with \"nvptx\" target.");
return NULL; return NULL;
} }
#endif /* ISPC_NVPTX_ENABLED */
if (CastType<UndefinedStructType>(allocType) != NULL) { if (CastType<UndefinedStructType>(allocType) != NULL) {
Error(pos, "Can't dynamically allocate storage for declared " Error(pos, "Can't dynamically allocate storage for declared "
"but not defined type \"%s\".", allocType->GetString().c_str()); "but not defined type \"%s\".", allocType->GetString().c_str());

View File

@@ -47,7 +47,9 @@
#include <stdio.h> #include <stdio.h>
#if defined(LLVM_3_1) || defined(LLVM_3_2) #if defined(LLVM_3_1) || defined(LLVM_3_2)
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/Metadata.h> #include <llvm/Metadata.h>
#endif /* ISPC_NVPTX_ENABLED */
#include <llvm/LLVMContext.h> #include <llvm/LLVMContext.h>
#include <llvm/Module.h> #include <llvm/Module.h>
#include <llvm/Type.h> #include <llvm/Type.h>
@@ -55,7 +57,9 @@
#include <llvm/Intrinsics.h> #include <llvm/Intrinsics.h>
#include <llvm/DerivedTypes.h> #include <llvm/DerivedTypes.h>
#else #else
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/IR/Metadata.h> #include <llvm/IR/Metadata.h>
#endif /* ISPC_NVPTX_ENABLED */
#include <llvm/IR/LLVMContext.h> #include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h> #include <llvm/IR/Module.h>
#include <llvm/IR/Type.h> #include <llvm/IR/Type.h>
@@ -131,7 +135,11 @@ Function::Function(Symbol *s, Stmt *c) {
sym->parentFunction = this; sym->parentFunction = this;
} }
if (type->isTask && g->target->getISA() != Target::NVPTX) { if (type->isTask
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
threadIndexSym = m->symbolTable->LookupVariable("threadIndex"); threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
Assert(threadIndexSym); Assert(threadIndexSym);
threadCountSym = m->symbolTable->LookupVariable("threadCount"); threadCountSym = m->symbolTable->LookupVariable("threadCount");
@@ -242,7 +250,11 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
#endif #endif
const FunctionType *type = CastType<FunctionType>(sym->type); const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL); Assert(type != NULL);
if (type->isTask == true && g->target->getISA() != Target::NVPTX) { if (type->isTask == true
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
// For tasks, there should always be three parameters: the // For tasks, there should always be three parameters: the
// pointer to the structure that holds all of the arguments, the // pointer to the structure that holds all of the arguments, the
// thread index, and the thread count variables. // thread index, and the thread count variables.
@@ -340,6 +352,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
ctx->SetFunctionMask(argIter); ctx->SetFunctionMask(argIter);
Assert(++argIter == function->arg_end()); Assert(++argIter == function->arg_end());
} }
#ifdef ISPC_NVPTX_ENABLED
if (type->isTask == true && g->target->getISA() == Target::NVPTX) if (type->isTask == true && g->target->getISA() == Target::NVPTX)
{ {
llvm::NamedMDNode* annotations = llvm::NamedMDNode* annotations =
@@ -350,6 +363,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
av.push_back(LLVMInt32(1)); av.push_back(LLVMInt32(1));
annotations->addOperand(llvm::MDNode::get(*g->ctx, av)); annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
} }
#endif /* ISPC_NVPTX_ENABLED */
} }
// Finally, we can generate code for the function // Finally, we can generate code for the function
@@ -505,15 +519,14 @@ Function::GenerateIR() {
// the application can call it // the application can call it
const FunctionType *type = CastType<FunctionType>(sym->type); const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL); Assert(type != NULL);
if (type->isExported) { if (type->isExported) {
if (!type->isTask) { if (!type->isTask) {
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true); llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage; llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
std::string functionName = sym->name; std::string functionName = sym->name;
if (g->mangleFunctionsWithTarget) if (g->mangleFunctionsWithTarget)
functionName += std::string("_") + g->target->GetISAString(); functionName += std::string("_") + g->target->GetISAString();
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX) if (g->target->getISA() == Target::NVPTX)
{ {
functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */ functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */
@@ -527,6 +540,7 @@ Function::GenerateIR() {
annotations->addOperand(llvm::MDNode::get(*g->ctx, av)); annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
#endif #endif
} }
#endif /* ISPC_NVPTX_ENABLED */
llvm::Function *appFunction = llvm::Function *appFunction =
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module); llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
#if defined(LLVM_3_1) #if defined(LLVM_3_1)
@@ -566,6 +580,7 @@ Function::GenerateIR() {
FATAL("Function verificication failed"); FATAL("Function verificication failed");
} }
} }
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX) if (g->target->getISA() == Target::NVPTX)
{ {
llvm::NamedMDNode* annotations = llvm::NamedMDNode* annotations =
@@ -576,6 +591,7 @@ Function::GenerateIR() {
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1)); av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
annotations->addOperand(llvm::MDNode::get(*g->ctx, av)); annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
} }
#endif /* ISPC_NVPTX_ENABLED */
} }
} }
} }

View File

@@ -247,9 +247,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
arch = "arm"; arch = "arm";
else else
#endif #endif
#ifdef ISPC_NVPTX_ENABLED
if(!strncmp(isa, "nvptx", 5)) if(!strncmp(isa, "nvptx", 5))
arch = "nvptx64"; arch = "nvptx64";
else else
#endif /* ISPC_NVPTX_ENABLED */
arch = "x86-64"; arch = "x86-64";
} }
@@ -587,6 +589,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
} }
#endif #endif
#ifdef ISPC_NVPTX_ENABLED
else if (!strcasecmp(isa, "nvptx")) else if (!strcasecmp(isa, "nvptx"))
{ {
this->m_isa = Target::NVPTX; this->m_isa = Target::NVPTX;
@@ -602,6 +605,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasGather = this->m_hasScatter = false; this->m_hasGather = this->m_hasScatter = false;
cpuFromIsa = "sm_35"; cpuFromIsa = "sm_35";
} }
#endif /* ISPC_NVPTX_ENABLED */
else { else {
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.", Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
isa, SupportedTargets()); isa, SupportedTargets());
@@ -720,8 +724,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
// Initialize target-specific "target-feature" attribute. // Initialize target-specific "target-feature" attribute.
if (!m_attributes.empty()) { if (!m_attributes.empty()) {
llvm::AttrBuilder attrBuilder; llvm::AttrBuilder attrBuilder;
#ifdef ISPC_NVPTX_ENABLED
if (m_isa != Target::NVPTX) if (m_isa != Target::NVPTX)
attrBuilder.addAttribute("target-cpu", this->m_cpu); #endif
attrBuilder.addAttribute("target-cpu", this->m_cpu);
attrBuilder.addAttribute("target-features", this->m_attributes); attrBuilder.addAttribute("target-features", this->m_attributes);
this->m_tf_attributes = new llvm::AttributeSet( this->m_tf_attributes = new llvm::AttributeSet(
llvm::AttributeSet::get( llvm::AttributeSet::get(
@@ -768,6 +774,9 @@ Target::SupportedTargets() {
return return
#ifdef ISPC_ARM_ENABLED #ifdef ISPC_ARM_ENABLED
"neon-i8x16, neon-i16x8, neon-i32x4, " "neon-i8x16, neon-i16x8, neon-i32x4, "
#endif
#ifdef ISPC_NVPTX_ENABLED
"nvptx, "
#endif #endif
"sse2-i32x4, sse2-i32x8, " "sse2-i32x4, sse2-i32x8, "
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, " "sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
@@ -776,7 +785,7 @@ Target::SupportedTargets() {
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 " "avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
"avx2-i32x8, avx2-i32x16, avx2-i64x4, " "avx2-i32x8, avx2-i32x16, avx2-i64x4, "
"generic-x1, generic-x4, generic-x8, generic-x16, " "generic-x1, generic-x4, generic-x8, generic-x16, "
"generic-x32, generic-x64, nvptx"; "generic-x32, generic-x64";
} }
@@ -803,8 +812,10 @@ Target::GetTripleString() const {
triple.setArchName("i386"); triple.setArchName("i386");
else if (m_arch == "x86-64") else if (m_arch == "x86-64")
triple.setArchName("x86_64"); triple.setArchName("x86_64");
#ifdef ISPC_NVPTX_ENABLED
else if (m_arch == "nvptx64") else if (m_arch == "nvptx64")
triple = llvm::Triple("nvptx64", "nvidia", "cuda"); triple = llvm::Triple("nvptx64", "nvidia", "cuda");
#endif /* ISPC_NVPTX_ENABLED */
else else
triple.setArchName(m_arch); triple.setArchName(m_arch);
} }
@@ -837,8 +848,10 @@ Target::ISAToString(ISA isa) {
return "avx2"; return "avx2";
case Target::GENERIC: case Target::GENERIC:
return "generic"; return "generic";
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX: case Target::NVPTX:
return "nvptx"; return "nvptx";
#endif /* ISPC_NVPTX_ENABLED */
default: default:
FATAL("Unhandled target in ISAToString()"); FATAL("Unhandled target in ISAToString()");
} }
@@ -877,8 +890,10 @@ Target::ISAToTargetString(ISA isa) {
return "avx2-i32x8"; return "avx2-i32x8";
case Target::GENERIC: case Target::GENERIC:
return "generic-4"; return "generic-4";
#ifdef ISPC_NVPTX_ENABLED
case Target::NVPTX: case Target::NVPTX:
return "nvptx"; return "nvptx";
#endif /* ISPC_NVPTX_ENABLED */
default: default:
FATAL("Unhandled target in ISAToTargetString()"); FATAL("Unhandled target in ISAToTargetString()");
} }

6
ispc.h
View File

@@ -179,7 +179,10 @@ public:
flexible/performant of them will apear last in the enumerant. Note flexible/performant of them will apear last in the enumerant. Note
also that __best_available_isa() needs to be updated if ISAs are also that __best_available_isa() needs to be updated if ISAs are
added or the enumerant values are reordered. */ added or the enumerant values are reordered. */
enum ISA { NVPTX, enum ISA {
#ifdef ISPC_NVPTX_ENABLED
NVPTX,
#endif
#ifdef ISPC_ARM_ENABLED #ifdef ISPC_ARM_ENABLED
NEON32, NEON16, NEON8, NEON32, NEON16, NEON8,
#endif #endif
@@ -611,7 +614,6 @@ struct Globals {
/** Indicates that alignment in memory allocation routines should be /** Indicates that alignment in memory allocation routines should be
forced to have given value. -1 value means natural alignment for the platforms. */ forced to have given value. -1 value means natural alignment for the platforms. */
int forceAlignment; int forceAlignment;
std::string PtxString;
}; };
enum { enum {

View File

@@ -320,10 +320,12 @@ int main(int Argc, char *Argv[]) {
LLVMInitializeARMTargetMC(); LLVMInitializeARMTargetMC();
#endif #endif
#ifdef ISPC_NVPTX_ENABLED
LLVMInitializeNVPTXTargetInfo(); LLVMInitializeNVPTXTargetInfo();
LLVMInitializeNVPTXTarget(); LLVMInitializeNVPTXTarget();
LLVMInitializeNVPTXAsmPrinter(); LLVMInitializeNVPTXAsmPrinter();
LLVMInitializeNVPTXTargetMC(); LLVMInitializeNVPTXTargetMC();
#endif /* ISPC_NVPTX_ENABLED */
char *file = NULL; char *file = NULL;
const char *headerFileName = NULL; const char *headerFileName = NULL;

View File

@@ -58,7 +58,9 @@
#include <set> #include <set>
#include <sstream> #include <sstream>
#include <iostream> #include <iostream>
#ifdef ISPC_NVPTX_ENABLED
#include <map> #include <map>
#endif /* ISPC_NVPTX_ENABLED */
#ifdef ISPC_IS_WINDOWS #ifdef ISPC_IS_WINDOWS
#include <windows.h> #include <windows.h>
#include <io.h> #include <io.h>
@@ -72,7 +74,9 @@
#include <llvm/Instructions.h> #include <llvm/Instructions.h>
#include <llvm/Intrinsics.h> #include <llvm/Intrinsics.h>
#include <llvm/DerivedTypes.h> #include <llvm/DerivedTypes.h>
#ifdef ISPC_NVPTX_ENABLED
#include "llvm/Assembly/AssemblyAnnotationWriter.h" #include "llvm/Assembly/AssemblyAnnotationWriter.h"
#endif /* ISPC_NVPTX_ENABLED */
#else #else
#include <llvm/IR/LLVMContext.h> #include <llvm/IR/LLVMContext.h>
#include <llvm/IR/Module.h> #include <llvm/IR/Module.h>
@@ -80,7 +84,9 @@
#include <llvm/IR/Instructions.h> #include <llvm/IR/Instructions.h>
#include <llvm/IR/Intrinsics.h> #include <llvm/IR/Intrinsics.h>
#include <llvm/IR/DerivedTypes.h> #include <llvm/IR/DerivedTypes.h>
#ifdef ISPC_NVPTX_ENABLED
#include "llvm/Assembly/AssemblyAnnotationWriter.h" #include "llvm/Assembly/AssemblyAnnotationWriter.h"
#endif /* ISPC_NVPTX_ENABLED */
#endif #endif
#include <llvm/PassManager.h> #include <llvm/PassManager.h>
#include <llvm/PassRegistry.h> #include <llvm/PassRegistry.h>
@@ -446,6 +452,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
return; return;
} }
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && if (g->target->getISA() == Target::NVPTX &&
#if 0 #if 0
!type->IsConstType() && !type->IsConstType() &&
@@ -476,7 +483,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
type = new ArrayType(type->GetAsUniformType(), nel); type = new ArrayType(type->GetAsUniformType(), nel);
#endif #endif
} }
#endif /* ISPC_NVPTX_ENABLED */
llvm::Type *llvmType = type->LLVMType(g->ctx); llvm::Type *llvmType = type->LLVMType(g->ctx);
if (llvmType == NULL) if (llvmType == NULL)
@@ -677,6 +684,7 @@ lCheckExportedParameterTypes(const Type *type, const std::string &name,
} }
} }
#ifdef ISPC_NVPTX_ENABLED
static void static void
lCheckTaskParameterTypes(const Type *type, const std::string &name, lCheckTaskParameterTypes(const Type *type, const std::string &name,
SourcePos pos) { SourcePos pos) {
@@ -691,7 +699,7 @@ lCheckTaskParameterTypes(const Type *type, const std::string &name,
name.c_str()); name.c_str());
} }
} }
#endif /* ISPC_NVPTX_ENABLED */
/** Given a function type, loop through the function parameters and see if /** Given a function type, loop through the function parameters and see if
any are StructTypes. If so, issue an error; this is currently broken any are StructTypes. If so, issue an error; this is currently broken
@@ -849,8 +857,12 @@ Module::AddFunctionDeclaration(const std::string &name,
#else // LLVM 3.1 and 3.3+ #else // LLVM 3.1 and 3.3+
function->addFnAttr(llvm::Attribute::AlwaysInline); function->addFnAttr(llvm::Attribute::AlwaysInline);
#endif #endif
/* evghenii: fails function verification when "if" executed in nvptx target */
if (functionType->isTask && g->target->getISA() != Target::NVPTX) if (functionType->isTask)
#ifdef ISPC_NVPTX_ENABLED
/* evghenii: fails function verification when "if" executed in nvptx target */
if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
// This also applies transitively to members I think? // This also applies transitively to members I think?
#if defined(LLVM_3_1) #if defined(LLVM_3_1)
function->setDoesNotAlias(1, true); function->setDoesNotAlias(1, true);
@@ -871,12 +883,14 @@ Module::AddFunctionDeclaration(const std::string &name,
functionType->GetReturnType()->IsVoidType() == false) functionType->GetReturnType()->IsVoidType() == false)
Error(pos, "Task-qualified functions must have void return type."); Error(pos, "Task-qualified functions must have void return type.");
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && if (g->target->getISA() == Target::NVPTX &&
Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false && Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false &&
functionType->isExported) functionType->isExported)
{ {
Error(pos, "Export-qualified functions must have void return type with \"nvptx\" target."); Error(pos, "Export-qualified functions must have void return type with \"nvptx\" target.");
} }
#endif /* ISPC_NVPTX_ENABLED */
if (functionType->isExported || functionType->isExternC) if (functionType->isExported || functionType->isExternC)
lCheckForStructParameters(functionType, pos); lCheckForStructParameters(functionType, pos);
@@ -897,9 +911,12 @@ Module::AddFunctionDeclaration(const std::string &name,
if (functionType->isExported) { if (functionType->isExported) {
lCheckExportedParameterTypes(argType, argName, argPos); lCheckExportedParameterTypes(argType, argName, argPos);
} }
#ifdef ISPC_NVPTX_ENABLED
if (functionType->isTask) { if (functionType->isTask) {
lCheckTaskParameterTypes(argType, argName, argPos); lCheckTaskParameterTypes(argType, argName, argPos);
} }
#endif /* ISPC_NVPTX_ENABLED */
// ISPC assumes that no pointers alias. (It should be possible to // ISPC assumes that no pointers alias. (It should be possible to
// specify when this is not the case, but this should be the // specify when this is not the case, but this should be the
@@ -1027,24 +1044,28 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
const char *fileType = NULL; const char *fileType = NULL;
switch (outputType) { switch (outputType) {
case Asm: case Asm:
if (g->target->getISA() != Target::NVPTX) #ifdef ISPC_NVPTX_ENABLED
{ if (g->target->getISA() == Target::NVPTX)
if (strcasecmp(suffix, "s")) {
if (strcasecmp(suffix, "ptx"))
fileType = "assembly"; fileType = "assembly";
} }
else else
if (strcasecmp(suffix, "ptx")) #endif /* ISPC_NVPTX_ENABLED */
if (strcasecmp(suffix, "s"))
fileType = "assembly"; fileType = "assembly";
break; break;
case Bitcode: case Bitcode:
if (g->target->getISA() != Target::NVPTX) #ifdef ISPC_NVPTX_ENABLED
{ if (g->target->getISA() == Target::NVPTX)
if (strcasecmp(suffix, "bc")) {
fileType = "LLVM bitcode"; if (strcasecmp(suffix, "ll"))
} fileType = "LLVM assembly";
else }
if (strcasecmp(suffix, "ll")) else
fileType = "LLVM assembly"; #endif /* ISPC_NVPTX_ENABLED */
if (strcasecmp(suffix, "bc"))
fileType = "LLVM bitcode";
break; break;
case Object: case Object:
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj")) if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
@@ -1113,6 +1134,7 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
return writeObjectFileOrAssembly(outputType, outFileName); return writeObjectFileOrAssembly(outputType, outFileName);
} }
#ifdef ISPC_NVPTX_ENABLED
typedef std::vector<std::string> vecString_t; typedef std::vector<std::string> vecString_t;
static vecString_t static vecString_t
lSplitString(const std::string &s) lSplitString(const std::string &s)
@@ -1180,6 +1202,7 @@ lFixAttributes(const vecString_t &src, vecString_t &dst)
dst.push_back(s); dst.push_back(s);
} }
} }
#endif /* ISPC_NVPTX_ENABLED */
bool bool
Module::writeBitcode(llvm::Module *module, const char *outFileName) { Module::writeBitcode(llvm::Module *module, const char *outFileName) {
@@ -1204,11 +1227,8 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
} }
llvm::raw_fd_ostream fos(fd, (fd != 1), false); llvm::raw_fd_ostream fos(fd, (fd != 1), false);
if (g->target->getISA() != Target::NVPTX) #ifdef ISPC_NVPTX_ENABLED
{ if (g->target->getISA() == Target::NVPTX)
llvm::WriteBitcodeToFile(module, fos);
}
else
{ {
/* when using "nvptx" target, emit patched/hacked assembly /* when using "nvptx" target, emit patched/hacked assembly
* NVPTX only accepts 3.2-style LLVM assembly, where attributes * NVPTX only accepts 3.2-style LLVM assembly, where attributes
@@ -1240,7 +1260,9 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
fos << *it; fos << *it;
} }
} }
else
#endif /* ISPC_NVPTX_ENABLED */
llvm::WriteBitcodeToFile(module, fos);
return true; return true;
} }
@@ -2275,6 +2297,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
opts.addMacroDef(g->cppArgs[i].substr(2)); opts.addMacroDef(g->cppArgs[i].substr(2));
} }
} }
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX) if (g->target->getISA() == Target::NVPTX)
{ {
opts.addMacroDef("__NVPTX__"); opts.addMacroDef("__NVPTX__");
@@ -2295,6 +2318,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
opts.addMacroDef("taskCount2=__taskCount2()"); opts.addMacroDef("taskCount2=__taskCount2()");
opts.addMacroDef("taskCount=__taskCount()"); opts.addMacroDef("taskCount=__taskCount()");
} }
#endif /* ISPC_NVPTX_ENABLED */
inst.getLangOpts().LineComment = 1; inst.getLangOpts().LineComment = 1;
#if defined(LLVM_3_5) #if defined(LLVM_3_5)
@@ -2740,6 +2764,7 @@ lCreateDispatchModule(std::map<std::string, FunctionTargetVariants> &functions)
return module; return module;
} }
#ifdef ISPC_NVPTX_ENABLED
static std::string lCBEMangle(const std::string &S) { static std::string lCBEMangle(const std::string &S) {
std::string Result; std::string Result;
@@ -2762,7 +2787,7 @@ static std::string lCBEMangle(const std::string &S) {
} }
return Result; return Result;
} }
#endif /* ISPC_NVPTX_ENABLED */
int int
Module::CompileAndOutput(const char *srcFile, Module::CompileAndOutput(const char *srcFile,
@@ -2778,7 +2803,7 @@ Module::CompileAndOutput(const char *srcFile,
const char *hostStubFileName, const char *hostStubFileName,
const char *devStubFileName) const char *devStubFileName)
{ {
if (target == NULL || strchr(target, ',') == NULL) { if (target == NULL || strchr(target, ',') == NULL) {
// We're only compiling to a single target // We're only compiling to a single target
g->target = new Target(arch, cpu, target, generatePIC); g->target = new Target(arch, cpu, target, generatePIC);
if (!g->target->isValid()) if (!g->target->isValid())
@@ -2786,7 +2811,7 @@ Module::CompileAndOutput(const char *srcFile,
m = new Module(srcFile); m = new Module(srcFile);
if (m->CompileFile() == 0) { if (m->CompileFile() == 0) {
#ifdef ISPC_NVPTX_ENABLED
/* NVPTX: /* NVPTX:
* for PTX target replace '.' with '_' in all global variables * for PTX target replace '.' with '_' in all global variables
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]* * a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
@@ -2811,7 +2836,7 @@ Module::CompileAndOutput(const char *srcFile,
} }
} }
} }
#endif /* ISPC_NVPTX_ENABLED */
if (outputType == CXX) { if (outputType == CXX) {
if (target == NULL || strncmp(target, "generic-", 8) != 0) { if (target == NULL || strncmp(target, "generic-", 8) != 0) {
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" " Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
@@ -3014,5 +3039,4 @@ Module::CompileAndOutput(const char *srcFile,
return errorCount > 0; return errorCount > 0;
} }
return true;
} }

17
opt.cpp
View File

@@ -55,7 +55,9 @@
#include <llvm/Function.h> #include <llvm/Function.h>
#include <llvm/BasicBlock.h> #include <llvm/BasicBlock.h>
#include <llvm/Constants.h> #include <llvm/Constants.h>
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/InlineAsm.h> #include <llvm/InlineAsm.h>
#endif /* ISPC_NVPTX_ENABLED */
#else #else
#include <llvm/IR/Module.h> #include <llvm/IR/Module.h>
#include <llvm/IR/Instructions.h> #include <llvm/IR/Instructions.h>
@@ -63,7 +65,9 @@
#include <llvm/IR/Function.h> #include <llvm/IR/Function.h>
#include <llvm/IR/BasicBlock.h> #include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h> #include <llvm/IR/Constants.h>
#ifdef ISPC_NVPTX_ENABLED
#include <llvm/IR/InlineAsm.h> #include <llvm/IR/InlineAsm.h>
#endif /* ISPC_NVPTX_ENABLED */
#endif #endif
#if defined (LLVM_3_4) || defined(LLVM_3_5) #if defined (LLVM_3_4) || defined(LLVM_3_5)
#include <llvm/Transforms/Instrumentation.h> #include <llvm/Transforms/Instrumentation.h>
@@ -131,7 +135,9 @@ static llvm::Pass *CreateDebugPass(char * output);
static llvm::Pass *CreateReplaceStdlibShiftPass(); static llvm::Pass *CreateReplaceStdlibShiftPass();
static llvm::Pass *CreateFixBooleanSelectPass(); static llvm::Pass *CreateFixBooleanSelectPass();
#ifdef ISPC_NVPTX_ENABLED
static llvm::Pass *CreatePromoteLocalToPrivatePass(); static llvm::Pass *CreatePromoteLocalToPrivatePass();
#endif /* ISPC_NVPTX_ENABLED */
#define DEBUG_START_PASS(NAME) \ #define DEBUG_START_PASS(NAME) \
if (g->debugPrint && \ if (g->debugPrint && \
@@ -495,9 +501,11 @@ Optimize(llvm::Module *module, int optLevel) {
// run absolutely no optimizations, since the front-end needs us to // run absolutely no optimizations, since the front-end needs us to
// take the various __pseudo_* functions it has emitted and turn // take the various __pseudo_* functions it has emitted and turn
// them into something that can actually execute. // them into something that can actually execute.
optPM.add(CreateImproveMemoryOpsPass(), 100);
#ifdef ISPC_NVPTX_ENABLED
if (g->opt.disableGatherScatterOptimizations == false && if (g->opt.disableGatherScatterOptimizations == false &&
g->target->getVectorWidth() > 1) g->target->getVectorWidth() > 1)
#endif /* ISPC_NVPTX_ENABLED */
optPM.add(CreateImproveMemoryOpsPass(), 100); optPM.add(CreateImproveMemoryOpsPass(), 100);
if (g->opt.disableHandlePseudoMemoryOps == false) if (g->opt.disableHandlePseudoMemoryOps == false)
@@ -579,7 +587,9 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createReassociatePass()); optPM.add(llvm::createReassociatePass());
optPM.add(llvm::createIPConstantPropagationPass()); optPM.add(llvm::createIPConstantPropagationPass());
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX) if (g->target->getISA() != Target::NVPTX)
#endif /* ISPC_NVPTX_ENABLED */
optPM.add(CreateReplaceStdlibShiftPass(),229); optPM.add(CreateReplaceStdlibShiftPass(),229);
optPM.add(llvm::createDeadArgEliminationPass(),230); optPM.add(llvm::createDeadArgEliminationPass(),230);
@@ -693,7 +703,7 @@ Optimize(llvm::Module *module, int optLevel) {
// Should be the last // Should be the last
optPM.add(CreateFixBooleanSelectPass(), 400); optPM.add(CreateFixBooleanSelectPass(), 400);
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX) if (g->target->getISA() == Target::NVPTX)
{ {
optPM.add(CreatePromoteLocalToPrivatePass()); optPM.add(CreatePromoteLocalToPrivatePass());
@@ -799,6 +809,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createConstantMergePass()); optPM.add(llvm::createConstantMergePass());
#endif #endif
} }
#endif /* ISPC_NVPTX_ENABLED */
} }
// Finish up by making sure we didn't mess anything up in the IR along // Finish up by making sure we didn't mess anything up in the IR along
@@ -5410,6 +5421,7 @@ CreateFixBooleanSelectPass() {
return new FixBooleanSelectPass(); return new FixBooleanSelectPass();
} }
#ifdef ISPC_NVPTX_ENABLED
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
// Detect addrspace(3) // Detect addrspace(3)
/////////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////////
@@ -5498,4 +5510,5 @@ CreatePromoteLocalToPrivatePass() {
#endif /* ISPC_NVPTX_ENABLED */

View File

@@ -45,11 +45,13 @@ then
# $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \ # $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
$($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \ $($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \
$($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \ $($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS); $(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
$(/bin/rm -rf $TMPDIR/*$fbname*);
else else
$(sed 's/\.b0/\.b32/g' $1 > $TMPDIR/$fbname) && \ $(sed 's/\.b0/\.b32/g' $1 > $TMPDIR/$fbname) && \
$($PTXCC $TMPDIR/$fbname -o $TMPDIR/$fbname.o -Xnvcc="-G") && \ $($PTXCC $TMPDIR/$fbname -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS); $(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
$(/bin/rm -rf $TMPDIR/*$fbname*);
fi fi

View File

@@ -233,7 +233,7 @@ def run_test(testname):
elif is_nvptx_target: elif is_nvptx_target:
if os.environ.get("NVVM") == "1": if os.environ.get("NVVM") == "1":
is_nvptx_nvvm = True is_nvptx_nvvm = True
obj_name = "%s.bc" % testname obj_name = "%s.ll" % testname
else: else:
obj_name = "%s.ptx" % testname obj_name = "%s.ptx" % testname
is_nvptx_nvvm = False is_nvptx_nvvm = False

700
stmt.cpp
View File

@@ -142,6 +142,7 @@ lHasUnsizedArrays(const Type *type) {
return lHasUnsizedArrays(at->GetElementType()); return lHasUnsizedArrays(at->GetElementType());
} }
#ifdef ISPC_NVPTX_ENABLED
static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false) static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
{ {
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX) if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
@@ -198,6 +199,7 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
return value; return value;
} }
#endif /* ISPC_NVPTX_ENABLED */
void void
DeclStmt::EmitCode(FunctionEmitContext *ctx) const { DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
@@ -261,9 +263,8 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
return; return;
} }
if (sym->storageClass == SC_STATIC) { if (sym->storageClass == SC_STATIC) {
#ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType()) if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
{ {
Error(sym->pos, Error(sym->pos,
@@ -279,7 +280,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
PerformanceWarning(sym->pos, PerformanceWarning(sym->pos,
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.", "\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
sym->name.c_str()); sym->name.c_str());
#endif /* ISPC_NVPTX_ENABLED */
// For static variables, we need a compile-time constant value // For static variables, we need a compile-time constant value
// for its initializer; if there's no initializer, we use a // for its initializer; if there's no initializer, we use a
// zero value. // zero value.
@@ -307,28 +308,38 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
if (cinit == NULL) if (cinit == NULL)
cinit = llvm::Constant::getNullValue(llvmType); cinit = llvm::Constant::getNullValue(llvmType);
// Allocate space for the static variable in global scope, so
// that it persists across function calls
#ifdef ISPC_NVPTX_ENABLED
int addressSpace = 0; int addressSpace = 0;
if (g->target->getISA() == Target::NVPTX && if (g->target->getISA() == Target::NVPTX &&
sym->type->IsConstType() && sym->type->IsConstType() &&
sym->type->IsUniformType()) sym->type->IsUniformType())
addressSpace = 4; addressSpace = 4;
// Allocate space for the static variable in global scope, so
// that it persists across function calls
sym->storagePtr = sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType, new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(), sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit, llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static_") + llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) + llvm::Twine(sym->pos.first_line) +
llvm::Twine("_") + sym->name.c_str(), llvm::Twine(".") + sym->name.c_str(),
NULL, NULL,
llvm::GlobalVariable::NotThreadLocal, llvm::GlobalVariable::NotThreadLocal,
addressSpace); addressSpace);
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos); sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
#else /* ISPC_NVPTX_ENABLED */
sym->storagePtr =
new llvm::GlobalVariable(*m->module, llvmType,
sym->type->IsConstType(),
llvm::GlobalValue::InternalLinkage, cinit,
llvm::Twine("static.") +
llvm::Twine(sym->pos.first_line) +
llvm::Twine(".") + sym->name.c_str());
#endif /* ISPC_NVPTX_ENABLED */
// Tell the FunctionEmitContext about the variable // Tell the FunctionEmitContext about the variable
ctx->EmitVariableDebugInfo(sym); ctx->EmitVariableDebugInfo(sym);
} }
#ifdef ISPC_NVPTX_ENABLED
else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) && else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
/* NVPTX: /* NVPTX:
* only non-constant uniform data types are stored in shared memory * only non-constant uniform data types are stored in shared memory
@@ -396,6 +407,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
sym->parentFunction = ctx->GetFunction(); sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos); InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
} }
#endif /* ISPC_NVPTX_ENABLED */
else else
{ {
// For non-static variables, allocate storage on the stack // For non-static variables, allocate storage on the stack
@@ -404,7 +416,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// Tell the FunctionEmitContext about the variable; must do // Tell the FunctionEmitContext about the variable; must do
// this before the initializer stuff. // this before the initializer stuff.
ctx->EmitVariableDebugInfo(sym); ctx->EmitVariableDebugInfo(sym);
if (initExpr == 0 && sym->type->IsConstType()) if (initExpr == 0 && sym->type->IsConstType())
Error(sym->pos, "Missing initializer for const variable " Error(sym->pos, "Missing initializer for const variable "
"\"%s\".", sym->name.c_str()); "\"%s\".", sym->name.c_str());
@@ -412,7 +423,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
// And then get it initialized... // And then get it initialized...
sym->parentFunction = ctx->GetFunction(); sym->parentFunction = ctx->GetFunction();
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos); InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
} }
} }
} }
@@ -571,7 +582,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
if (testValue == NULL) if (testValue == NULL)
return; return;
#ifdef ISPC_NVPTX_ENABLED
#if 0 #if 0
if (!isUniform && g->target->getISA() == Target::NVPTX) if (!isUniform && g->target->getISA() == Target::NVPTX)
{ {
@@ -582,7 +593,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
isUniform = true; isUniform = true;
} }
#endif #endif
#endif /* ISPC_NVPTX_ENABLED */
if (isUniform) { if (isUniform) {
ctx->StartUniformIf(); ctx->StartUniformIf();
@@ -865,11 +876,17 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Do any of the program instances want to run the 'true' // Do any of the program instances want to run the 'true'
// block? If not, jump ahead to bNext. // block? If not, jump ahead to bNext.
#if 1
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask()); #ifdef ISPC_NVPTX_ENABLED
#else #if 0
llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0); llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif #endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ); ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
// Emit statements for true // Emit statements for true
@@ -886,11 +903,16 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Similarly, check to see if any of the instances want to // Similarly, check to see if any of the instances want to
// run the 'false' block... // run the 'false' block...
#if 1
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask()); #ifdef ISPC_NVPTX_ENABLED
#else #if 0
llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0); llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
#else
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif #endif
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
#endif /* ISPC_NVPTX_ENABLED */
ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ); ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
// Emit code for false // Emit code for false
@@ -1450,10 +1472,96 @@ static llvm::Value *
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx, lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
llvm::Value *uniformCounterPtr, llvm::Value *uniformCounterPtr,
llvm::Value *varyingCounterPtr, llvm::Value *varyingCounterPtr,
const std::vector<int> &spans) const std::vector<int> &spans) {
{ #ifdef ISPC_NVPTX_ENABLED
if (g->target->getISA() != Target::NVPTX) if (g->target->getISA() == Target::NVPTX)
{ {
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
#endif /* ISPC_NVPTX_ENABLED */
// Smear the uniform counter value out to be varying // Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr); llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue( llvm::Value *smearCounter = ctx->BroadcastValue(
@@ -1486,93 +1594,6 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
LLVMInt32Vector(delta), "iter_val"); LLVMInt32Vector(delta), "iter_val");
ctx->StoreInst(varyingCounter, varyingCounterPtr); ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter; return varyingCounter;
}
else /* NVPTX == true */
{
// Smear the uniform counter value out to be varying
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
llvm::Value *smearCounter = ctx->BroadcastValue(
counter, LLVMTypes::Int32VectorType, "smear_counter");
// Figure out the offsets; this is a little bit tricky. As an example,
// consider a 2D tiled foreach loop, where we're running 8-wide and
// where the inner dimension has a stride of 4 and the outer dimension
// has a stride of 2. For the inner dimension, we want the offsets
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
// (0,0,0,0,1,1,1,1).
int32_t delta[ISPC_MAX_NVEC];
const int vecWidth = 32;
std::vector<llvm::Constant*> constDeltaList;
for (int i = 0; i < vecWidth; ++i)
{
int d = i;
// First, account for the effect of any dimensions at deeper
// nesting levels than the current one.
int prevDimSpanCount = 1;
for (int j = dim; j < nDims-1; ++j)
prevDimSpanCount *= spans[j+1];
d /= prevDimSpanCount;
// And now with what's left, figure out our own offset
delta[i] = d % spans[dim];
constDeltaList.push_back(LLVMInt8(delta[i]));
}
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
/*Module=*/*m->module,
/*Type=*/ArrayDelta,
/*isConstant=*/true,
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
/*Initializer=*/0, // has initializer, specified below
/*Name=*/"constDeltaForeach");
#if 0
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
/*unsigned AddressSpace=*/4 /*constant*/);
#endif
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
globalDelta->setInitializer(constDelta);
llvm::Function *func_program_index = m->module->getFunction("__program_index");
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
std::vector<llvm::Value*> ptr_arrayidx_indices;
ptr_arrayidx_indices.push_back(LLVMInt32(0));
ptr_arrayidx_indices.push_back(laneIdx);
#if 1
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
// llvm::UndefValue(LLVMInt32Vector),
const_packed_41,
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
#endif
// Add the deltas to compute the varying counter values; store the
// result to memory and then return it directly as well.
#if 0
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
LLVMInt32Vector(delta), "iter_val");
#else
llvm::Value *varyingCounter =
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
packed_43, "iter_val");
#endif
ctx->StoreInst(varyingCounter, varyingCounterPtr);
return varyingCounter;
}
} }
@@ -1650,7 +1671,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// This should be caught during typechecking // This should be caught during typechecking
AssertPos(pos, startExprs.size() == dimVariables.size() && AssertPos(pos, startExprs.size() == dimVariables.size() &&
endExprs.size() == dimVariables.size()); endExprs.size() == dimVariables.size());
int nDims = (int)dimVariables.size(); int nDims = (int)dimVariables.size();
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
@@ -1661,66 +1682,70 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs; std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
std::vector<int> span(nDims, 0); std::vector<int> span(nDims, 0);
#ifdef ISPC_NVPTX_ENABLED
const int vectorWidth = const int vectorWidth =
g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth(); g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]); lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
#else /* ISPC_NVPTX_ENABLED */
lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
#endif /* ISPC_NVPTX_ENABLED */
for (int i = 0; i < nDims; ++i) { for (int i = 0; i < nDims; ++i) {
// Basic blocks that we'll fill in later with the looping logic for // Basic blocks that we'll fill in later with the looping logic for
// this dimension. // this dimension.
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset")); bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
if (i < nDims-1) if (i < nDims-1)
// stepping for the innermost dimension is handled specially // stepping for the innermost dimension is handled specially
bbStep.push_back(ctx->CreateBasicBlock("foreach_step")); bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
bbTest.push_back(ctx->CreateBasicBlock("foreach_test")); bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
// Start and end value for this loop dimension // Start and end value for this loop dimension
llvm::Value *sv = startExprs[i]->GetValue(ctx); llvm::Value *sv = startExprs[i]->GetValue(ctx);
llvm::Value *ev = endExprs[i]->GetValue(ctx); llvm::Value *ev = endExprs[i]->GetValue(ctx);
if (sv == NULL || ev == NULL) if (sv == NULL || ev == NULL)
return; return;
startVals.push_back(sv); startVals.push_back(sv);
endVals.push_back(ev); endVals.push_back(ev);
// nItems = endVal - startVal // nItems = endVal - startVal
llvm::Value *nItems = llvm::Value *nItems =
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems"); ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
// nExtras = nItems % (span for this dimension) // nExtras = nItems % (span for this dimension)
// This gives us the number of extra elements we need to deal with // This gives us the number of extra elements we need to deal with
// at the end of the loop for this dimension that don't fit cleanly // at the end of the loop for this dimension that don't fit cleanly
// into a vector width. // into a vector width.
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems, nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
LLVMInt32(span[i]), "nextras")); LLVMInt32(span[i]), "nextras"));
// alignedEnd = endVal - nExtras // alignedEnd = endVal - nExtras
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev, alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
nExtras[i], "aligned_end")); nExtras[i], "aligned_end"));
/////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////
// Each dimension has a loop counter that is a uniform value that // Each dimension has a loop counter that is a uniform value that
// goes from startVal to endVal, in steps of the span for this // goes from startVal to endVal, in steps of the span for this
// dimension. Its value is only used internally here for looping // dimension. Its value is only used internally here for looping
// logic and isn't directly available in the user's program code. // logic and isn't directly available in the user's program code.
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type, uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
"counter")); "counter"));
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]); ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
// There is also a varying variable that holds the set of index // There is also a varying variable that holds the set of index
// values for each dimension in the current loop iteration; this is // values for each dimension in the current loop iteration; this is
// the value that is program-visible. // the value that is program-visible.
dimVariables[i]->storagePtr = dimVariables[i]->storagePtr =
ctx->AllocaInst(LLVMTypes::Int32VectorType, ctx->AllocaInst(LLVMTypes::Int32VectorType,
dimVariables[i]->name.c_str()); dimVariables[i]->name.c_str());
dimVariables[i]->parentFunction = ctx->GetFunction(); dimVariables[i]->parentFunction = ctx->GetFunction();
ctx->EmitVariableDebugInfo(dimVariables[i]); ctx->EmitVariableDebugInfo(dimVariables[i]);
// Each dimension also maintains a mask that represents which of // Each dimension also maintains a mask that represents which of
// the varying elements in the current iteration should be // the varying elements in the current iteration should be
// processed. (i.e. this is used to disable the lanes that have // processed. (i.e. this is used to disable the lanes that have
// out-of-bounds offsets.) // out-of-bounds offsets.)
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask")); extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]); ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
} }
ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR); ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
@@ -1733,14 +1758,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// a given dimension in preparation for running through its loop again, // a given dimension in preparation for running through its loop again,
// after the enclosing level advances its counter. // after the enclosing level advances its counter.
for (int i = 0; i < nDims; ++i) { for (int i = 0; i < nDims; ++i) {
ctx->SetCurrentBasicBlock(bbReset[i]); ctx->SetCurrentBasicBlock(bbReset[i]);
if (i == 0) if (i == 0)
ctx->BranchInst(bbExit); ctx->BranchInst(bbExit);
else { else {
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]); ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]); ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
ctx->BranchInst(bbStep[i-1]); ctx->BranchInst(bbStep[i-1]);
} }
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -1750,67 +1775,67 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// this for the innermost dimension, which has a more complex stepping // this for the innermost dimension, which has a more complex stepping
// structure.. // structure..
for (int i = 0; i < nDims-1; ++i) { for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbStep[i]); ctx->SetCurrentBasicBlock(bbStep[i]);
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]); llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
llvm::Value *newCounter = llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter, ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[i]), "new_counter"); LLVMInt32(span[i]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[i]); ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
ctx->BranchInst(bbTest[i]); ctx->BranchInst(bbTest[i]);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// foreach_test (for all dimensions other than the innermost...) // foreach_test (for all dimensions other than the innermost...)
std::vector<llvm::Value *> inExtras; std::vector<llvm::Value *> inExtras;
for (int i = 0; i < nDims-1; ++i) { for (int i = 0; i < nDims-1; ++i) {
ctx->SetCurrentBasicBlock(bbTest[i]); ctx->SetCurrentBasicBlock(bbTest[i]);
llvm::Value *haveExtras = llvm::Value *haveExtras =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
endVals[i], alignedEnd[i], "have_extras"); endVals[i], alignedEnd[i], "have_extras");
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter"); llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
llvm::Value *atAlignedEnd = llvm::Value *atAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
counter, alignedEnd[i], "at_aligned_end"); counter, alignedEnd[i], "at_aligned_end");
llvm::Value *inEx = llvm::Value *inEx =
ctx->BinaryOperator(llvm::Instruction::And, haveExtras, ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
atAlignedEnd, "in_extras"); atAlignedEnd, "in_extras");
if (i == 0) if (i == 0)
inExtras.push_back(inEx); inExtras.push_back(inEx);
else else
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx, inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
inExtras[i-1], "in_extras_all")); inExtras[i-1], "in_extras_all"));
llvm::Value *varyingCounter = llvm::Value *varyingCounter =
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i], lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
dimVariables[i]->storagePtr, span); dimVariables[i]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue( llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[i], LLVMTypes::Int32VectorType, "smear_end"); endVals[i], LLVMTypes::Int32VectorType, "smear_end");
// Do a vector compare of its value to the end value to generate a // Do a vector compare of its value to the end value to generate a
// mask for this last bit of work. // mask for this last bit of work.
llvm::Value *emask = llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd); varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask); emask = ctx->I1VecToBoolVec(emask);
if (i == 0) if (i == 0)
ctx->StoreInst(emask, extrasMaskPtrs[i]); ctx->StoreInst(emask, extrasMaskPtrs[i]);
else { else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]); llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
llvm::Value *newMask = llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask, ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask"); "extras_mask");
ctx->StoreInst(newMask, extrasMaskPtrs[i]); ctx->StoreInst(newMask, extrasMaskPtrs[i]);
} }
llvm::Value *notAtEnd = llvm::Value *notAtEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[i]); counter, endVals[i]);
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd); ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -1847,18 +1872,18 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// (i.e. processing extra elements that don't exactly fit into a // (i.e. processing extra elements that don't exactly fit into a
// vector). // vector).
llvm::BasicBlock *bbOuterInExtras = llvm::BasicBlock *bbOuterInExtras =
ctx->CreateBasicBlock("outer_in_extras"); ctx->CreateBasicBlock("outer_in_extras");
llvm::BasicBlock *bbOuterNotInExtras = llvm::BasicBlock *bbOuterNotInExtras =
ctx->CreateBasicBlock("outer_not_in_extras"); ctx->CreateBasicBlock("outer_not_in_extras");
ctx->SetCurrentBasicBlock(bbTest[nDims-1]); ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
if (inExtras.size()) if (inExtras.size())
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras, ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
inExtras.back()); inExtras.back());
else else
// for a 1D iteration domain, we certainly don't have any enclosing // for a 1D iteration domain, we certainly don't have any enclosing
// dimensions that are processing extra elements. // dimensions that are processing extra elements.
ctx->BranchInst(bbOuterNotInExtras); ctx->BranchInst(bbOuterNotInExtras);
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// One or more outer dimensions in extras, so we need to mask for the loop // One or more outer dimensions in extras, so we need to mask for the loop
@@ -1873,21 +1898,21 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask // // run loop body with mask
// } // }
llvm::BasicBlock *bbAllInnerPartialOuter = llvm::BasicBlock *bbAllInnerPartialOuter =
ctx->CreateBasicBlock("all_inner_partial_outer"); ctx->CreateBasicBlock("all_inner_partial_outer");
llvm::BasicBlock *bbPartial = llvm::BasicBlock *bbPartial =
ctx->CreateBasicBlock("both_partial"); ctx->CreateBasicBlock("both_partial");
ctx->SetCurrentBasicBlock(bbOuterInExtras); { ctx->SetCurrentBasicBlock(bbOuterInExtras); {
// Update the varying counter value here, since all subsequent // Update the varying counter value here, since all subsequent
// blocks along this path need it. // blocks along this path need it.
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span); dimVariables[nDims-1]->storagePtr, span);
// here we just check to see if counter < alignedEnd // here we just check to see if counter < alignedEnd
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter"); llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd = llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end"); counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd); ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
} }
// Below we have a basic block that runs the loop body code for the // Below we have a basic block that runs the loop body code for the
@@ -1906,53 +1931,53 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// should step the loop counter for the next enclosing dimension // should step the loop counter for the next enclosing dimension
// instead. // instead.
llvm::Value *stepIndexAfterMaskedBodyPtr = llvm::Value *stepIndexAfterMaskedBodyPtr =
ctx->AllocaInst(LLVMTypes::BoolType, "step_index"); ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// We're in the inner loop part where the only masking is due to outer // We're in the inner loop part where the only masking is due to outer
// dimensions but the innermost dimension fits fully into a vector's // dimensions but the innermost dimension fits fully into a vector's
// width. Set the mask and jump to the masked loop body. // width. Set the mask and jump to the masked loop body.
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); { ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
llvm::Value *mask; llvm::Value *mask;
if (nDims == 1) if (nDims == 1)
// 1D loop; we shouldn't ever get here anyway // 1D loop; we shouldn't ever get here anyway
mask = LLVMMaskAllOff; mask = LLVMMaskAllOff;
else else
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]); mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
ctx->SetInternalMask(mask); ctx->SetInternalMask(mask);
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr); ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody); ctx->BranchInst(bbMaskedBody);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// We need to include the effect of the innermost dimension in the mask // We need to include the effect of the innermost dimension in the mask
// for the final bits here // for the final bits here
ctx->SetCurrentBasicBlock(bbPartial); { ctx->SetCurrentBasicBlock(bbPartial); {
llvm::Value *varyingCounter = llvm::Value *varyingCounter =
ctx->LoadInst(dimVariables[nDims-1]->storagePtr); ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
llvm::Value *smearEnd = ctx->BroadcastValue( llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end"); endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask = llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd); varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask); emask = ctx->I1VecToBoolVec(emask);
if (nDims == 1) { if (nDims == 1) {
ctx->SetInternalMask(emask); ctx->SetInternalMask(emask);
} }
else { else {
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]); llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
llvm::Value *newMask = llvm::Value *newMask =
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask, ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
"extras_mask"); "extras_mask");
ctx->SetInternalMask(newMask); ctx->SetInternalMask(newMask);
} }
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr); ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody); ctx->BranchInst(bbMaskedBody);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -1968,14 +1993,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// // run loop body with mask // // run loop body with mask
// } // }
llvm::BasicBlock *bbPartialInnerAllOuter = llvm::BasicBlock *bbPartialInnerAllOuter =
ctx->CreateBasicBlock("partial_inner_all_outer"); ctx->CreateBasicBlock("partial_inner_all_outer");
ctx->SetCurrentBasicBlock(bbOuterNotInExtras); { ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter"); llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeAlignedEnd = llvm::Value *beforeAlignedEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, alignedEnd[nDims-1], "before_aligned_end"); counter, alignedEnd[nDims-1], "before_aligned_end");
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter, ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
beforeAlignedEnd); beforeAlignedEnd);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -1985,26 +2010,26 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// value of the varying loop counter and have the statements in the // value of the varying loop counter and have the statements in the
// loop body emit their code. // loop body emit their code.
llvm::BasicBlock *bbFullBodyContinue = llvm::BasicBlock *bbFullBodyContinue =
ctx->CreateBasicBlock("foreach_full_continue"); ctx->CreateBasicBlock("foreach_full_continue");
ctx->SetCurrentBasicBlock(bbFullBody); { ctx->SetCurrentBasicBlock(bbFullBody); {
ctx->SetInternalMask(LLVMMaskAllOn); ctx->SetInternalMask(LLVMMaskAllOn);
ctx->SetBlockEntryMask(LLVMMaskAllOn); ctx->SetBlockEntryMask(LLVMMaskAllOn);
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span); dimVariables[nDims-1]->storagePtr, span);
ctx->SetContinueTarget(bbFullBodyContinue); ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)"); ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx); stmts->EmitCode(ctx);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL); AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue); ctx->BranchInst(bbFullBodyContinue);
} }
ctx->SetCurrentBasicBlock(bbFullBodyContinue); { ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
ctx->RestoreContinuedLanes(); ctx->RestoreContinuedLanes();
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]); llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter = llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter, ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter"); LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]); ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterNotInExtras); ctx->BranchInst(bbOuterNotInExtras);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -2012,33 +2037,33 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// less than the end value, in which case we need to run the body one // less than the end value, in which case we need to run the body one
// more time to get the extra bits. // more time to get the extra bits.
llvm::BasicBlock *bbSetInnerMask = llvm::BasicBlock *bbSetInnerMask =
ctx->CreateBasicBlock("partial_inner_only"); ctx->CreateBasicBlock("partial_inner_only");
ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); { ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter"); llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
llvm::Value *beforeFullEnd = llvm::Value *beforeFullEnd =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
counter, endVals[nDims-1], "before_full_end"); counter, endVals[nDims-1], "before_full_end");
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd); ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// The outer dimensions are all on, so the mask is just given by the // The outer dimensions are all on, so the mask is just given by the
// mask for the innermost dimension // mask for the innermost dimension
ctx->SetCurrentBasicBlock(bbSetInnerMask); { ctx->SetCurrentBasicBlock(bbSetInnerMask); {
llvm::Value *varyingCounter = llvm::Value *varyingCounter =
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1], lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
dimVariables[nDims-1]->storagePtr, span); dimVariables[nDims-1]->storagePtr, span);
llvm::Value *smearEnd = ctx->BroadcastValue( llvm::Value *smearEnd = ctx->BroadcastValue(
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end"); endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
llvm::Value *emask = llvm::Value *emask =
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT, ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
varyingCounter, smearEnd); varyingCounter, smearEnd);
emask = ctx->I1VecToBoolVec(emask); emask = ctx->I1VecToBoolVec(emask);
ctx->SetInternalMask(emask); ctx->SetInternalMask(emask);
ctx->SetBlockEntryMask(emask); ctx->SetBlockEntryMask(emask);
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr); ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbMaskedBody); ctx->BranchInst(bbMaskedBody);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -2048,34 +2073,34 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
// mask known to be all-on, which in turn leads to more efficient code // mask known to be all-on, which in turn leads to more efficient code
// for that case. // for that case.
llvm::BasicBlock *bbStepInnerIndex = llvm::BasicBlock *bbStepInnerIndex =
ctx->CreateBasicBlock("step_inner_index"); ctx->CreateBasicBlock("step_inner_index");
llvm::BasicBlock *bbMaskedBodyContinue = llvm::BasicBlock *bbMaskedBodyContinue =
ctx->CreateBasicBlock("foreach_masked_continue"); ctx->CreateBasicBlock("foreach_masked_continue");
ctx->SetCurrentBasicBlock(bbMaskedBody); { ctx->SetCurrentBasicBlock(bbMaskedBody); {
ctx->AddInstrumentationPoint("foreach loop body (masked)"); ctx->AddInstrumentationPoint("foreach loop body (masked)");
ctx->SetContinueTarget(bbMaskedBodyContinue); ctx->SetContinueTarget(bbMaskedBodyContinue);
ctx->DisableGatherScatterWarnings(); ctx->DisableGatherScatterWarnings();
ctx->SetBlockEntryMask(ctx->GetFullMask()); ctx->SetBlockEntryMask(ctx->GetFullMask());
stmts->EmitCode(ctx); stmts->EmitCode(ctx);
ctx->EnableGatherScatterWarnings(); ctx->EnableGatherScatterWarnings();
ctx->BranchInst(bbMaskedBodyContinue); ctx->BranchInst(bbMaskedBodyContinue);
} }
ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); { ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
ctx->RestoreContinuedLanes(); ctx->RestoreContinuedLanes();
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr); llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex); ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// step the innermost index, for the case where we're doing the // step the innermost index, for the case where we're doing the
// innermost for loop over full vectors. // innermost for loop over full vectors.
ctx->SetCurrentBasicBlock(bbStepInnerIndex); { ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]); llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
llvm::Value *newCounter = llvm::Value *newCounter =
ctx->BinaryOperator(llvm::Instruction::Add, counter, ctx->BinaryOperator(llvm::Instruction::Add, counter,
LLVMInt32(span[nDims-1]), "new_counter"); LLVMInt32(span[nDims-1]), "new_counter");
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]); ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
ctx->BranchInst(bbOuterInExtras); ctx->BranchInst(bbOuterInExtras);
} }
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
@@ -2262,8 +2287,12 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
// math...) // math...)
// Get the "program index" vector value // Get the "program index" vector value
#ifdef ISPC_NVPTX_ENABLED
llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ? llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector(); ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
#else /* ISPC_NVPTX_ENABLED */
llvm::Value *programIndex = ctx->ProgramIndexVector();
#endif /* ISPC_NVPTX_ENABLED */
// And smear the current lane out to a vector // And smear the current lane out to a vector
llvm::Value *firstSet32 = llvm::Value *firstSet32 =
@@ -2460,19 +2489,22 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
// And load the corresponding element value from the temporary // And load the corresponding element value from the temporary
// memory storing the value of the varying expr. // memory storing the value of the varying expr.
llvm::Value *uniqueValue; llvm::Value *uniqueValue;
if (g->target->getISA() != Target::NVPTX) #ifdef ISPC_NVPTX_ENABLED
{ if (g->target->getISA() == Target::NVPTX)
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
}
else /* in case of PTX target, use __shfl PTX intrinsics via __insert/__extract function */
{ {
llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type); llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
uniqueValue = ctx->Extract(exprValue, firstSet32); uniqueValue = ctx->Extract(exprValue, firstSet32);
} }
else
{
#endif /* ISPC_NVPTX_ENABLED */
llvm::Value *uniqueValuePtr =
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
"unique_index_ptr");
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
#ifdef ISPC_NVPTX_ENABLED
}
#endif /* ISPC_NVPTX_ENABLED */
// If it's a varying pointer type, need to convert from the int // If it's a varying pointer type, need to convert from the int
// type we store in the vector to the actual pointer type // type we store in the vector to the actual pointer type
if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL) if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
@@ -3379,8 +3411,12 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
} }
// Now we can emit code to call __do_print() // Now we can emit code to call __do_print()
#ifdef ISPC_NVPTX_ENABLED
llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ? llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx"); m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
#else /* ISPC_NVPTX_ENABLED */
llvm::Function *printFunc = m->module->getFunction("__do_print");
#endif /* ISPC_NVPTX_ENABLED */
AssertPos(pos, printFunc); AssertPos(pos, printFunc);
llvm::Value *mask = ctx->GetFullMask(); llvm::Value *mask = ctx->GetFullMask();

View File

@@ -751,7 +751,7 @@ EnumType::Mangle() const {
std::string ret; std::string ret;
if (isConst) ret += "C"; if (isConst) ret += "C";
ret += variability.MangleString(); ret += variability.MangleString();
ret += std::string("enum_5B_") + name + std::string("_5C_"); ret += std::string("enum[") + name + std::string("]");
return ret; return ret;
} }
@@ -1433,7 +1433,7 @@ ArrayType::Mangle() const {
sprintf(buf, "%d", numElements); sprintf(buf, "%d", numElements);
else else
buf[0] = '\0'; buf[0] = '\0';
return s + "_5B_" + buf + "_5C_"; return s + "[" + buf + "]";
} }
@@ -2106,12 +2106,12 @@ lMangleStruct(Variability variability, bool isConst, const std::string &name) {
Assert(variability != Variability::Unbound); Assert(variability != Variability::Unbound);
std::string ret; std::string ret;
ret += "s_5B_"; ret += "s[";
if (isConst) if (isConst)
ret += "_c_"; ret += "_c_";
ret += variability.MangleString(); ret += variability.MangleString();
ret += name + std::string("_5C_"); ret += name + std::string("]");
return ret; return ret;
} }
@@ -3057,7 +3057,11 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
llvmArgTypes.push_back(LLVMTypes::MaskType); llvmArgTypes.push_back(LLVMTypes::MaskType);
std::vector<llvm::Type *> callTypes; std::vector<llvm::Type *> callTypes;
if (isTask && g->target->getISA() != Target::NVPTX) { if (isTask
#ifdef ISPC_NVPTX_ENABLED
&& (g->target->getISA() != Target::NVPTX)
#endif
){
// Tasks take three arguments: a pointer to a struct that holds the // Tasks take three arguments: a pointer to a struct that holds the
// actual task arguments, the thread index, and the total number of // actual task arguments, the thread index, and the total number of
// threads the tasks system has running. (Task arguments are // threads the tasks system has running. (Task arguments are