added #ifdef ISPC_NVPTX_ENABLED ... #endif guards
This commit is contained in:
17
Makefile
17
Makefile
@@ -73,6 +73,10 @@ endif
|
|||||||
# To enable: make ARM_ENABLED=1
|
# To enable: make ARM_ENABLED=1
|
||||||
ARM_ENABLED=0
|
ARM_ENABLED=0
|
||||||
|
|
||||||
|
# Disable NVPTX by request
|
||||||
|
# To disable: make NVPTX_ENABLED=0
|
||||||
|
NVPTX_ENABLED=1
|
||||||
|
|
||||||
# Add llvm bin to the path so any scripts run will go to the right llvm-config
|
# Add llvm bin to the path so any scripts run will go to the right llvm-config
|
||||||
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
|
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
|
||||||
export PATH:=$(LLVM_BIN):$(PATH)
|
export PATH:=$(LLVM_BIN):$(PATH)
|
||||||
@@ -89,7 +93,7 @@ LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
|||||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//')
|
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e 's/svn//' -e 's/\./_/' -e 's/\..*//')
|
||||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||||
|
|
||||||
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker nvptx
|
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
|
||||||
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
|
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
|
||||||
# We check if it's available before adding it (to not break 3.2 and earlier).
|
# We check if it's available before adding it (to not break 3.2 and earlier).
|
||||||
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
|
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
|
||||||
@@ -98,6 +102,9 @@ endif
|
|||||||
ifneq ($(ARM_ENABLED), 0)
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
LLVM_COMPONENTS+=arm
|
LLVM_COMPONENTS+=arm
|
||||||
endif
|
endif
|
||||||
|
ifneq ($(NVPTX_ENABLED), 0)
|
||||||
|
LLVM_COMPONENTS+=nvptx
|
||||||
|
endif
|
||||||
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
|
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
|
||||||
|
|
||||||
CLANG=clang
|
CLANG=clang
|
||||||
@@ -156,6 +163,9 @@ endif
|
|||||||
ifneq ($(ARM_ENABLED), 0)
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
CXXFLAGS+=-DISPC_ARM_ENABLED
|
CXXFLAGS+=-DISPC_ARM_ENABLED
|
||||||
endif
|
endif
|
||||||
|
ifneq ($(NVPTX_ENABLED), 0)
|
||||||
|
CXXFLAGS+=-DISPC_NVPTX_ENABLED
|
||||||
|
endif
|
||||||
|
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
ifeq ($(ARCH_OS),Linux)
|
ifeq ($(ARCH_OS),Linux)
|
||||||
@@ -174,12 +184,15 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
|||||||
type.cpp util.cpp
|
type.cpp util.cpp
|
||||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||||
opt.h stmt.h sym.h type.h util.h
|
opt.h stmt.h sym.h type.h util.h
|
||||||
TARGETS=nvptx avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
TARGETS=avx2-i64x4 avx11-i64x4 avx1-i64x4 avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
||||||
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
||||||
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
||||||
ifneq ($(ARM_ENABLED), 0)
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
TARGETS+=neon-32 neon-16 neon-8
|
TARGETS+=neon-32 neon-16 neon-8
|
||||||
endif
|
endif
|
||||||
|
ifneq ($(NVPTX_ENABLED), 0)
|
||||||
|
TARGETS+=nvptx
|
||||||
|
endif
|
||||||
# These files need to be compiled in two versions - 32 and 64 bits.
|
# These files need to be compiled in two versions - 32 and 64 bits.
|
||||||
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
|
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
|
||||||
# These are files to be compiled in single version.
|
# These are files to be compiled in single version.
|
||||||
|
|||||||
95
builtins.cpp
95
builtins.cpp
@@ -342,13 +342,17 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__all",
|
"__all",
|
||||||
"__any",
|
"__any",
|
||||||
"__aos_to_soa3_float",
|
"__aos_to_soa3_float",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
"__aos_to_soa3_float1",
|
"__aos_to_soa3_float1",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__aos_to_soa3_float16",
|
"__aos_to_soa3_float16",
|
||||||
"__aos_to_soa3_float4",
|
"__aos_to_soa3_float4",
|
||||||
"__aos_to_soa3_float8",
|
"__aos_to_soa3_float8",
|
||||||
"__aos_to_soa3_int32",
|
"__aos_to_soa3_int32",
|
||||||
"__aos_to_soa4_float",
|
"__aos_to_soa4_float",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
"__aos_to_soa4_float1",
|
"__aos_to_soa4_float1",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__aos_to_soa4_float16",
|
"__aos_to_soa4_float16",
|
||||||
"__aos_to_soa4_float4",
|
"__aos_to_soa4_float4",
|
||||||
"__aos_to_soa4_float8",
|
"__aos_to_soa4_float8",
|
||||||
@@ -357,14 +361,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__atomic_add_int64_global",
|
"__atomic_add_int64_global",
|
||||||
"__atomic_add_uniform_int32_global",
|
"__atomic_add_uniform_int32_global",
|
||||||
"__atomic_add_uniform_int64_global",
|
"__atomic_add_uniform_int64_global",
|
||||||
"__atomic_add_varying_int32_global",
|
|
||||||
"__atomic_add_varying_int64_global",
|
|
||||||
"__atomic_and_int32_global",
|
"__atomic_and_int32_global",
|
||||||
"__atomic_and_int64_global",
|
"__atomic_and_int64_global",
|
||||||
"__atomic_and_uniform_int32_global",
|
"__atomic_and_uniform_int32_global",
|
||||||
"__atomic_and_uniform_int64_global",
|
"__atomic_and_uniform_int64_global",
|
||||||
"__atomic_and_varying_int32_global",
|
|
||||||
"__atomic_and_varying_int64_global",
|
|
||||||
"__atomic_compare_exchange_double_global",
|
"__atomic_compare_exchange_double_global",
|
||||||
"__atomic_compare_exchange_float_global",
|
"__atomic_compare_exchange_float_global",
|
||||||
"__atomic_compare_exchange_int32_global",
|
"__atomic_compare_exchange_int32_global",
|
||||||
@@ -373,30 +373,18 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__atomic_compare_exchange_uniform_float_global",
|
"__atomic_compare_exchange_uniform_float_global",
|
||||||
"__atomic_compare_exchange_uniform_int32_global",
|
"__atomic_compare_exchange_uniform_int32_global",
|
||||||
"__atomic_compare_exchange_uniform_int64_global",
|
"__atomic_compare_exchange_uniform_int64_global",
|
||||||
"__atomic_compare_exchange_varying_double_global",
|
|
||||||
"__atomic_compare_exchange_varying_float_global",
|
|
||||||
"__atomic_compare_exchange_varying_int32_global",
|
|
||||||
"__atomic_compare_exchange_varying_int64_global",
|
|
||||||
"__atomic_max_uniform_int32_global",
|
"__atomic_max_uniform_int32_global",
|
||||||
"__atomic_max_uniform_int64_global",
|
"__atomic_max_uniform_int64_global",
|
||||||
"__atomic_min_uniform_int32_global",
|
"__atomic_min_uniform_int32_global",
|
||||||
"__atomic_min_uniform_int64_global",
|
"__atomic_min_uniform_int64_global",
|
||||||
"__atomic_max_varying_int32_global",
|
|
||||||
"__atomic_max_varying_int64_global",
|
|
||||||
"__atomic_min_varying_int32_global",
|
|
||||||
"__atomic_min_varying_int64_global",
|
|
||||||
"__atomic_or_int32_global",
|
"__atomic_or_int32_global",
|
||||||
"__atomic_or_int64_global",
|
"__atomic_or_int64_global",
|
||||||
"__atomic_or_uniform_int32_global",
|
"__atomic_or_uniform_int32_global",
|
||||||
"__atomic_or_uniform_int64_global",
|
"__atomic_or_uniform_int64_global",
|
||||||
"__atomic_or_varying_int32_global",
|
|
||||||
"__atomic_or_varying_int64_global",
|
|
||||||
"__atomic_sub_int32_global",
|
"__atomic_sub_int32_global",
|
||||||
"__atomic_sub_int64_global",
|
"__atomic_sub_int64_global",
|
||||||
"__atomic_sub_uniform_int32_global",
|
"__atomic_sub_uniform_int32_global",
|
||||||
"__atomic_sub_uniform_int64_global",
|
"__atomic_sub_uniform_int64_global",
|
||||||
"__atomic_sub_varying_int32_global",
|
|
||||||
"__atomic_sub_varying_int64_global",
|
|
||||||
"__atomic_swap_double_global",
|
"__atomic_swap_double_global",
|
||||||
"__atomic_swap_float_global",
|
"__atomic_swap_float_global",
|
||||||
"__atomic_swap_int32_global",
|
"__atomic_swap_int32_global",
|
||||||
@@ -405,28 +393,46 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__atomic_swap_uniform_float_global",
|
"__atomic_swap_uniform_float_global",
|
||||||
"__atomic_swap_uniform_int32_global",
|
"__atomic_swap_uniform_int32_global",
|
||||||
"__atomic_swap_uniform_int64_global",
|
"__atomic_swap_uniform_int64_global",
|
||||||
"__atomic_swap_varying_double_global",
|
|
||||||
"__atomic_swap_varying_float_global",
|
|
||||||
"__atomic_swap_varying_int32_global",
|
|
||||||
"__atomic_swap_varying_int64_global",
|
|
||||||
"__atomic_umax_uniform_uint32_global",
|
"__atomic_umax_uniform_uint32_global",
|
||||||
"__atomic_umax_uniform_uint64_global",
|
"__atomic_umax_uniform_uint64_global",
|
||||||
"__atomic_umin_uniform_uint32_global",
|
"__atomic_umin_uniform_uint32_global",
|
||||||
"__atomic_umin_uniform_uint64_global",
|
"__atomic_umin_uniform_uint64_global",
|
||||||
"__atomic_umax_varying_uint32_global",
|
|
||||||
"__atomic_umax_varying_uint64_global",
|
|
||||||
"__atomic_umin_varying_uint32_global",
|
|
||||||
"__atomic_umin_varying_uint64_global",
|
|
||||||
"__atomic_xor_int32_global",
|
"__atomic_xor_int32_global",
|
||||||
"__atomic_xor_int64_global",
|
"__atomic_xor_int64_global",
|
||||||
"__atomic_xor_uniform_int32_global",
|
"__atomic_xor_uniform_int32_global",
|
||||||
"__atomic_xor_uniform_int64_global",
|
"__atomic_xor_uniform_int64_global",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
"__atomic_add_varying_int32_global",
|
||||||
|
"__atomic_add_varying_int64_global",
|
||||||
|
"__atomic_and_varying_int32_global",
|
||||||
|
"__atomic_and_varying_int64_global",
|
||||||
|
"__atomic_compare_exchange_varying_double_global",
|
||||||
|
"__atomic_compare_exchange_varying_float_global",
|
||||||
|
"__atomic_compare_exchange_varying_int32_global",
|
||||||
|
"__atomic_compare_exchange_varying_int64_global",
|
||||||
|
"__atomic_max_varying_int32_global",
|
||||||
|
"__atomic_max_varying_int64_global",
|
||||||
|
"__atomic_min_varying_int32_global",
|
||||||
|
"__atomic_min_varying_int64_global",
|
||||||
|
"__atomic_or_varying_int32_global",
|
||||||
|
"__atomic_or_varying_int64_global",
|
||||||
|
"__atomic_sub_varying_int32_global",
|
||||||
|
"__atomic_sub_varying_int64_global",
|
||||||
|
"__atomic_swap_varying_double_global",
|
||||||
|
"__atomic_swap_varying_float_global",
|
||||||
|
"__atomic_swap_varying_int32_global",
|
||||||
|
"__atomic_swap_varying_int64_global",
|
||||||
|
"__atomic_umax_varying_uint32_global",
|
||||||
|
"__atomic_umax_varying_uint64_global",
|
||||||
|
"__atomic_umin_varying_uint32_global",
|
||||||
|
"__atomic_umin_varying_uint64_global",
|
||||||
"__atomic_xor_uniform_int32_global",
|
"__atomic_xor_uniform_int32_global",
|
||||||
"__atomic_xor_uniform_int64_global",
|
"__atomic_xor_uniform_int64_global",
|
||||||
"__atomic_xor_varying_int32_global",
|
"__atomic_xor_varying_int32_global",
|
||||||
"__atomic_xor_varying_int64_global",
|
"__atomic_xor_varying_int64_global",
|
||||||
"__atomic_xor_varying_int32_global",
|
"__atomic_xor_varying_int32_global",
|
||||||
"__atomic_xor_varying_int64_global",
|
"__atomic_xor_varying_int64_global",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__broadcast_double",
|
"__broadcast_double",
|
||||||
"__broadcast_float",
|
"__broadcast_float",
|
||||||
"__broadcast_i16",
|
"__broadcast_i16",
|
||||||
@@ -449,7 +455,9 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__do_assert_uniform",
|
"__do_assert_uniform",
|
||||||
"__do_assert_varying",
|
"__do_assert_varying",
|
||||||
"__do_print",
|
"__do_print",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
"__do_print_nvptx",
|
"__do_print_nvptx",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__doublebits_uniform_int64",
|
"__doublebits_uniform_int64",
|
||||||
"__doublebits_varying_int64",
|
"__doublebits_varying_int64",
|
||||||
"__exclusive_scan_add_double",
|
"__exclusive_scan_add_double",
|
||||||
@@ -464,8 +472,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__extract_int32",
|
"__extract_int32",
|
||||||
"__extract_int64",
|
"__extract_int64",
|
||||||
"__extract_int8",
|
"__extract_int8",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
"__extract_float",
|
"__extract_float",
|
||||||
"__extract_double",
|
"__extract_double",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__fastmath",
|
"__fastmath",
|
||||||
"__float_to_half_uniform",
|
"__float_to_half_uniform",
|
||||||
"__float_to_half_varying",
|
"__float_to_half_varying",
|
||||||
@@ -482,8 +492,10 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__insert_int32",
|
"__insert_int32",
|
||||||
"__insert_int64",
|
"__insert_int64",
|
||||||
"__insert_int8",
|
"__insert_int8",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
"__insert_float",
|
"__insert_float",
|
||||||
"__insert_double",
|
"__insert_double",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__intbits_uniform_double",
|
"__intbits_uniform_double",
|
||||||
"__intbits_uniform_float",
|
"__intbits_uniform_float",
|
||||||
"__intbits_varying_double",
|
"__intbits_varying_double",
|
||||||
@@ -520,7 +532,9 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__min_varying_uint32",
|
"__min_varying_uint32",
|
||||||
"__min_varying_uint64",
|
"__min_varying_uint64",
|
||||||
"__movmsk",
|
"__movmsk",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
"__movmsk_ptx",
|
"__movmsk_ptx",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__new_uniform_32rt",
|
"__new_uniform_32rt",
|
||||||
"__new_uniform_64rt",
|
"__new_uniform_64rt",
|
||||||
"__new_varying32_32rt",
|
"__new_varying32_32rt",
|
||||||
@@ -610,13 +624,15 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__shuffle_i64",
|
"__shuffle_i64",
|
||||||
"__shuffle_i8",
|
"__shuffle_i8",
|
||||||
"__soa_to_aos3_float",
|
"__soa_to_aos3_float",
|
||||||
"__soa_to_aos3_float1",
|
|
||||||
"__soa_to_aos3_float16",
|
"__soa_to_aos3_float16",
|
||||||
"__soa_to_aos3_float4",
|
"__soa_to_aos3_float4",
|
||||||
"__soa_to_aos3_float8",
|
"__soa_to_aos3_float8",
|
||||||
"__soa_to_aos3_int32",
|
"__soa_to_aos3_int32",
|
||||||
"__soa_to_aos4_float",
|
"__soa_to_aos4_float",
|
||||||
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
"__soa_to_aos3_float1",
|
||||||
"__soa_to_aos4_float1",
|
"__soa_to_aos4_float1",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
"__soa_to_aos4_float16",
|
"__soa_to_aos4_float16",
|
||||||
"__soa_to_aos4_float4",
|
"__soa_to_aos4_float4",
|
||||||
"__soa_to_aos4_float8",
|
"__soa_to_aos4_float8",
|
||||||
@@ -717,7 +733,7 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"__vec4_add_float",
|
"__vec4_add_float",
|
||||||
"__vec4_add_int32",
|
"__vec4_add_int32",
|
||||||
"__vselect_float",
|
"__vselect_float",
|
||||||
"__vselect_i32",
|
//#ifdef ISPC_NVPTX_ENABLED
|
||||||
"__program_index",
|
"__program_index",
|
||||||
"__program_count",
|
"__program_count",
|
||||||
"__warp_index",
|
"__warp_index",
|
||||||
@@ -736,6 +752,8 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
"ISPCAlloc",
|
"ISPCAlloc",
|
||||||
"ISPCLaunch",
|
"ISPCLaunch",
|
||||||
"ISPCSync",
|
"ISPCSync",
|
||||||
|
//#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
"__vselect_i32"
|
||||||
};
|
};
|
||||||
|
|
||||||
int count = sizeof(names) / sizeof(names[0]);
|
int count = sizeof(names) / sizeof(names[0]);
|
||||||
@@ -808,7 +826,9 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
|||||||
g->target->getISA() != Target::NEON16 &&
|
g->target->getISA() != Target::NEON16 &&
|
||||||
g->target->getISA() != Target::NEON8)
|
g->target->getISA() != Target::NEON8)
|
||||||
#endif // !__arm__
|
#endif // !__arm__
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
{
|
{
|
||||||
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
Assert(bcTriple.getArch() == llvm::Triple::UnknownArch ||
|
||||||
mTriple.getArch() == bcTriple.getArch());
|
mTriple.getArch() == bcTriple.getArch());
|
||||||
@@ -982,6 +1002,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
// Next, add the target's custom implementations of the various needed
|
// Next, add the target's custom implementations of the various needed
|
||||||
// builtin functions (e.g. __masked_store_32(), etc).
|
// builtin functions (e.g. __masked_store_32(), etc).
|
||||||
switch (g->target->getISA()) {
|
switch (g->target->getISA()) {
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
case Target::NVPTX:
|
case Target::NVPTX:
|
||||||
{
|
{
|
||||||
if (runtime32) {
|
if (runtime32) {
|
||||||
@@ -993,6 +1014,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
};
|
};
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
case Target::NEON8: {
|
case Target::NEON8: {
|
||||||
if (runtime32) {
|
if (runtime32) {
|
||||||
@@ -1262,14 +1285,18 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
}
|
}
|
||||||
|
|
||||||
// define the 'programCount' builtin variable
|
// define the 'programCount' builtin variable
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
{
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
lDefineConstantInt("programCount", 32, module, symbolTable);
|
lDefineConstantInt("programCount", 32, module, symbolTable);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
lDefineConstantInt("programCount", g->target->getVectorWidth(), module, symbolTable);
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
// define the 'programIndex' builtin
|
// define the 'programIndex' builtin
|
||||||
lDefineProgramIndex(module, symbolTable);
|
lDefineProgramIndex(module, symbolTable);
|
||||||
@@ -1301,9 +1328,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
|
lDefineConstantInt("__have_native_rcpd", g->target->hasRcpd(),
|
||||||
module, symbolTable);
|
module, symbolTable);
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX),
|
lDefineConstantInt("__is_nvptx_target", (int)(g->target->getISA() == Target::NVPTX),
|
||||||
module, symbolTable);
|
module, symbolTable);
|
||||||
|
#else
|
||||||
|
lDefineConstantInt("__is_nvptx_target", (int)0, module, symbolTable);
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
if (g->forceAlignment != -1) {
|
if (g->forceAlignment != -1) {
|
||||||
llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
|
llvm::GlobalVariable *alignment = module->getGlobalVariable("memory_alignment", true);
|
||||||
alignment->setInitializer(LLVMInt32(g->forceAlignment));
|
alignment->setInitializer(LLVMInt32(g->forceAlignment));
|
||||||
|
|||||||
257
ctx.cpp
257
ctx.cpp
@@ -57,8 +57,10 @@
|
|||||||
#include <llvm/IR/Instructions.h>
|
#include <llvm/IR/Instructions.h>
|
||||||
#include <llvm/IR/DerivedTypes.h>
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include <llvm/Support/raw_ostream.h>
|
#include <llvm/Support/raw_ostream.h>
|
||||||
#include <llvm/Support/FormattedStream.h>
|
#include <llvm/Support/FormattedStream.h>
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
/** This is a small utility structure that records information related to one
|
/** This is a small utility structure that records information related to one
|
||||||
level of nested control flow. It's mostly used in correctly restoring
|
level of nested control flow. It's mostly used in correctly restoring
|
||||||
@@ -1373,28 +1375,30 @@ FunctionEmitContext::None(llvm::Value *mask) {
|
|||||||
|
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::LaneMask(llvm::Value *v)
|
FunctionEmitContext::LaneMask(llvm::Value *v) {
|
||||||
{
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#if 1 /* this makes mandelbrot example slower with "nvptx" target. Need further investigation. */
|
/* this makes mandelbrot example slower with "nvptx" target.
|
||||||
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
|
* Needs further investigation. */
|
||||||
|
const char *__movmsk = g->target->getISA() == Target::NVPTX ? "__movmsk_ptx" : "__movmsk";
|
||||||
#else
|
#else
|
||||||
const char *__movmsk = "__movmsk";
|
const char *__movmsk = "__movmsk";
|
||||||
#endif
|
#endif
|
||||||
// Call the target-dependent movmsk function to turn the vector mask
|
// Call the target-dependent movmsk function to turn the vector mask
|
||||||
// into an i64 value
|
// into an i64 value
|
||||||
std::vector<Symbol *> mm;
|
std::vector<Symbol *> mm;
|
||||||
m->symbolTable->LookupFunction(__movmsk, &mm);
|
m->symbolTable->LookupFunction(__movmsk, &mm);
|
||||||
if (g->target->getMaskBitCount() == 1)
|
if (g->target->getMaskBitCount() == 1)
|
||||||
AssertPos(currentPos, mm.size() == 1);
|
AssertPos(currentPos, mm.size() == 1);
|
||||||
else
|
else
|
||||||
// There should be one with signed int signature, one unsigned int.
|
// There should be one with signed int signature, one unsigned int.
|
||||||
AssertPos(currentPos, mm.size() == 2);
|
AssertPos(currentPos, mm.size() == 2);
|
||||||
// We can actually call either one, since both are i32s as far as
|
// We can actually call either one, since both are i32s as far as
|
||||||
// LLVM's type system is concerned...
|
// LLVM's type system is concerned...
|
||||||
llvm::Function *fmm = mm[0]->function;
|
llvm::Function *fmm = mm[0]->function;
|
||||||
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
|
return CallInst(fmm, NULL, v, LLVMGetName(v, "_movmsk"));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
|
bool lAppendInsertExtractName(llvm::Value *vector, std::string &funcName)
|
||||||
{
|
{
|
||||||
llvm::Type *type = vector->getType();
|
llvm::Type *type = vector->getType();
|
||||||
@@ -1447,19 +1451,21 @@ FunctionEmitContext::Extract(llvm::Value *vector, llvm::Value *lane)
|
|||||||
llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
|
llvm::Value *ret = llvm::CallInst::Create(func, args, LLVMGetName(vector, funcName.c_str()), GetCurrentBasicBlock());
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
||||||
if (g->target->getISA() == Target::NVPTX)
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
{
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
// Compare the two masks to get a vector of i1s
|
{
|
||||||
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
// Compare the two masks to get a vector of i1s
|
||||||
v1, v2, "v1==v2");
|
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
||||||
return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
|
v1, v2, "v1==v2");
|
||||||
}
|
return ExtractInst(cmp, 0); /* this works without calling All(..) in PTX. Why ?!? */
|
||||||
else
|
}
|
||||||
{
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
#if 0
|
#if 0
|
||||||
// Compare the two masks to get a vector of i1s
|
// Compare the two masks to get a vector of i1s
|
||||||
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
llvm::Value *cmp = CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
||||||
@@ -1474,7 +1480,6 @@ FunctionEmitContext::MasksAllEqual(llvm::Value *v1, llvm::Value *v2) {
|
|||||||
return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
|
return CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ, mm1, mm2,
|
||||||
LLVMGetName("equal", v1, v2));
|
LLVMGetName("equal", v1, v2));
|
||||||
#endif
|
#endif
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
@@ -1489,6 +1494,8 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
|||||||
|
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
|
FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
|
||||||
llvm::Function *func_program_index = m->module->getFunction("__program_index");
|
llvm::Function *func_program_index = m->module->getFunction("__program_index");
|
||||||
@@ -1500,6 +1507,7 @@ FunctionEmitContext::ProgramIndexVectorPTX(bool is32bits) {
|
|||||||
#endif
|
#endif
|
||||||
return index;
|
return index;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
|
|
||||||
llvm::Value *
|
llvm::Value *
|
||||||
@@ -1919,7 +1927,6 @@ FunctionEmitContext::PtrToIntInst(llvm::Value *value, const char *name) {
|
|||||||
|
|
||||||
if (name == NULL)
|
if (name == NULL)
|
||||||
name = LLVMGetName(value, "_ptr2int");
|
name = LLVMGetName(value, "_ptr2int");
|
||||||
|
|
||||||
llvm::Type *type = LLVMTypes::PointerIntType;
|
llvm::Type *type = LLVMTypes::PointerIntType;
|
||||||
llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
|
llvm::Instruction *inst = new llvm::PtrToIntInst(value, type, name, bblock);
|
||||||
AddDebugPos(inst);
|
AddDebugPos(inst);
|
||||||
@@ -3613,75 +3620,8 @@ llvm::Value *
|
|||||||
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
||||||
std::vector<llvm::Value *> &argVals,
|
std::vector<llvm::Value *> &argVals,
|
||||||
llvm::Value *launchCount[3]){
|
llvm::Value *launchCount[3]){
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
|
||||||
if (callee == NULL) {
|
|
||||||
AssertPos(currentPos, m->errorCount > 0);
|
|
||||||
return NULL;
|
|
||||||
}
|
|
||||||
|
|
||||||
launchedTasks = true;
|
|
||||||
|
|
||||||
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
|
|
||||||
llvm::Type *argType =
|
|
||||||
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
|
|
||||||
AssertPos(currentPos, llvm::PointerType::classof(argType));
|
|
||||||
llvm::PointerType *pt =
|
|
||||||
llvm::dyn_cast<llvm::PointerType>(argType);
|
|
||||||
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
|
|
||||||
llvm::StructType *argStructType =
|
|
||||||
static_cast<llvm::StructType *>(pt->getElementType());
|
|
||||||
|
|
||||||
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
|
|
||||||
AssertPos(currentPos, falloc != NULL);
|
|
||||||
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
|
|
||||||
if (structSize->getType() != LLVMTypes::Int64Type)
|
|
||||||
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
|
|
||||||
// targets, SizeOf returns a 32-bit value
|
|
||||||
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
|
|
||||||
"struct_size_to_64");
|
|
||||||
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
|
|
||||||
|
|
||||||
std::vector<llvm::Value *> allocArgs;
|
|
||||||
allocArgs.push_back(launchGroupHandlePtr);
|
|
||||||
allocArgs.push_back(structSize);
|
|
||||||
allocArgs.push_back(LLVMInt32(align));
|
|
||||||
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
|
|
||||||
llvm::Value *argmem = BitCastInst(voidmem, pt);
|
|
||||||
|
|
||||||
// Copy the values of the parameters into the appropriate place in
|
|
||||||
// the argument block
|
|
||||||
for (unsigned int i = 0; i < argVals.size(); ++i) {
|
|
||||||
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
|
|
||||||
// don't need to do masked store here, I think
|
|
||||||
StoreInst(argVals[i], ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
if (argStructType->getNumElements() == argVals.size() + 1) {
|
|
||||||
// copy in the mask
|
|
||||||
llvm::Value *mask = GetFullMask();
|
|
||||||
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
|
|
||||||
"funarg_mask");
|
|
||||||
StoreInst(mask, ptr);
|
|
||||||
}
|
|
||||||
|
|
||||||
// And emit the call to the user-supplied task launch function, passing
|
|
||||||
// a pointer to the task function being called and a pointer to the
|
|
||||||
// argument block we just filled in
|
|
||||||
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
|
|
||||||
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
|
|
||||||
AssertPos(currentPos, flaunch != NULL);
|
|
||||||
std::vector<llvm::Value *> args;
|
|
||||||
args.push_back(launchGroupHandlePtr);
|
|
||||||
args.push_back(fptr);
|
|
||||||
args.push_back(voidmem);
|
|
||||||
args.push_back(launchCount[0]);
|
|
||||||
args.push_back(launchCount[1]);
|
|
||||||
args.push_back(launchCount[2]);
|
|
||||||
return CallInst(flaunch, NULL, args, "");
|
|
||||||
}
|
|
||||||
else /* NVPTX */
|
|
||||||
{
|
{
|
||||||
if (callee == NULL) {
|
if (callee == NULL) {
|
||||||
AssertPos(currentPos, m->errorCount > 0);
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
@@ -3764,38 +3704,79 @@ FunctionEmitContext::LaunchInst(llvm::Value *callee,
|
|||||||
llvm::Value *ret = CallInst(flaunch, NULL, args, "");
|
llvm::Value *ret = CallInst(flaunch, NULL, args, "");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
|
if (callee == NULL) {
|
||||||
|
AssertPos(currentPos, m->errorCount > 0);
|
||||||
|
return NULL;
|
||||||
|
}
|
||||||
|
|
||||||
|
launchedTasks = true;
|
||||||
|
|
||||||
|
AssertPos(currentPos, llvm::isa<llvm::Function>(callee));
|
||||||
|
llvm::Type *argType =
|
||||||
|
(llvm::dyn_cast<llvm::Function>(callee))->arg_begin()->getType();
|
||||||
|
AssertPos(currentPos, llvm::PointerType::classof(argType));
|
||||||
|
llvm::PointerType *pt =
|
||||||
|
llvm::dyn_cast<llvm::PointerType>(argType);
|
||||||
|
AssertPos(currentPos, llvm::StructType::classof(pt->getElementType()));
|
||||||
|
llvm::StructType *argStructType =
|
||||||
|
static_cast<llvm::StructType *>(pt->getElementType());
|
||||||
|
|
||||||
|
llvm::Function *falloc = m->module->getFunction("ISPCAlloc");
|
||||||
|
AssertPos(currentPos, falloc != NULL);
|
||||||
|
llvm::Value *structSize = g->target->SizeOf(argStructType, bblock);
|
||||||
|
if (structSize->getType() != LLVMTypes::Int64Type)
|
||||||
|
// ISPCAlloc expects the size as an uint64_t, but on 32-bit
|
||||||
|
// targets, SizeOf returns a 32-bit value
|
||||||
|
structSize = ZExtInst(structSize, LLVMTypes::Int64Type,
|
||||||
|
"struct_size_to_64");
|
||||||
|
int align = 4 * RoundUpPow2(g->target->getNativeVectorWidth());
|
||||||
|
|
||||||
|
std::vector<llvm::Value *> allocArgs;
|
||||||
|
allocArgs.push_back(launchGroupHandlePtr);
|
||||||
|
allocArgs.push_back(structSize);
|
||||||
|
allocArgs.push_back(LLVMInt32(align));
|
||||||
|
llvm::Value *voidmem = CallInst(falloc, NULL, allocArgs, "args_ptr");
|
||||||
|
llvm::Value *argmem = BitCastInst(voidmem, pt);
|
||||||
|
|
||||||
|
// Copy the values of the parameters into the appropriate place in
|
||||||
|
// the argument block
|
||||||
|
for (unsigned int i = 0; i < argVals.size(); ++i) {
|
||||||
|
llvm::Value *ptr = AddElementOffset(argmem, i, NULL, "funarg");
|
||||||
|
// don't need to do masked store here, I think
|
||||||
|
StoreInst(argVals[i], ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
if (argStructType->getNumElements() == argVals.size() + 1) {
|
||||||
|
// copy in the mask
|
||||||
|
llvm::Value *mask = GetFullMask();
|
||||||
|
llvm::Value *ptr = AddElementOffset(argmem, argVals.size(), NULL,
|
||||||
|
"funarg_mask");
|
||||||
|
StoreInst(mask, ptr);
|
||||||
|
}
|
||||||
|
|
||||||
|
// And emit the call to the user-supplied task launch function, passing
|
||||||
|
// a pointer to the task function being called and a pointer to the
|
||||||
|
// argument block we just filled in
|
||||||
|
llvm::Value *fptr = BitCastInst(callee, LLVMTypes::VoidPointerType);
|
||||||
|
llvm::Function *flaunch = m->module->getFunction("ISPCLaunch");
|
||||||
|
AssertPos(currentPos, flaunch != NULL);
|
||||||
|
std::vector<llvm::Value *> args;
|
||||||
|
args.push_back(launchGroupHandlePtr);
|
||||||
|
args.push_back(fptr);
|
||||||
|
args.push_back(voidmem);
|
||||||
|
args.push_back(launchCount[0]);
|
||||||
|
args.push_back(launchCount[1]);
|
||||||
|
args.push_back(launchCount[2]);
|
||||||
|
return CallInst(flaunch, NULL, args, "");
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
void
|
void
|
||||||
FunctionEmitContext::SyncInst() {
|
FunctionEmitContext::SyncInst() {
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
{
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
|
||||||
llvm::Value *nullPtrValue =
|
|
||||||
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
|
|
||||||
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
|
|
||||||
llvm::CmpInst::ICMP_NE,
|
|
||||||
launchGroupHandle, nullPtrValue);
|
|
||||||
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
|
|
||||||
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
|
|
||||||
BranchInst(bSync, bPostSync, nonNull);
|
|
||||||
|
|
||||||
SetCurrentBasicBlock(bSync);
|
|
||||||
llvm::Function *fsync = m->module->getFunction("ISPCSync");
|
|
||||||
if (fsync == NULL)
|
|
||||||
FATAL("Couldn't find ISPCSync declaration?!");
|
|
||||||
CallInst(fsync, NULL, launchGroupHandle, "");
|
|
||||||
|
|
||||||
// zero out the handle so that if ISPCLaunch is called again in this
|
|
||||||
// function, it knows it's starting out from scratch
|
|
||||||
StoreInst(nullPtrValue, launchGroupHandlePtr);
|
|
||||||
|
|
||||||
BranchInst(bPostSync);
|
|
||||||
|
|
||||||
SetCurrentBasicBlock(bPostSync);
|
|
||||||
}
|
|
||||||
else /* NVPTX: don't do test, just call sync */
|
|
||||||
{
|
{
|
||||||
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
||||||
llvm::Value *nullPtrValue =
|
llvm::Value *nullPtrValue =
|
||||||
@@ -3805,7 +3786,33 @@ FunctionEmitContext::SyncInst() {
|
|||||||
FATAL("Couldn't find ISPCSync declaration?!");
|
FATAL("Couldn't find ISPCSync declaration?!");
|
||||||
CallInst(fsync, NULL, launchGroupHandle, "");
|
CallInst(fsync, NULL, launchGroupHandle, "");
|
||||||
StoreInst(nullPtrValue, launchGroupHandlePtr);
|
StoreInst(nullPtrValue, launchGroupHandlePtr);
|
||||||
|
return;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
|
llvm::Value *launchGroupHandle = LoadInst(launchGroupHandlePtr);
|
||||||
|
llvm::Value *nullPtrValue =
|
||||||
|
llvm::Constant::getNullValue(LLVMTypes::VoidPointerType);
|
||||||
|
llvm::Value *nonNull = CmpInst(llvm::Instruction::ICmp,
|
||||||
|
llvm::CmpInst::ICMP_NE,
|
||||||
|
launchGroupHandle, nullPtrValue);
|
||||||
|
llvm::BasicBlock *bSync = CreateBasicBlock("call_sync");
|
||||||
|
llvm::BasicBlock *bPostSync = CreateBasicBlock("post_sync");
|
||||||
|
BranchInst(bSync, bPostSync, nonNull);
|
||||||
|
|
||||||
|
SetCurrentBasicBlock(bSync);
|
||||||
|
llvm::Function *fsync = m->module->getFunction("ISPCSync");
|
||||||
|
if (fsync == NULL)
|
||||||
|
FATAL("Couldn't find ISPCSync declaration?!");
|
||||||
|
CallInst(fsync, NULL, launchGroupHandle, "");
|
||||||
|
|
||||||
|
// zero out the handle so that if ISPCLaunch is called again in this
|
||||||
|
// function, it knows it's starting out from scratch
|
||||||
|
StoreInst(nullPtrValue, launchGroupHandlePtr);
|
||||||
|
|
||||||
|
BranchInst(bPostSync);
|
||||||
|
|
||||||
|
SetCurrentBasicBlock(bPostSync);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
20
ctx.h
20
ctx.h
@@ -291,21 +291,21 @@ public:
|
|||||||
of the mask is on. */
|
of the mask is on. */
|
||||||
llvm::Value *LaneMask(llvm::Value *mask);
|
llvm::Value *LaneMask(llvm::Value *mask);
|
||||||
|
|
||||||
|
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
|
||||||
|
that indicates whether the two masks are equal. */
|
||||||
|
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
|
||||||
|
|
||||||
|
/** generate constantvector, which contains programindex, i.e.
|
||||||
|
< i32 0, i32 1, i32 2, i32 3> */
|
||||||
|
llvm::Value *ProgramIndexVector(bool is32bits = true);
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
|
||||||
|
|
||||||
/** Issues a call to __insert_int8/int16/int32/int64/float/double */
|
/** Issues a call to __insert_int8/int16/int32/int64/float/double */
|
||||||
llvm::Value* Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar);
|
llvm::Value* Insert(llvm::Value *vector, llvm::Value *lane, llvm::Value *scalar);
|
||||||
/** Issues a call to __extract_int8/int16/int32/int64/float/double */
|
/** Issues a call to __extract_int8/int16/int32/int64/float/double */
|
||||||
llvm::Value* Extract(llvm::Value *vector, llvm::Value *lane);
|
llvm::Value* Extract(llvm::Value *vector, llvm::Value *lane);
|
||||||
|
#endif
|
||||||
|
|
||||||
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
|
|
||||||
that indicates whether the two masks are equal. */
|
|
||||||
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
|
|
||||||
|
|
||||||
/** Generate ConstantVector, which contains ProgramIndex, i.e.
|
|
||||||
< i32 0, i32 1, i32 2, i32 3> */
|
|
||||||
llvm::Value *ProgramIndexVector(bool is32bits = true);
|
|
||||||
llvm::Value *ProgramIndexVectorPTX(bool is32bits = true);
|
|
||||||
|
|
||||||
/** Given a string, create an anonymous global variable to hold its
|
/** Given a string, create an anonymous global variable to hold its
|
||||||
value and return the pointer to the string. */
|
value and return the pointer to the string. */
|
||||||
|
|||||||
12
decl.cpp
12
decl.cpp
@@ -168,6 +168,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
|
|||||||
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);
|
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);
|
||||||
|
|
||||||
if (soaWidth > 0) {
|
if (soaWidth > 0) {
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#if 0 /* see stmt.cpp in DeclStmt::EmitCode for work-around of SOAType Declaration */
|
#if 0 /* see stmt.cpp in DeclStmt::EmitCode for work-around of SOAType Declaration */
|
||||||
if (g->target->getISA() == Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
@@ -175,6 +176,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
const StructType *st = CastType<StructType>(retType);
|
const StructType *st = CastType<StructType>(retType);
|
||||||
|
|
||||||
if (st == NULL) {
|
if (st == NULL) {
|
||||||
@@ -409,6 +411,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#if 0 /* NVPTX */
|
#if 0 /* NVPTX */
|
||||||
if (baseType->IsUniformType())
|
if (baseType->IsUniformType())
|
||||||
{
|
{
|
||||||
@@ -416,6 +419,7 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
|||||||
baseType->IsArrayType() ? " true " : " false ");
|
baseType->IsArrayType() ? " true " : " false ");
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
const Type *arrayType = new ArrayType(baseType, arraySize);
|
const Type *arrayType = new ArrayType(baseType, arraySize);
|
||||||
if (child != NULL) {
|
if (child != NULL) {
|
||||||
child->InitFromType(arrayType, ds);
|
child->InitFromType(arrayType, ds);
|
||||||
@@ -544,9 +548,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
|||||||
|
|
||||||
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
|
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
|
||||||
|
|
||||||
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
|
|
||||||
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
|
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
|
||||||
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
|
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
|
||||||
|
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
|
||||||
bool isUnmasked = ds && ((ds->typeQualifiers & TYPEQUAL_UNMASKED) != 0);
|
bool isUnmasked = ds && ((ds->typeQualifiers & TYPEQUAL_UNMASKED) != 0);
|
||||||
|
|
||||||
if (isExported && isTask) {
|
if (isExported && isTask) {
|
||||||
@@ -555,9 +559,9 @@ Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (isExternC && isTask) {
|
if (isExternC && isTask) {
|
||||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
|
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
|
||||||
"qualifiers");
|
"qualifiers");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
if (isExternC && isExported) {
|
if (isExternC && isExported) {
|
||||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
|
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
|
||||||
|
|||||||
4
expr.cpp
4
expr.cpp
@@ -7880,12 +7880,14 @@ SizeOfExpr::TypeCheck() {
|
|||||||
"struct type \"%s\".", type->GetString().c_str());
|
"struct type \"%s\".", type->GetString().c_str());
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (type != NULL)
|
if (type != NULL)
|
||||||
if (g->target->getISA() == Target::NVPTX && type->IsVaryingType())
|
if (g->target->getISA() == Target::NVPTX && type->IsVaryingType())
|
||||||
{
|
{
|
||||||
Error(pos, "\"sizeof\" with varying data types is not yet supported with \"nvptx\" target.");
|
Error(pos, "\"sizeof\" with varying data types is not yet supported with \"nvptx\" target.");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
return this;
|
return this;
|
||||||
}
|
}
|
||||||
@@ -8718,11 +8720,13 @@ NewExpr::TypeCheck() {
|
|||||||
AssertPos(pos, m->errorCount > 0);
|
AssertPos(pos, m->errorCount > 0);
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX && allocType->IsVaryingType())
|
if (g->target->getISA() == Target::NVPTX && allocType->IsVaryingType())
|
||||||
{
|
{
|
||||||
Error(pos, "\"new\" with varying data types is not yet supported with \"nvptx\" target.");
|
Error(pos, "\"new\" with varying data types is not yet supported with \"nvptx\" target.");
|
||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
if (CastType<UndefinedStructType>(allocType) != NULL) {
|
if (CastType<UndefinedStructType>(allocType) != NULL) {
|
||||||
Error(pos, "Can't dynamically allocate storage for declared "
|
Error(pos, "Can't dynamically allocate storage for declared "
|
||||||
"but not defined type \"%s\".", allocType->GetString().c_str());
|
"but not defined type \"%s\".", allocType->GetString().c_str());
|
||||||
|
|||||||
26
func.cpp
26
func.cpp
@@ -47,7 +47,9 @@
|
|||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
|
||||||
#if defined(LLVM_3_1) || defined(LLVM_3_2)
|
#if defined(LLVM_3_1) || defined(LLVM_3_2)
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include <llvm/Metadata.h>
|
#include <llvm/Metadata.h>
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
#include <llvm/LLVMContext.h>
|
#include <llvm/LLVMContext.h>
|
||||||
#include <llvm/Module.h>
|
#include <llvm/Module.h>
|
||||||
#include <llvm/Type.h>
|
#include <llvm/Type.h>
|
||||||
@@ -55,7 +57,9 @@
|
|||||||
#include <llvm/Intrinsics.h>
|
#include <llvm/Intrinsics.h>
|
||||||
#include <llvm/DerivedTypes.h>
|
#include <llvm/DerivedTypes.h>
|
||||||
#else
|
#else
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include <llvm/IR/Metadata.h>
|
#include <llvm/IR/Metadata.h>
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
#include <llvm/IR/LLVMContext.h>
|
#include <llvm/IR/LLVMContext.h>
|
||||||
#include <llvm/IR/Module.h>
|
#include <llvm/IR/Module.h>
|
||||||
#include <llvm/IR/Type.h>
|
#include <llvm/IR/Type.h>
|
||||||
@@ -131,7 +135,11 @@ Function::Function(Symbol *s, Stmt *c) {
|
|||||||
sym->parentFunction = this;
|
sym->parentFunction = this;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (type->isTask && g->target->getISA() != Target::NVPTX) {
|
if (type->isTask
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
&& (g->target->getISA() != Target::NVPTX)
|
||||||
|
#endif
|
||||||
|
){
|
||||||
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
||||||
Assert(threadIndexSym);
|
Assert(threadIndexSym);
|
||||||
threadCountSym = m->symbolTable->LookupVariable("threadCount");
|
threadCountSym = m->symbolTable->LookupVariable("threadCount");
|
||||||
@@ -242,7 +250,11 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
#endif
|
#endif
|
||||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||||
Assert(type != NULL);
|
Assert(type != NULL);
|
||||||
if (type->isTask == true && g->target->getISA() != Target::NVPTX) {
|
if (type->isTask == true
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
&& (g->target->getISA() != Target::NVPTX)
|
||||||
|
#endif
|
||||||
|
){
|
||||||
// For tasks, there should always be three parameters: the
|
// For tasks, there should always be three parameters: the
|
||||||
// pointer to the structure that holds all of the arguments, the
|
// pointer to the structure that holds all of the arguments, the
|
||||||
// thread index, and the thread count variables.
|
// thread index, and the thread count variables.
|
||||||
@@ -340,6 +352,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
ctx->SetFunctionMask(argIter);
|
ctx->SetFunctionMask(argIter);
|
||||||
Assert(++argIter == function->arg_end());
|
Assert(++argIter == function->arg_end());
|
||||||
}
|
}
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (type->isTask == true && g->target->getISA() == Target::NVPTX)
|
if (type->isTask == true && g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::NamedMDNode* annotations =
|
llvm::NamedMDNode* annotations =
|
||||||
@@ -350,6 +363,7 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
av.push_back(LLVMInt32(1));
|
av.push_back(LLVMInt32(1));
|
||||||
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finally, we can generate code for the function
|
// Finally, we can generate code for the function
|
||||||
@@ -505,15 +519,14 @@ Function::GenerateIR() {
|
|||||||
// the application can call it
|
// the application can call it
|
||||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||||
Assert(type != NULL);
|
Assert(type != NULL);
|
||||||
if (type->isExported) {
|
if (type->isExported) {
|
||||||
if (!type->isTask) {
|
if (!type->isTask) {
|
||||||
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
|
llvm::FunctionType *ftype = type->LLVMFunctionType(g->ctx, true);
|
||||||
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
||||||
std::string functionName = sym->name;
|
std::string functionName = sym->name;
|
||||||
|
|
||||||
if (g->mangleFunctionsWithTarget)
|
if (g->mangleFunctionsWithTarget)
|
||||||
functionName += std::string("_") + g->target->GetISAString();
|
functionName += std::string("_") + g->target->GetISAString();
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */
|
functionName += std::string("___export"); /* add ___export to the end, for ptxcc to recognize it is exported */
|
||||||
@@ -527,6 +540,7 @@ Function::GenerateIR() {
|
|||||||
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
llvm::Function *appFunction =
|
llvm::Function *appFunction =
|
||||||
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
|
llvm::Function::Create(ftype, linkage, functionName.c_str(), m->module);
|
||||||
#if defined(LLVM_3_1)
|
#if defined(LLVM_3_1)
|
||||||
@@ -566,6 +580,7 @@ Function::GenerateIR() {
|
|||||||
FATAL("Function verificication failed");
|
FATAL("Function verificication failed");
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
llvm::NamedMDNode* annotations =
|
llvm::NamedMDNode* annotations =
|
||||||
@@ -576,6 +591,7 @@ Function::GenerateIR() {
|
|||||||
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
|
av.push_back(llvm::ConstantInt::get(llvm::IntegerType::get(*g->ctx,32), 1));
|
||||||
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
annotations->addOperand(llvm::MDNode::get(*g->ctx, av));
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
19
ispc.cpp
19
ispc.cpp
@@ -247,9 +247,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
arch = "arm";
|
arch = "arm";
|
||||||
else
|
else
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if(!strncmp(isa, "nvptx", 5))
|
if(!strncmp(isa, "nvptx", 5))
|
||||||
arch = "nvptx64";
|
arch = "nvptx64";
|
||||||
else
|
else
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
arch = "x86-64";
|
arch = "x86-64";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -587,6 +589,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
else if (!strcasecmp(isa, "nvptx"))
|
else if (!strcasecmp(isa, "nvptx"))
|
||||||
{
|
{
|
||||||
this->m_isa = Target::NVPTX;
|
this->m_isa = Target::NVPTX;
|
||||||
@@ -602,6 +605,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_hasGather = this->m_hasScatter = false;
|
this->m_hasGather = this->m_hasScatter = false;
|
||||||
cpuFromIsa = "sm_35";
|
cpuFromIsa = "sm_35";
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
else {
|
else {
|
||||||
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
|
Error(SourcePos(), "Target \"%s\" is unknown. Choices are: %s.",
|
||||||
isa, SupportedTargets());
|
isa, SupportedTargets());
|
||||||
@@ -720,8 +724,10 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
// Initialize target-specific "target-feature" attribute.
|
// Initialize target-specific "target-feature" attribute.
|
||||||
if (!m_attributes.empty()) {
|
if (!m_attributes.empty()) {
|
||||||
llvm::AttrBuilder attrBuilder;
|
llvm::AttrBuilder attrBuilder;
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (m_isa != Target::NVPTX)
|
if (m_isa != Target::NVPTX)
|
||||||
attrBuilder.addAttribute("target-cpu", this->m_cpu);
|
#endif
|
||||||
|
attrBuilder.addAttribute("target-cpu", this->m_cpu);
|
||||||
attrBuilder.addAttribute("target-features", this->m_attributes);
|
attrBuilder.addAttribute("target-features", this->m_attributes);
|
||||||
this->m_tf_attributes = new llvm::AttributeSet(
|
this->m_tf_attributes = new llvm::AttributeSet(
|
||||||
llvm::AttributeSet::get(
|
llvm::AttributeSet::get(
|
||||||
@@ -768,6 +774,9 @@ Target::SupportedTargets() {
|
|||||||
return
|
return
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
"neon-i8x16, neon-i16x8, neon-i32x4, "
|
"neon-i8x16, neon-i16x8, neon-i32x4, "
|
||||||
|
#endif
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
"nvptx, "
|
||||||
#endif
|
#endif
|
||||||
"sse2-i32x4, sse2-i32x8, "
|
"sse2-i32x4, sse2-i32x8, "
|
||||||
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
|
"sse4-i32x4, sse4-i32x8, sse4-i16x8, sse4-i8x16, "
|
||||||
@@ -776,7 +785,7 @@ Target::SupportedTargets() {
|
|||||||
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
|
"avx1.1-i32x8, avx1.1-i32x16, avx1.1-i64x4 "
|
||||||
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
|
"avx2-i32x8, avx2-i32x16, avx2-i64x4, "
|
||||||
"generic-x1, generic-x4, generic-x8, generic-x16, "
|
"generic-x1, generic-x4, generic-x8, generic-x16, "
|
||||||
"generic-x32, generic-x64, nvptx";
|
"generic-x32, generic-x64";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -803,8 +812,10 @@ Target::GetTripleString() const {
|
|||||||
triple.setArchName("i386");
|
triple.setArchName("i386");
|
||||||
else if (m_arch == "x86-64")
|
else if (m_arch == "x86-64")
|
||||||
triple.setArchName("x86_64");
|
triple.setArchName("x86_64");
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
else if (m_arch == "nvptx64")
|
else if (m_arch == "nvptx64")
|
||||||
triple = llvm::Triple("nvptx64", "nvidia", "cuda");
|
triple = llvm::Triple("nvptx64", "nvidia", "cuda");
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
else
|
else
|
||||||
triple.setArchName(m_arch);
|
triple.setArchName(m_arch);
|
||||||
}
|
}
|
||||||
@@ -837,8 +848,10 @@ Target::ISAToString(ISA isa) {
|
|||||||
return "avx2";
|
return "avx2";
|
||||||
case Target::GENERIC:
|
case Target::GENERIC:
|
||||||
return "generic";
|
return "generic";
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
case Target::NVPTX:
|
case Target::NVPTX:
|
||||||
return "nvptx";
|
return "nvptx";
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
default:
|
default:
|
||||||
FATAL("Unhandled target in ISAToString()");
|
FATAL("Unhandled target in ISAToString()");
|
||||||
}
|
}
|
||||||
@@ -877,8 +890,10 @@ Target::ISAToTargetString(ISA isa) {
|
|||||||
return "avx2-i32x8";
|
return "avx2-i32x8";
|
||||||
case Target::GENERIC:
|
case Target::GENERIC:
|
||||||
return "generic-4";
|
return "generic-4";
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
case Target::NVPTX:
|
case Target::NVPTX:
|
||||||
return "nvptx";
|
return "nvptx";
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
default:
|
default:
|
||||||
FATAL("Unhandled target in ISAToTargetString()");
|
FATAL("Unhandled target in ISAToTargetString()");
|
||||||
}
|
}
|
||||||
|
|||||||
6
ispc.h
6
ispc.h
@@ -179,7 +179,10 @@ public:
|
|||||||
flexible/performant of them will apear last in the enumerant. Note
|
flexible/performant of them will apear last in the enumerant. Note
|
||||||
also that __best_available_isa() needs to be updated if ISAs are
|
also that __best_available_isa() needs to be updated if ISAs are
|
||||||
added or the enumerant values are reordered. */
|
added or the enumerant values are reordered. */
|
||||||
enum ISA { NVPTX,
|
enum ISA {
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
NVPTX,
|
||||||
|
#endif
|
||||||
#ifdef ISPC_ARM_ENABLED
|
#ifdef ISPC_ARM_ENABLED
|
||||||
NEON32, NEON16, NEON8,
|
NEON32, NEON16, NEON8,
|
||||||
#endif
|
#endif
|
||||||
@@ -611,7 +614,6 @@ struct Globals {
|
|||||||
/** Indicates that alignment in memory allocation routines should be
|
/** Indicates that alignment in memory allocation routines should be
|
||||||
forced to have given value. -1 value means natural alignment for the platforms. */
|
forced to have given value. -1 value means natural alignment for the platforms. */
|
||||||
int forceAlignment;
|
int forceAlignment;
|
||||||
std::string PtxString;
|
|
||||||
};
|
};
|
||||||
|
|
||||||
enum {
|
enum {
|
||||||
|
|||||||
2
main.cpp
2
main.cpp
@@ -320,10 +320,12 @@ int main(int Argc, char *Argv[]) {
|
|||||||
LLVMInitializeARMTargetMC();
|
LLVMInitializeARMTargetMC();
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
LLVMInitializeNVPTXTargetInfo();
|
LLVMInitializeNVPTXTargetInfo();
|
||||||
LLVMInitializeNVPTXTarget();
|
LLVMInitializeNVPTXTarget();
|
||||||
LLVMInitializeNVPTXAsmPrinter();
|
LLVMInitializeNVPTXAsmPrinter();
|
||||||
LLVMInitializeNVPTXTargetMC();
|
LLVMInitializeNVPTXTargetMC();
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
char *file = NULL;
|
char *file = NULL;
|
||||||
const char *headerFileName = NULL;
|
const char *headerFileName = NULL;
|
||||||
|
|||||||
82
module.cpp
82
module.cpp
@@ -58,7 +58,9 @@
|
|||||||
#include <set>
|
#include <set>
|
||||||
#include <sstream>
|
#include <sstream>
|
||||||
#include <iostream>
|
#include <iostream>
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include <map>
|
#include <map>
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
#include <windows.h>
|
#include <windows.h>
|
||||||
#include <io.h>
|
#include <io.h>
|
||||||
@@ -72,7 +74,9 @@
|
|||||||
#include <llvm/Instructions.h>
|
#include <llvm/Instructions.h>
|
||||||
#include <llvm/Intrinsics.h>
|
#include <llvm/Intrinsics.h>
|
||||||
#include <llvm/DerivedTypes.h>
|
#include <llvm/DerivedTypes.h>
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
|
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
#else
|
#else
|
||||||
#include <llvm/IR/LLVMContext.h>
|
#include <llvm/IR/LLVMContext.h>
|
||||||
#include <llvm/IR/Module.h>
|
#include <llvm/IR/Module.h>
|
||||||
@@ -80,7 +84,9 @@
|
|||||||
#include <llvm/IR/Instructions.h>
|
#include <llvm/IR/Instructions.h>
|
||||||
#include <llvm/IR/Intrinsics.h>
|
#include <llvm/IR/Intrinsics.h>
|
||||||
#include <llvm/IR/DerivedTypes.h>
|
#include <llvm/IR/DerivedTypes.h>
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
|
#include "llvm/Assembly/AssemblyAnnotationWriter.h"
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
#endif
|
#endif
|
||||||
#include <llvm/PassManager.h>
|
#include <llvm/PassManager.h>
|
||||||
#include <llvm/PassRegistry.h>
|
#include <llvm/PassRegistry.h>
|
||||||
@@ -446,6 +452,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX &&
|
if (g->target->getISA() == Target::NVPTX &&
|
||||||
#if 0
|
#if 0
|
||||||
!type->IsConstType() &&
|
!type->IsConstType() &&
|
||||||
@@ -476,7 +483,7 @@ Module::AddGlobalVariable(const std::string &name, const Type *type, Expr *initE
|
|||||||
type = new ArrayType(type->GetAsUniformType(), nel);
|
type = new ArrayType(type->GetAsUniformType(), nel);
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
llvm::Type *llvmType = type->LLVMType(g->ctx);
|
llvm::Type *llvmType = type->LLVMType(g->ctx);
|
||||||
if (llvmType == NULL)
|
if (llvmType == NULL)
|
||||||
@@ -677,6 +684,7 @@ lCheckExportedParameterTypes(const Type *type, const std::string &name,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
static void
|
static void
|
||||||
lCheckTaskParameterTypes(const Type *type, const std::string &name,
|
lCheckTaskParameterTypes(const Type *type, const std::string &name,
|
||||||
SourcePos pos) {
|
SourcePos pos) {
|
||||||
@@ -691,7 +699,7 @@ lCheckTaskParameterTypes(const Type *type, const std::string &name,
|
|||||||
name.c_str());
|
name.c_str());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
/** Given a function type, loop through the function parameters and see if
|
/** Given a function type, loop through the function parameters and see if
|
||||||
any are StructTypes. If so, issue an error; this is currently broken
|
any are StructTypes. If so, issue an error; this is currently broken
|
||||||
@@ -849,8 +857,12 @@ Module::AddFunctionDeclaration(const std::string &name,
|
|||||||
#else // LLVM 3.1 and 3.3+
|
#else // LLVM 3.1 and 3.3+
|
||||||
function->addFnAttr(llvm::Attribute::AlwaysInline);
|
function->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||||
#endif
|
#endif
|
||||||
/* evghenii: fails function verification when "if" executed in nvptx target */
|
|
||||||
if (functionType->isTask && g->target->getISA() != Target::NVPTX)
|
if (functionType->isTask)
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
/* evghenii: fails function verification when "if" executed in nvptx target */
|
||||||
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
// This also applies transitively to members I think?
|
// This also applies transitively to members I think?
|
||||||
#if defined(LLVM_3_1)
|
#if defined(LLVM_3_1)
|
||||||
function->setDoesNotAlias(1, true);
|
function->setDoesNotAlias(1, true);
|
||||||
@@ -871,12 +883,14 @@ Module::AddFunctionDeclaration(const std::string &name,
|
|||||||
functionType->GetReturnType()->IsVoidType() == false)
|
functionType->GetReturnType()->IsVoidType() == false)
|
||||||
Error(pos, "Task-qualified functions must have void return type.");
|
Error(pos, "Task-qualified functions must have void return type.");
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX &&
|
if (g->target->getISA() == Target::NVPTX &&
|
||||||
Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false &&
|
Type::Equal(functionType->GetReturnType(), AtomicType::Void) == false &&
|
||||||
functionType->isExported)
|
functionType->isExported)
|
||||||
{
|
{
|
||||||
Error(pos, "Export-qualified functions must have void return type with \"nvptx\" target.");
|
Error(pos, "Export-qualified functions must have void return type with \"nvptx\" target.");
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
if (functionType->isExported || functionType->isExternC)
|
if (functionType->isExported || functionType->isExternC)
|
||||||
lCheckForStructParameters(functionType, pos);
|
lCheckForStructParameters(functionType, pos);
|
||||||
@@ -897,9 +911,12 @@ Module::AddFunctionDeclaration(const std::string &name,
|
|||||||
if (functionType->isExported) {
|
if (functionType->isExported) {
|
||||||
lCheckExportedParameterTypes(argType, argName, argPos);
|
lCheckExportedParameterTypes(argType, argName, argPos);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (functionType->isTask) {
|
if (functionType->isTask) {
|
||||||
lCheckTaskParameterTypes(argType, argName, argPos);
|
lCheckTaskParameterTypes(argType, argName, argPos);
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
// ISPC assumes that no pointers alias. (It should be possible to
|
// ISPC assumes that no pointers alias. (It should be possible to
|
||||||
// specify when this is not the case, but this should be the
|
// specify when this is not the case, but this should be the
|
||||||
@@ -1027,24 +1044,28 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
|
|||||||
const char *fileType = NULL;
|
const char *fileType = NULL;
|
||||||
switch (outputType) {
|
switch (outputType) {
|
||||||
case Asm:
|
case Asm:
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
{
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
if (strcasecmp(suffix, "s"))
|
{
|
||||||
|
if (strcasecmp(suffix, "ptx"))
|
||||||
fileType = "assembly";
|
fileType = "assembly";
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
if (strcasecmp(suffix, "ptx"))
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
if (strcasecmp(suffix, "s"))
|
||||||
fileType = "assembly";
|
fileType = "assembly";
|
||||||
break;
|
break;
|
||||||
case Bitcode:
|
case Bitcode:
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
{
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
if (strcasecmp(suffix, "bc"))
|
{
|
||||||
fileType = "LLVM bitcode";
|
if (strcasecmp(suffix, "ll"))
|
||||||
}
|
fileType = "LLVM assembly";
|
||||||
else
|
}
|
||||||
if (strcasecmp(suffix, "ll"))
|
else
|
||||||
fileType = "LLVM assembly";
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
if (strcasecmp(suffix, "bc"))
|
||||||
|
fileType = "LLVM bitcode";
|
||||||
break;
|
break;
|
||||||
case Object:
|
case Object:
|
||||||
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
|
if (strcasecmp(suffix, "o") && strcasecmp(suffix, "obj"))
|
||||||
@@ -1113,6 +1134,7 @@ Module::writeOutput(OutputType outputType, const char *outFileName,
|
|||||||
return writeObjectFileOrAssembly(outputType, outFileName);
|
return writeObjectFileOrAssembly(outputType, outFileName);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
typedef std::vector<std::string> vecString_t;
|
typedef std::vector<std::string> vecString_t;
|
||||||
static vecString_t
|
static vecString_t
|
||||||
lSplitString(const std::string &s)
|
lSplitString(const std::string &s)
|
||||||
@@ -1180,6 +1202,7 @@ lFixAttributes(const vecString_t &src, vecString_t &dst)
|
|||||||
dst.push_back(s);
|
dst.push_back(s);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
bool
|
bool
|
||||||
Module::writeBitcode(llvm::Module *module, const char *outFileName) {
|
Module::writeBitcode(llvm::Module *module, const char *outFileName) {
|
||||||
@@ -1204,11 +1227,8 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
llvm::raw_fd_ostream fos(fd, (fd != 1), false);
|
llvm::raw_fd_ostream fos(fd, (fd != 1), false);
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
{
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
llvm::WriteBitcodeToFile(module, fos);
|
|
||||||
}
|
|
||||||
else
|
|
||||||
{
|
{
|
||||||
/* when using "nvptx" target, emit patched/hacked assembly
|
/* when using "nvptx" target, emit patched/hacked assembly
|
||||||
* NVPTX only accepts 3.2-style LLVM assembly, where attributes
|
* NVPTX only accepts 3.2-style LLVM assembly, where attributes
|
||||||
@@ -1240,7 +1260,9 @@ Module::writeBitcode(llvm::Module *module, const char *outFileName) {
|
|||||||
fos << *it;
|
fos << *it;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
llvm::WriteBitcodeToFile(module, fos);
|
||||||
|
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
@@ -2275,6 +2297,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
|||||||
opts.addMacroDef(g->cppArgs[i].substr(2));
|
opts.addMacroDef(g->cppArgs[i].substr(2));
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
opts.addMacroDef("__NVPTX__");
|
opts.addMacroDef("__NVPTX__");
|
||||||
@@ -2295,6 +2318,7 @@ Module::execPreprocessor(const char *infilename, llvm::raw_string_ostream *ostre
|
|||||||
opts.addMacroDef("taskCount2=__taskCount2()");
|
opts.addMacroDef("taskCount2=__taskCount2()");
|
||||||
opts.addMacroDef("taskCount=__taskCount()");
|
opts.addMacroDef("taskCount=__taskCount()");
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
inst.getLangOpts().LineComment = 1;
|
inst.getLangOpts().LineComment = 1;
|
||||||
#if defined(LLVM_3_5)
|
#if defined(LLVM_3_5)
|
||||||
@@ -2740,6 +2764,7 @@ lCreateDispatchModule(std::map<std::string, FunctionTargetVariants> &functions)
|
|||||||
return module;
|
return module;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
static std::string lCBEMangle(const std::string &S) {
|
static std::string lCBEMangle(const std::string &S) {
|
||||||
std::string Result;
|
std::string Result;
|
||||||
|
|
||||||
@@ -2762,7 +2787,7 @@ static std::string lCBEMangle(const std::string &S) {
|
|||||||
}
|
}
|
||||||
return Result;
|
return Result;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
int
|
int
|
||||||
Module::CompileAndOutput(const char *srcFile,
|
Module::CompileAndOutput(const char *srcFile,
|
||||||
@@ -2778,7 +2803,7 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
const char *hostStubFileName,
|
const char *hostStubFileName,
|
||||||
const char *devStubFileName)
|
const char *devStubFileName)
|
||||||
{
|
{
|
||||||
if (target == NULL || strchr(target, ',') == NULL) {
|
if (target == NULL || strchr(target, ',') == NULL) {
|
||||||
// We're only compiling to a single target
|
// We're only compiling to a single target
|
||||||
g->target = new Target(arch, cpu, target, generatePIC);
|
g->target = new Target(arch, cpu, target, generatePIC);
|
||||||
if (!g->target->isValid())
|
if (!g->target->isValid())
|
||||||
@@ -2786,7 +2811,7 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
|
|
||||||
m = new Module(srcFile);
|
m = new Module(srcFile);
|
||||||
if (m->CompileFile() == 0) {
|
if (m->CompileFile() == 0) {
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
/* NVPTX:
|
/* NVPTX:
|
||||||
* for PTX target replace '.' with '_' in all global variables
|
* for PTX target replace '.' with '_' in all global variables
|
||||||
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
|
* a PTX identifier name must match [a-zA-Z$_][a-zA-Z$_0-9]*
|
||||||
@@ -2811,7 +2836,7 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
if (outputType == CXX) {
|
if (outputType == CXX) {
|
||||||
if (target == NULL || strncmp(target, "generic-", 8) != 0) {
|
if (target == NULL || strncmp(target, "generic-", 8) != 0) {
|
||||||
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
|
Error(SourcePos(), "When generating C++ output, one of the \"generic-*\" "
|
||||||
@@ -3014,5 +3039,4 @@ Module::CompileAndOutput(const char *srcFile,
|
|||||||
|
|
||||||
return errorCount > 0;
|
return errorCount > 0;
|
||||||
}
|
}
|
||||||
return true;
|
|
||||||
}
|
}
|
||||||
|
|||||||
17
opt.cpp
17
opt.cpp
@@ -55,7 +55,9 @@
|
|||||||
#include <llvm/Function.h>
|
#include <llvm/Function.h>
|
||||||
#include <llvm/BasicBlock.h>
|
#include <llvm/BasicBlock.h>
|
||||||
#include <llvm/Constants.h>
|
#include <llvm/Constants.h>
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include <llvm/InlineAsm.h>
|
#include <llvm/InlineAsm.h>
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
#else
|
#else
|
||||||
#include <llvm/IR/Module.h>
|
#include <llvm/IR/Module.h>
|
||||||
#include <llvm/IR/Instructions.h>
|
#include <llvm/IR/Instructions.h>
|
||||||
@@ -63,7 +65,9 @@
|
|||||||
#include <llvm/IR/Function.h>
|
#include <llvm/IR/Function.h>
|
||||||
#include <llvm/IR/BasicBlock.h>
|
#include <llvm/IR/BasicBlock.h>
|
||||||
#include <llvm/IR/Constants.h>
|
#include <llvm/IR/Constants.h>
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#include <llvm/IR/InlineAsm.h>
|
#include <llvm/IR/InlineAsm.h>
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
#endif
|
#endif
|
||||||
#if defined (LLVM_3_4) || defined(LLVM_3_5)
|
#if defined (LLVM_3_4) || defined(LLVM_3_5)
|
||||||
#include <llvm/Transforms/Instrumentation.h>
|
#include <llvm/Transforms/Instrumentation.h>
|
||||||
@@ -131,7 +135,9 @@ static llvm::Pass *CreateDebugPass(char * output);
|
|||||||
static llvm::Pass *CreateReplaceStdlibShiftPass();
|
static llvm::Pass *CreateReplaceStdlibShiftPass();
|
||||||
|
|
||||||
static llvm::Pass *CreateFixBooleanSelectPass();
|
static llvm::Pass *CreateFixBooleanSelectPass();
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
static llvm::Pass *CreatePromoteLocalToPrivatePass();
|
static llvm::Pass *CreatePromoteLocalToPrivatePass();
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
#define DEBUG_START_PASS(NAME) \
|
#define DEBUG_START_PASS(NAME) \
|
||||||
if (g->debugPrint && \
|
if (g->debugPrint && \
|
||||||
@@ -495,9 +501,11 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
// run absolutely no optimizations, since the front-end needs us to
|
// run absolutely no optimizations, since the front-end needs us to
|
||||||
// take the various __pseudo_* functions it has emitted and turn
|
// take the various __pseudo_* functions it has emitted and turn
|
||||||
// them into something that can actually execute.
|
// them into something that can actually execute.
|
||||||
|
optPM.add(CreateImproveMemoryOpsPass(), 100);
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->opt.disableGatherScatterOptimizations == false &&
|
if (g->opt.disableGatherScatterOptimizations == false &&
|
||||||
g->target->getVectorWidth() > 1)
|
g->target->getVectorWidth() > 1)
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
optPM.add(CreateImproveMemoryOpsPass(), 100);
|
optPM.add(CreateImproveMemoryOpsPass(), 100);
|
||||||
|
|
||||||
if (g->opt.disableHandlePseudoMemoryOps == false)
|
if (g->opt.disableHandlePseudoMemoryOps == false)
|
||||||
@@ -579,7 +587,9 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(llvm::createReassociatePass());
|
optPM.add(llvm::createReassociatePass());
|
||||||
optPM.add(llvm::createIPConstantPropagationPass());
|
optPM.add(llvm::createIPConstantPropagationPass());
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
if (g->target->getISA() != Target::NVPTX)
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
optPM.add(CreateReplaceStdlibShiftPass(),229);
|
optPM.add(CreateReplaceStdlibShiftPass(),229);
|
||||||
|
|
||||||
optPM.add(llvm::createDeadArgEliminationPass(),230);
|
optPM.add(llvm::createDeadArgEliminationPass(),230);
|
||||||
@@ -693,7 +703,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
|
|
||||||
// Should be the last
|
// Should be the last
|
||||||
optPM.add(CreateFixBooleanSelectPass(), 400);
|
optPM.add(CreateFixBooleanSelectPass(), 400);
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
optPM.add(CreatePromoteLocalToPrivatePass());
|
optPM.add(CreatePromoteLocalToPrivatePass());
|
||||||
@@ -799,6 +809,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(llvm::createConstantMergePass());
|
optPM.add(llvm::createConstantMergePass());
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
}
|
}
|
||||||
|
|
||||||
// Finish up by making sure we didn't mess anything up in the IR along
|
// Finish up by making sure we didn't mess anything up in the IR along
|
||||||
@@ -5410,6 +5421,7 @@ CreateFixBooleanSelectPass() {
|
|||||||
return new FixBooleanSelectPass();
|
return new FixBooleanSelectPass();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
// Detect addrspace(3)
|
// Detect addrspace(3)
|
||||||
///////////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////////
|
||||||
@@ -5498,4 +5510,5 @@ CreatePromoteLocalToPrivatePass() {
|
|||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
|
|||||||
@@ -45,11 +45,13 @@ then
|
|||||||
# $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
|
# $($LLVMAS $1 -o $TMPDIR/$fbname.bc) && $($LLVMDIS $TMPDIR/$fbname.bc -o $TMPDIR/$fbname.ll) && $($PTXGEN $TMPDIR/$fbname.ll -o $TMPDIR/$fbname.ptx) && \
|
||||||
$($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \
|
$($PTXGEN $1 -o $TMPDIR/$fbname.ptx) && \
|
||||||
$($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
|
$($PTXCC $TMPDIR/$fbname.ptx -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
|
||||||
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
|
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
|
||||||
|
$(/bin/rm -rf $TMPDIR/*$fbname*);
|
||||||
else
|
else
|
||||||
$(sed 's/\.b0/\.b32/g' $1 > $TMPDIR/$fbname) && \
|
$(sed 's/\.b0/\.b32/g' $1 > $TMPDIR/$fbname) && \
|
||||||
$($PTXCC $TMPDIR/$fbname -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
|
$($PTXCC $TMPDIR/$fbname -o $TMPDIR/$fbname.o -Xnvcc="-G") && \
|
||||||
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS);
|
$(nvcc test_static_nvptx.cpp examples/util/nvcc_helpers.cu examples/util/ispc_malloc.cpp $TMPDIR/$fbname.o -arch=sm_35 -Iexamples/util/ -D_CUDA_ -lcudadevrt $ARGS) && \
|
||||||
|
$(/bin/rm -rf $TMPDIR/*$fbname*);
|
||||||
fi
|
fi
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -233,7 +233,7 @@ def run_test(testname):
|
|||||||
elif is_nvptx_target:
|
elif is_nvptx_target:
|
||||||
if os.environ.get("NVVM") == "1":
|
if os.environ.get("NVVM") == "1":
|
||||||
is_nvptx_nvvm = True
|
is_nvptx_nvvm = True
|
||||||
obj_name = "%s.bc" % testname
|
obj_name = "%s.ll" % testname
|
||||||
else:
|
else:
|
||||||
obj_name = "%s.ptx" % testname
|
obj_name = "%s.ptx" % testname
|
||||||
is_nvptx_nvvm = False
|
is_nvptx_nvvm = False
|
||||||
|
|||||||
700
stmt.cpp
700
stmt.cpp
@@ -142,6 +142,7 @@ lHasUnsizedArrays(const Type *type) {
|
|||||||
return lHasUnsizedArrays(at->GetElementType());
|
return lHasUnsizedArrays(at->GetElementType());
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
|
static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *value, const SourcePos &currentPos, const bool variable = false)
|
||||||
{
|
{
|
||||||
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
|
if (!value->getType()->isPointerTy() || g->target->getISA() != Target::NVPTX)
|
||||||
@@ -198,6 +199,7 @@ static llvm::Value* lConvertToGenericPtr(FunctionEmitContext *ctx, llvm::Value *
|
|||||||
|
|
||||||
return value;
|
return value;
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
void
|
void
|
||||||
DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||||
@@ -261,9 +263,8 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
if (sym->storageClass == SC_STATIC) {
|
if (sym->storageClass == SC_STATIC) {
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
|
if (g->target->getISA() == Target::NVPTX && !sym->type->IsConstType())
|
||||||
{
|
{
|
||||||
Error(sym->pos,
|
Error(sym->pos,
|
||||||
@@ -279,7 +280,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
PerformanceWarning(sym->pos,
|
PerformanceWarning(sym->pos,
|
||||||
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
|
"\"const static uniform\" variable ""\"%s\" is stored in __constant address space with ""\"nvptx\" target.",
|
||||||
sym->name.c_str());
|
sym->name.c_str());
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
// For static variables, we need a compile-time constant value
|
// For static variables, we need a compile-time constant value
|
||||||
// for its initializer; if there's no initializer, we use a
|
// for its initializer; if there's no initializer, we use a
|
||||||
// zero value.
|
// zero value.
|
||||||
@@ -307,28 +308,38 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
if (cinit == NULL)
|
if (cinit == NULL)
|
||||||
cinit = llvm::Constant::getNullValue(llvmType);
|
cinit = llvm::Constant::getNullValue(llvmType);
|
||||||
|
|
||||||
|
// Allocate space for the static variable in global scope, so
|
||||||
|
// that it persists across function calls
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
int addressSpace = 0;
|
int addressSpace = 0;
|
||||||
if (g->target->getISA() == Target::NVPTX &&
|
if (g->target->getISA() == Target::NVPTX &&
|
||||||
sym->type->IsConstType() &&
|
sym->type->IsConstType() &&
|
||||||
sym->type->IsUniformType())
|
sym->type->IsUniformType())
|
||||||
addressSpace = 4;
|
addressSpace = 4;
|
||||||
|
|
||||||
// Allocate space for the static variable in global scope, so
|
|
||||||
// that it persists across function calls
|
|
||||||
sym->storagePtr =
|
sym->storagePtr =
|
||||||
new llvm::GlobalVariable(*m->module, llvmType,
|
new llvm::GlobalVariable(*m->module, llvmType,
|
||||||
sym->type->IsConstType(),
|
sym->type->IsConstType(),
|
||||||
llvm::GlobalValue::InternalLinkage, cinit,
|
llvm::GlobalValue::InternalLinkage, cinit,
|
||||||
llvm::Twine("static_") +
|
llvm::Twine("static.") +
|
||||||
llvm::Twine(sym->pos.first_line) +
|
llvm::Twine(sym->pos.first_line) +
|
||||||
llvm::Twine("_") + sym->name.c_str(),
|
llvm::Twine(".") + sym->name.c_str(),
|
||||||
NULL,
|
NULL,
|
||||||
llvm::GlobalVariable::NotThreadLocal,
|
llvm::GlobalVariable::NotThreadLocal,
|
||||||
addressSpace);
|
addressSpace);
|
||||||
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
|
sym->storagePtr = lConvertToGenericPtr(ctx, sym->storagePtr, sym->pos);
|
||||||
|
#else /* ISPC_NVPTX_ENABLED */
|
||||||
|
sym->storagePtr =
|
||||||
|
new llvm::GlobalVariable(*m->module, llvmType,
|
||||||
|
sym->type->IsConstType(),
|
||||||
|
llvm::GlobalValue::InternalLinkage, cinit,
|
||||||
|
llvm::Twine("static.") +
|
||||||
|
llvm::Twine(sym->pos.first_line) +
|
||||||
|
llvm::Twine(".") + sym->name.c_str());
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
// Tell the FunctionEmitContext about the variable
|
// Tell the FunctionEmitContext about the variable
|
||||||
ctx->EmitVariableDebugInfo(sym);
|
ctx->EmitVariableDebugInfo(sym);
|
||||||
}
|
}
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
|
else if ((sym->type->IsUniformType() || sym->type->IsSOAType()) &&
|
||||||
/* NVPTX:
|
/* NVPTX:
|
||||||
* only non-constant uniform data types are stored in shared memory
|
* only non-constant uniform data types are stored in shared memory
|
||||||
@@ -396,6 +407,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
sym->parentFunction = ctx->GetFunction();
|
sym->parentFunction = ctx->GetFunction();
|
||||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||||
}
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
// For non-static variables, allocate storage on the stack
|
// For non-static variables, allocate storage on the stack
|
||||||
@@ -404,7 +416,6 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// Tell the FunctionEmitContext about the variable; must do
|
// Tell the FunctionEmitContext about the variable; must do
|
||||||
// this before the initializer stuff.
|
// this before the initializer stuff.
|
||||||
ctx->EmitVariableDebugInfo(sym);
|
ctx->EmitVariableDebugInfo(sym);
|
||||||
|
|
||||||
if (initExpr == 0 && sym->type->IsConstType())
|
if (initExpr == 0 && sym->type->IsConstType())
|
||||||
Error(sym->pos, "Missing initializer for const variable "
|
Error(sym->pos, "Missing initializer for const variable "
|
||||||
"\"%s\".", sym->name.c_str());
|
"\"%s\".", sym->name.c_str());
|
||||||
@@ -412,7 +423,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// And then get it initialized...
|
// And then get it initialized...
|
||||||
sym->parentFunction = ctx->GetFunction();
|
sym->parentFunction = ctx->GetFunction();
|
||||||
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
InitSymbol(sym->storagePtr, sym->type, initExpr, ctx, sym->pos);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -571,7 +582,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
if (testValue == NULL)
|
if (testValue == NULL)
|
||||||
return;
|
return;
|
||||||
|
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#if 0
|
#if 0
|
||||||
if (!isUniform && g->target->getISA() == Target::NVPTX)
|
if (!isUniform && g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
@@ -582,7 +593,7 @@ IfStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
isUniform = true;
|
isUniform = true;
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
if (isUniform) {
|
if (isUniform) {
|
||||||
ctx->StartUniformIf();
|
ctx->StartUniformIf();
|
||||||
@@ -865,11 +876,17 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
|||||||
|
|
||||||
// Do any of the program instances want to run the 'true'
|
// Do any of the program instances want to run the 'true'
|
||||||
// block? If not, jump ahead to bNext.
|
// block? If not, jump ahead to bNext.
|
||||||
#if 1
|
|
||||||
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#else
|
#if 0
|
||||||
llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
|
llvm::Value *maskAnyTrueQ = ctx->ExtractInst(ctx->GetFullMask(),0);
|
||||||
|
#else
|
||||||
|
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
|
||||||
#endif
|
#endif
|
||||||
|
#else /* ISPC_NVPTX_ENABLED */
|
||||||
|
llvm::Value *maskAnyTrueQ = ctx->Any(ctx->GetFullMask());
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
|
ctx->BranchInst(bRunTrue, bNext, maskAnyTrueQ);
|
||||||
|
|
||||||
// Emit statements for true
|
// Emit statements for true
|
||||||
@@ -886,11 +903,16 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
|||||||
|
|
||||||
// Similarly, check to see if any of the instances want to
|
// Similarly, check to see if any of the instances want to
|
||||||
// run the 'false' block...
|
// run the 'false' block...
|
||||||
#if 1
|
|
||||||
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
#else
|
#if 0
|
||||||
llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
|
llvm::Value *maskAnyFalseQ = ctx->ExtractInst(ctx->GetFullMask(),0);
|
||||||
|
#else
|
||||||
|
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
|
||||||
#endif
|
#endif
|
||||||
|
#else /* ISPC_NVPTX_ENABLED */
|
||||||
|
llvm::Value *maskAnyFalseQ = ctx->Any(ctx->GetFullMask());
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
|
ctx->BranchInst(bRunFalse, bDone, maskAnyFalseQ);
|
||||||
|
|
||||||
// Emit code for false
|
// Emit code for false
|
||||||
@@ -1450,10 +1472,96 @@ static llvm::Value *
|
|||||||
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
|
lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
|
||||||
llvm::Value *uniformCounterPtr,
|
llvm::Value *uniformCounterPtr,
|
||||||
llvm::Value *varyingCounterPtr,
|
llvm::Value *varyingCounterPtr,
|
||||||
const std::vector<int> &spans)
|
const std::vector<int> &spans) {
|
||||||
{
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
{
|
{
|
||||||
|
// Smear the uniform counter value out to be varying
|
||||||
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
||||||
|
llvm::Value *smearCounter = ctx->BroadcastValue(
|
||||||
|
counter, LLVMTypes::Int32VectorType, "smear_counter");
|
||||||
|
|
||||||
|
// Figure out the offsets; this is a little bit tricky. As an example,
|
||||||
|
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
||||||
|
// where the inner dimension has a stride of 4 and the outer dimension
|
||||||
|
// has a stride of 2. For the inner dimension, we want the offsets
|
||||||
|
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
|
||||||
|
// (0,0,0,0,1,1,1,1).
|
||||||
|
int32_t delta[ISPC_MAX_NVEC];
|
||||||
|
const int vecWidth = 32;
|
||||||
|
std::vector<llvm::Constant*> constDeltaList;
|
||||||
|
for (int i = 0; i < vecWidth; ++i)
|
||||||
|
{
|
||||||
|
int d = i;
|
||||||
|
// First, account for the effect of any dimensions at deeper
|
||||||
|
// nesting levels than the current one.
|
||||||
|
int prevDimSpanCount = 1;
|
||||||
|
for (int j = dim; j < nDims-1; ++j)
|
||||||
|
prevDimSpanCount *= spans[j+1];
|
||||||
|
d /= prevDimSpanCount;
|
||||||
|
|
||||||
|
// And now with what's left, figure out our own offset
|
||||||
|
delta[i] = d % spans[dim];
|
||||||
|
constDeltaList.push_back(LLVMInt8(delta[i]));
|
||||||
|
}
|
||||||
|
|
||||||
|
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
|
||||||
|
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
|
||||||
|
|
||||||
|
|
||||||
|
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
|
||||||
|
/*Module=*/*m->module,
|
||||||
|
/*Type=*/ArrayDelta,
|
||||||
|
/*isConstant=*/true,
|
||||||
|
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
|
||||||
|
/*Initializer=*/0, // has initializer, specified below
|
||||||
|
/*Name=*/"constDeltaForeach");
|
||||||
|
#if 0
|
||||||
|
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
|
||||||
|
/*unsigned AddressSpace=*/4 /*constant*/);
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
|
||||||
|
|
||||||
|
globalDelta->setInitializer(constDelta);
|
||||||
|
llvm::Function *func_program_index = m->module->getFunction("__program_index");
|
||||||
|
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
|
||||||
|
|
||||||
|
std::vector<llvm::Value*> ptr_arrayidx_indices;
|
||||||
|
ptr_arrayidx_indices.push_back(LLVMInt32(0));
|
||||||
|
ptr_arrayidx_indices.push_back(laneIdx);
|
||||||
|
#if 1
|
||||||
|
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
|
||||||
|
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
|
||||||
|
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
|
||||||
|
|
||||||
|
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
|
||||||
|
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
|
||||||
|
|
||||||
|
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
|
||||||
|
// llvm::UndefValue(LLVMInt32Vector),
|
||||||
|
const_packed_41,
|
||||||
|
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
|
||||||
|
#endif
|
||||||
|
|
||||||
|
|
||||||
|
// Add the deltas to compute the varying counter values; store the
|
||||||
|
// result to memory and then return it directly as well.
|
||||||
|
#if 0
|
||||||
|
llvm::Value *varyingCounter =
|
||||||
|
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
|
||||||
|
LLVMInt32Vector(delta), "iter_val");
|
||||||
|
#else
|
||||||
|
llvm::Value *varyingCounter =
|
||||||
|
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
|
||||||
|
packed_43, "iter_val");
|
||||||
|
#endif
|
||||||
|
ctx->StoreInst(varyingCounter, varyingCounterPtr);
|
||||||
|
return varyingCounter;
|
||||||
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
// Smear the uniform counter value out to be varying
|
// Smear the uniform counter value out to be varying
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
||||||
llvm::Value *smearCounter = ctx->BroadcastValue(
|
llvm::Value *smearCounter = ctx->BroadcastValue(
|
||||||
@@ -1486,93 +1594,6 @@ lUpdateVaryingCounter(int dim, int nDims, FunctionEmitContext *ctx,
|
|||||||
LLVMInt32Vector(delta), "iter_val");
|
LLVMInt32Vector(delta), "iter_val");
|
||||||
ctx->StoreInst(varyingCounter, varyingCounterPtr);
|
ctx->StoreInst(varyingCounter, varyingCounterPtr);
|
||||||
return varyingCounter;
|
return varyingCounter;
|
||||||
}
|
|
||||||
else /* NVPTX == true */
|
|
||||||
{
|
|
||||||
// Smear the uniform counter value out to be varying
|
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtr);
|
|
||||||
llvm::Value *smearCounter = ctx->BroadcastValue(
|
|
||||||
counter, LLVMTypes::Int32VectorType, "smear_counter");
|
|
||||||
|
|
||||||
// Figure out the offsets; this is a little bit tricky. As an example,
|
|
||||||
// consider a 2D tiled foreach loop, where we're running 8-wide and
|
|
||||||
// where the inner dimension has a stride of 4 and the outer dimension
|
|
||||||
// has a stride of 2. For the inner dimension, we want the offsets
|
|
||||||
// (0,1,2,3,0,1,2,3), and for the outer dimension we want
|
|
||||||
// (0,0,0,0,1,1,1,1).
|
|
||||||
int32_t delta[ISPC_MAX_NVEC];
|
|
||||||
const int vecWidth = 32;
|
|
||||||
std::vector<llvm::Constant*> constDeltaList;
|
|
||||||
for (int i = 0; i < vecWidth; ++i)
|
|
||||||
{
|
|
||||||
int d = i;
|
|
||||||
// First, account for the effect of any dimensions at deeper
|
|
||||||
// nesting levels than the current one.
|
|
||||||
int prevDimSpanCount = 1;
|
|
||||||
for (int j = dim; j < nDims-1; ++j)
|
|
||||||
prevDimSpanCount *= spans[j+1];
|
|
||||||
d /= prevDimSpanCount;
|
|
||||||
|
|
||||||
// And now with what's left, figure out our own offset
|
|
||||||
delta[i] = d % spans[dim];
|
|
||||||
constDeltaList.push_back(LLVMInt8(delta[i]));
|
|
||||||
}
|
|
||||||
|
|
||||||
llvm::ArrayType* ArrayDelta = llvm::ArrayType::get(LLVMTypes::Int8Type, 32);
|
|
||||||
// llvm::PointerType::get(ArrayDelta, 4); /* constant memory */
|
|
||||||
|
|
||||||
|
|
||||||
llvm::GlobalVariable* globalDelta = new llvm::GlobalVariable(
|
|
||||||
/*Module=*/*m->module,
|
|
||||||
/*Type=*/ArrayDelta,
|
|
||||||
/*isConstant=*/true,
|
|
||||||
/*Linkage=*/llvm::GlobalValue::PrivateLinkage,
|
|
||||||
/*Initializer=*/0, // has initializer, specified below
|
|
||||||
/*Name=*/"constDeltaForeach");
|
|
||||||
#if 0
|
|
||||||
/*ThreadLocalMode=*/llvm::GlobalVariable::NotThreadLocal,
|
|
||||||
/*unsigned AddressSpace=*/4 /*constant*/);
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
llvm::Constant* constDelta = llvm::ConstantArray::get(ArrayDelta, constDeltaList);
|
|
||||||
|
|
||||||
globalDelta->setInitializer(constDelta);
|
|
||||||
llvm::Function *func_program_index = m->module->getFunction("__program_index");
|
|
||||||
llvm::Value *laneIdx = ctx->CallInst(func_program_index, NULL, std::vector<llvm::Value*>(), "foreach__programIndex");
|
|
||||||
|
|
||||||
std::vector<llvm::Value*> ptr_arrayidx_indices;
|
|
||||||
ptr_arrayidx_indices.push_back(LLVMInt32(0));
|
|
||||||
ptr_arrayidx_indices.push_back(laneIdx);
|
|
||||||
#if 1
|
|
||||||
llvm::Instruction* ptr_arrayidx = llvm::GetElementPtrInst::Create(globalDelta, ptr_arrayidx_indices, "arrayidx", ctx->GetCurrentBasicBlock());
|
|
||||||
llvm::LoadInst* int8_39 = new llvm::LoadInst(ptr_arrayidx, "", false, ctx->GetCurrentBasicBlock());
|
|
||||||
llvm::Value * int32_39 = ctx->ZExtInst(int8_39, LLVMTypes::Int32Type);
|
|
||||||
|
|
||||||
llvm::VectorType* VectorTy_2 = llvm::VectorType::get(llvm::IntegerType::get(*g->ctx, 32), 1);
|
|
||||||
llvm::UndefValue* const_packed_41 = llvm::UndefValue::get(VectorTy_2);
|
|
||||||
|
|
||||||
llvm::InsertElementInst* packed_43 = llvm::InsertElementInst::Create(
|
|
||||||
// llvm::UndefValue(LLVMInt32Vector),
|
|
||||||
const_packed_41,
|
|
||||||
int32_39, LLVMInt32(0), "", ctx->GetCurrentBasicBlock());
|
|
||||||
#endif
|
|
||||||
|
|
||||||
|
|
||||||
// Add the deltas to compute the varying counter values; store the
|
|
||||||
// result to memory and then return it directly as well.
|
|
||||||
#if 0
|
|
||||||
llvm::Value *varyingCounter =
|
|
||||||
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
|
|
||||||
LLVMInt32Vector(delta), "iter_val");
|
|
||||||
#else
|
|
||||||
llvm::Value *varyingCounter =
|
|
||||||
ctx->BinaryOperator(llvm::Instruction::Add, smearCounter,
|
|
||||||
packed_43, "iter_val");
|
|
||||||
#endif
|
|
||||||
ctx->StoreInst(varyingCounter, varyingCounterPtr);
|
|
||||||
return varyingCounter;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -1650,7 +1671,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
|
|
||||||
// This should be caught during typechecking
|
// This should be caught during typechecking
|
||||||
AssertPos(pos, startExprs.size() == dimVariables.size() &&
|
AssertPos(pos, startExprs.size() == dimVariables.size() &&
|
||||||
endExprs.size() == dimVariables.size());
|
endExprs.size() == dimVariables.size());
|
||||||
int nDims = (int)dimVariables.size();
|
int nDims = (int)dimVariables.size();
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////
|
||||||
@@ -1661,66 +1682,70 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
|
std::vector<llvm::Value *> nExtras, alignedEnd, extrasMaskPtrs;
|
||||||
|
|
||||||
std::vector<int> span(nDims, 0);
|
std::vector<int> span(nDims, 0);
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
const int vectorWidth =
|
const int vectorWidth =
|
||||||
g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
|
g->target->getISA() == Target::NVPTX ? 32 : g->target->getVectorWidth();
|
||||||
lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
|
lGetSpans(nDims-1, nDims, vectorWidth, isTiled, &span[0]);
|
||||||
|
#else /* ISPC_NVPTX_ENABLED */
|
||||||
|
lGetSpans(nDims-1, nDims, g->target->getVectorWidth(), isTiled, &span[0]);
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
for (int i = 0; i < nDims; ++i) {
|
for (int i = 0; i < nDims; ++i) {
|
||||||
// Basic blocks that we'll fill in later with the looping logic for
|
// Basic blocks that we'll fill in later with the looping logic for
|
||||||
// this dimension.
|
// this dimension.
|
||||||
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
|
bbReset.push_back(ctx->CreateBasicBlock("foreach_reset"));
|
||||||
if (i < nDims-1)
|
if (i < nDims-1)
|
||||||
// stepping for the innermost dimension is handled specially
|
// stepping for the innermost dimension is handled specially
|
||||||
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
|
bbStep.push_back(ctx->CreateBasicBlock("foreach_step"));
|
||||||
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
|
bbTest.push_back(ctx->CreateBasicBlock("foreach_test"));
|
||||||
|
|
||||||
// Start and end value for this loop dimension
|
// Start and end value for this loop dimension
|
||||||
llvm::Value *sv = startExprs[i]->GetValue(ctx);
|
llvm::Value *sv = startExprs[i]->GetValue(ctx);
|
||||||
llvm::Value *ev = endExprs[i]->GetValue(ctx);
|
llvm::Value *ev = endExprs[i]->GetValue(ctx);
|
||||||
if (sv == NULL || ev == NULL)
|
if (sv == NULL || ev == NULL)
|
||||||
return;
|
return;
|
||||||
startVals.push_back(sv);
|
startVals.push_back(sv);
|
||||||
endVals.push_back(ev);
|
endVals.push_back(ev);
|
||||||
|
|
||||||
// nItems = endVal - startVal
|
// nItems = endVal - startVal
|
||||||
llvm::Value *nItems =
|
llvm::Value *nItems =
|
||||||
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
|
ctx->BinaryOperator(llvm::Instruction::Sub, ev, sv, "nitems");
|
||||||
|
|
||||||
// nExtras = nItems % (span for this dimension)
|
// nExtras = nItems % (span for this dimension)
|
||||||
// This gives us the number of extra elements we need to deal with
|
// This gives us the number of extra elements we need to deal with
|
||||||
// at the end of the loop for this dimension that don't fit cleanly
|
// at the end of the loop for this dimension that don't fit cleanly
|
||||||
// into a vector width.
|
// into a vector width.
|
||||||
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
|
nExtras.push_back(ctx->BinaryOperator(llvm::Instruction::SRem, nItems,
|
||||||
LLVMInt32(span[i]), "nextras"));
|
LLVMInt32(span[i]), "nextras"));
|
||||||
|
|
||||||
// alignedEnd = endVal - nExtras
|
// alignedEnd = endVal - nExtras
|
||||||
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
|
alignedEnd.push_back(ctx->BinaryOperator(llvm::Instruction::Sub, ev,
|
||||||
nExtras[i], "aligned_end"));
|
nExtras[i], "aligned_end"));
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////
|
||||||
// Each dimension has a loop counter that is a uniform value that
|
// Each dimension has a loop counter that is a uniform value that
|
||||||
// goes from startVal to endVal, in steps of the span for this
|
// goes from startVal to endVal, in steps of the span for this
|
||||||
// dimension. Its value is only used internally here for looping
|
// dimension. Its value is only used internally here for looping
|
||||||
// logic and isn't directly available in the user's program code.
|
// logic and isn't directly available in the user's program code.
|
||||||
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
|
uniformCounterPtrs.push_back(ctx->AllocaInst(LLVMTypes::Int32Type,
|
||||||
"counter"));
|
"counter"));
|
||||||
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
|
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
|
||||||
|
|
||||||
// There is also a varying variable that holds the set of index
|
// There is also a varying variable that holds the set of index
|
||||||
// values for each dimension in the current loop iteration; this is
|
// values for each dimension in the current loop iteration; this is
|
||||||
// the value that is program-visible.
|
// the value that is program-visible.
|
||||||
dimVariables[i]->storagePtr =
|
dimVariables[i]->storagePtr =
|
||||||
ctx->AllocaInst(LLVMTypes::Int32VectorType,
|
ctx->AllocaInst(LLVMTypes::Int32VectorType,
|
||||||
dimVariables[i]->name.c_str());
|
dimVariables[i]->name.c_str());
|
||||||
dimVariables[i]->parentFunction = ctx->GetFunction();
|
dimVariables[i]->parentFunction = ctx->GetFunction();
|
||||||
ctx->EmitVariableDebugInfo(dimVariables[i]);
|
ctx->EmitVariableDebugInfo(dimVariables[i]);
|
||||||
|
|
||||||
// Each dimension also maintains a mask that represents which of
|
// Each dimension also maintains a mask that represents which of
|
||||||
// the varying elements in the current iteration should be
|
// the varying elements in the current iteration should be
|
||||||
// processed. (i.e. this is used to disable the lanes that have
|
// processed. (i.e. this is used to disable the lanes that have
|
||||||
// out-of-bounds offsets.)
|
// out-of-bounds offsets.)
|
||||||
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
|
extrasMaskPtrs.push_back(ctx->AllocaInst(LLVMTypes::MaskType, "extras mask"));
|
||||||
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
|
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
|
ctx->StartForeach(FunctionEmitContext::FOREACH_REGULAR);
|
||||||
@@ -1733,14 +1758,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// a given dimension in preparation for running through its loop again,
|
// a given dimension in preparation for running through its loop again,
|
||||||
// after the enclosing level advances its counter.
|
// after the enclosing level advances its counter.
|
||||||
for (int i = 0; i < nDims; ++i) {
|
for (int i = 0; i < nDims; ++i) {
|
||||||
ctx->SetCurrentBasicBlock(bbReset[i]);
|
ctx->SetCurrentBasicBlock(bbReset[i]);
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
ctx->BranchInst(bbExit);
|
ctx->BranchInst(bbExit);
|
||||||
else {
|
else {
|
||||||
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
|
ctx->StoreInst(LLVMMaskAllOn, extrasMaskPtrs[i]);
|
||||||
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
|
ctx->StoreInst(startVals[i], uniformCounterPtrs[i]);
|
||||||
ctx->BranchInst(bbStep[i-1]);
|
ctx->BranchInst(bbStep[i-1]);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -1750,67 +1775,67 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// this for the innermost dimension, which has a more complex stepping
|
// this for the innermost dimension, which has a more complex stepping
|
||||||
// structure..
|
// structure..
|
||||||
for (int i = 0; i < nDims-1; ++i) {
|
for (int i = 0; i < nDims-1; ++i) {
|
||||||
ctx->SetCurrentBasicBlock(bbStep[i]);
|
ctx->SetCurrentBasicBlock(bbStep[i]);
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i]);
|
||||||
llvm::Value *newCounter =
|
llvm::Value *newCounter =
|
||||||
ctx->BinaryOperator(llvm::Instruction::Add, counter,
|
ctx->BinaryOperator(llvm::Instruction::Add, counter,
|
||||||
LLVMInt32(span[i]), "new_counter");
|
LLVMInt32(span[i]), "new_counter");
|
||||||
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
|
ctx->StoreInst(newCounter, uniformCounterPtrs[i]);
|
||||||
ctx->BranchInst(bbTest[i]);
|
ctx->BranchInst(bbTest[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// foreach_test (for all dimensions other than the innermost...)
|
// foreach_test (for all dimensions other than the innermost...)
|
||||||
std::vector<llvm::Value *> inExtras;
|
std::vector<llvm::Value *> inExtras;
|
||||||
for (int i = 0; i < nDims-1; ++i) {
|
for (int i = 0; i < nDims-1; ++i) {
|
||||||
ctx->SetCurrentBasicBlock(bbTest[i]);
|
ctx->SetCurrentBasicBlock(bbTest[i]);
|
||||||
|
|
||||||
llvm::Value *haveExtras =
|
llvm::Value *haveExtras =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SGT,
|
||||||
endVals[i], alignedEnd[i], "have_extras");
|
endVals[i], alignedEnd[i], "have_extras");
|
||||||
|
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[i], "counter");
|
||||||
llvm::Value *atAlignedEnd =
|
llvm::Value *atAlignedEnd =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_EQ,
|
||||||
counter, alignedEnd[i], "at_aligned_end");
|
counter, alignedEnd[i], "at_aligned_end");
|
||||||
llvm::Value *inEx =
|
llvm::Value *inEx =
|
||||||
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
|
ctx->BinaryOperator(llvm::Instruction::And, haveExtras,
|
||||||
atAlignedEnd, "in_extras");
|
atAlignedEnd, "in_extras");
|
||||||
|
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
inExtras.push_back(inEx);
|
inExtras.push_back(inEx);
|
||||||
else
|
else
|
||||||
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
|
inExtras.push_back(ctx->BinaryOperator(llvm::Instruction::Or, inEx,
|
||||||
inExtras[i-1], "in_extras_all"));
|
inExtras[i-1], "in_extras_all"));
|
||||||
|
|
||||||
llvm::Value *varyingCounter =
|
llvm::Value *varyingCounter =
|
||||||
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
|
lUpdateVaryingCounter(i, nDims, ctx, uniformCounterPtrs[i],
|
||||||
dimVariables[i]->storagePtr, span);
|
dimVariables[i]->storagePtr, span);
|
||||||
|
|
||||||
llvm::Value *smearEnd = ctx->BroadcastValue(
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
|
endVals[i], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
|
|
||||||
// Do a vector compare of its value to the end value to generate a
|
// Do a vector compare of its value to the end value to generate a
|
||||||
// mask for this last bit of work.
|
// mask for this last bit of work.
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
varyingCounter, smearEnd);
|
varyingCounter, smearEnd);
|
||||||
emask = ctx->I1VecToBoolVec(emask);
|
emask = ctx->I1VecToBoolVec(emask);
|
||||||
|
|
||||||
if (i == 0)
|
if (i == 0)
|
||||||
ctx->StoreInst(emask, extrasMaskPtrs[i]);
|
ctx->StoreInst(emask, extrasMaskPtrs[i]);
|
||||||
else {
|
else {
|
||||||
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
|
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[i-1]);
|
||||||
llvm::Value *newMask =
|
llvm::Value *newMask =
|
||||||
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
|
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
|
||||||
"extras_mask");
|
"extras_mask");
|
||||||
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
|
ctx->StoreInst(newMask, extrasMaskPtrs[i]);
|
||||||
}
|
}
|
||||||
|
|
||||||
llvm::Value *notAtEnd =
|
llvm::Value *notAtEnd =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
counter, endVals[i]);
|
counter, endVals[i]);
|
||||||
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
|
ctx->BranchInst(bbTest[i+1], bbReset[i], notAtEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -1847,18 +1872,18 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// (i.e. processing extra elements that don't exactly fit into a
|
// (i.e. processing extra elements that don't exactly fit into a
|
||||||
// vector).
|
// vector).
|
||||||
llvm::BasicBlock *bbOuterInExtras =
|
llvm::BasicBlock *bbOuterInExtras =
|
||||||
ctx->CreateBasicBlock("outer_in_extras");
|
ctx->CreateBasicBlock("outer_in_extras");
|
||||||
llvm::BasicBlock *bbOuterNotInExtras =
|
llvm::BasicBlock *bbOuterNotInExtras =
|
||||||
ctx->CreateBasicBlock("outer_not_in_extras");
|
ctx->CreateBasicBlock("outer_not_in_extras");
|
||||||
|
|
||||||
ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
|
ctx->SetCurrentBasicBlock(bbTest[nDims-1]);
|
||||||
if (inExtras.size())
|
if (inExtras.size())
|
||||||
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
|
ctx->BranchInst(bbOuterInExtras, bbOuterNotInExtras,
|
||||||
inExtras.back());
|
inExtras.back());
|
||||||
else
|
else
|
||||||
// for a 1D iteration domain, we certainly don't have any enclosing
|
// for a 1D iteration domain, we certainly don't have any enclosing
|
||||||
// dimensions that are processing extra elements.
|
// dimensions that are processing extra elements.
|
||||||
ctx->BranchInst(bbOuterNotInExtras);
|
ctx->BranchInst(bbOuterNotInExtras);
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// One or more outer dimensions in extras, so we need to mask for the loop
|
// One or more outer dimensions in extras, so we need to mask for the loop
|
||||||
@@ -1873,21 +1898,21 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// // run loop body with mask
|
// // run loop body with mask
|
||||||
// }
|
// }
|
||||||
llvm::BasicBlock *bbAllInnerPartialOuter =
|
llvm::BasicBlock *bbAllInnerPartialOuter =
|
||||||
ctx->CreateBasicBlock("all_inner_partial_outer");
|
ctx->CreateBasicBlock("all_inner_partial_outer");
|
||||||
llvm::BasicBlock *bbPartial =
|
llvm::BasicBlock *bbPartial =
|
||||||
ctx->CreateBasicBlock("both_partial");
|
ctx->CreateBasicBlock("both_partial");
|
||||||
ctx->SetCurrentBasicBlock(bbOuterInExtras); {
|
ctx->SetCurrentBasicBlock(bbOuterInExtras); {
|
||||||
// Update the varying counter value here, since all subsequent
|
// Update the varying counter value here, since all subsequent
|
||||||
// blocks along this path need it.
|
// blocks along this path need it.
|
||||||
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
||||||
dimVariables[nDims-1]->storagePtr, span);
|
dimVariables[nDims-1]->storagePtr, span);
|
||||||
|
|
||||||
// here we just check to see if counter < alignedEnd
|
// here we just check to see if counter < alignedEnd
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
|
||||||
llvm::Value *beforeAlignedEnd =
|
llvm::Value *beforeAlignedEnd =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
counter, alignedEnd[nDims-1], "before_aligned_end");
|
counter, alignedEnd[nDims-1], "before_aligned_end");
|
||||||
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
|
ctx->BranchInst(bbAllInnerPartialOuter, bbPartial, beforeAlignedEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Below we have a basic block that runs the loop body code for the
|
// Below we have a basic block that runs the loop body code for the
|
||||||
@@ -1906,53 +1931,53 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// should step the loop counter for the next enclosing dimension
|
// should step the loop counter for the next enclosing dimension
|
||||||
// instead.
|
// instead.
|
||||||
llvm::Value *stepIndexAfterMaskedBodyPtr =
|
llvm::Value *stepIndexAfterMaskedBodyPtr =
|
||||||
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
|
ctx->AllocaInst(LLVMTypes::BoolType, "step_index");
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// We're in the inner loop part where the only masking is due to outer
|
// We're in the inner loop part where the only masking is due to outer
|
||||||
// dimensions but the innermost dimension fits fully into a vector's
|
// dimensions but the innermost dimension fits fully into a vector's
|
||||||
// width. Set the mask and jump to the masked loop body.
|
// width. Set the mask and jump to the masked loop body.
|
||||||
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
|
ctx->SetCurrentBasicBlock(bbAllInnerPartialOuter); {
|
||||||
llvm::Value *mask;
|
llvm::Value *mask;
|
||||||
if (nDims == 1)
|
if (nDims == 1)
|
||||||
// 1D loop; we shouldn't ever get here anyway
|
// 1D loop; we shouldn't ever get here anyway
|
||||||
mask = LLVMMaskAllOff;
|
mask = LLVMMaskAllOff;
|
||||||
else
|
else
|
||||||
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
|
mask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
|
||||||
|
|
||||||
ctx->SetInternalMask(mask);
|
ctx->SetInternalMask(mask);
|
||||||
|
|
||||||
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
|
ctx->StoreInst(LLVMTrue, stepIndexAfterMaskedBodyPtr);
|
||||||
ctx->BranchInst(bbMaskedBody);
|
ctx->BranchInst(bbMaskedBody);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// We need to include the effect of the innermost dimension in the mask
|
// We need to include the effect of the innermost dimension in the mask
|
||||||
// for the final bits here
|
// for the final bits here
|
||||||
ctx->SetCurrentBasicBlock(bbPartial); {
|
ctx->SetCurrentBasicBlock(bbPartial); {
|
||||||
llvm::Value *varyingCounter =
|
llvm::Value *varyingCounter =
|
||||||
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
|
ctx->LoadInst(dimVariables[nDims-1]->storagePtr);
|
||||||
llvm::Value *smearEnd = ctx->BroadcastValue(
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
|
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
varyingCounter, smearEnd);
|
varyingCounter, smearEnd);
|
||||||
emask = ctx->I1VecToBoolVec(emask);
|
emask = ctx->I1VecToBoolVec(emask);
|
||||||
|
|
||||||
if (nDims == 1) {
|
if (nDims == 1) {
|
||||||
ctx->SetInternalMask(emask);
|
ctx->SetInternalMask(emask);
|
||||||
}
|
}
|
||||||
else {
|
else {
|
||||||
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
|
llvm::Value *oldMask = ctx->LoadInst(extrasMaskPtrs[nDims-2]);
|
||||||
llvm::Value *newMask =
|
llvm::Value *newMask =
|
||||||
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
|
ctx->BinaryOperator(llvm::Instruction::And, oldMask, emask,
|
||||||
"extras_mask");
|
"extras_mask");
|
||||||
ctx->SetInternalMask(newMask);
|
ctx->SetInternalMask(newMask);
|
||||||
}
|
}
|
||||||
|
|
||||||
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
|
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
|
||||||
ctx->BranchInst(bbMaskedBody);
|
ctx->BranchInst(bbMaskedBody);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -1968,14 +1993,14 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// // run loop body with mask
|
// // run loop body with mask
|
||||||
// }
|
// }
|
||||||
llvm::BasicBlock *bbPartialInnerAllOuter =
|
llvm::BasicBlock *bbPartialInnerAllOuter =
|
||||||
ctx->CreateBasicBlock("partial_inner_all_outer");
|
ctx->CreateBasicBlock("partial_inner_all_outer");
|
||||||
ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
|
ctx->SetCurrentBasicBlock(bbOuterNotInExtras); {
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
|
||||||
llvm::Value *beforeAlignedEnd =
|
llvm::Value *beforeAlignedEnd =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
counter, alignedEnd[nDims-1], "before_aligned_end");
|
counter, alignedEnd[nDims-1], "before_aligned_end");
|
||||||
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
|
ctx->BranchInst(bbFullBody, bbPartialInnerAllOuter,
|
||||||
beforeAlignedEnd);
|
beforeAlignedEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -1985,26 +2010,26 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// value of the varying loop counter and have the statements in the
|
// value of the varying loop counter and have the statements in the
|
||||||
// loop body emit their code.
|
// loop body emit their code.
|
||||||
llvm::BasicBlock *bbFullBodyContinue =
|
llvm::BasicBlock *bbFullBodyContinue =
|
||||||
ctx->CreateBasicBlock("foreach_full_continue");
|
ctx->CreateBasicBlock("foreach_full_continue");
|
||||||
ctx->SetCurrentBasicBlock(bbFullBody); {
|
ctx->SetCurrentBasicBlock(bbFullBody); {
|
||||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||||
ctx->SetBlockEntryMask(LLVMMaskAllOn);
|
ctx->SetBlockEntryMask(LLVMMaskAllOn);
|
||||||
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
||||||
dimVariables[nDims-1]->storagePtr, span);
|
dimVariables[nDims-1]->storagePtr, span);
|
||||||
ctx->SetContinueTarget(bbFullBodyContinue);
|
ctx->SetContinueTarget(bbFullBodyContinue);
|
||||||
ctx->AddInstrumentationPoint("foreach loop body (all on)");
|
ctx->AddInstrumentationPoint("foreach loop body (all on)");
|
||||||
stmts->EmitCode(ctx);
|
stmts->EmitCode(ctx);
|
||||||
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
|
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
|
||||||
ctx->BranchInst(bbFullBodyContinue);
|
ctx->BranchInst(bbFullBodyContinue);
|
||||||
}
|
}
|
||||||
ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
|
ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
|
||||||
ctx->RestoreContinuedLanes();
|
ctx->RestoreContinuedLanes();
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
|
||||||
llvm::Value *newCounter =
|
llvm::Value *newCounter =
|
||||||
ctx->BinaryOperator(llvm::Instruction::Add, counter,
|
ctx->BinaryOperator(llvm::Instruction::Add, counter,
|
||||||
LLVMInt32(span[nDims-1]), "new_counter");
|
LLVMInt32(span[nDims-1]), "new_counter");
|
||||||
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
|
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
|
||||||
ctx->BranchInst(bbOuterNotInExtras);
|
ctx->BranchInst(bbOuterNotInExtras);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -2012,33 +2037,33 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// less than the end value, in which case we need to run the body one
|
// less than the end value, in which case we need to run the body one
|
||||||
// more time to get the extra bits.
|
// more time to get the extra bits.
|
||||||
llvm::BasicBlock *bbSetInnerMask =
|
llvm::BasicBlock *bbSetInnerMask =
|
||||||
ctx->CreateBasicBlock("partial_inner_only");
|
ctx->CreateBasicBlock("partial_inner_only");
|
||||||
ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
|
ctx->SetCurrentBasicBlock(bbPartialInnerAllOuter); {
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1], "counter");
|
||||||
llvm::Value *beforeFullEnd =
|
llvm::Value *beforeFullEnd =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
counter, endVals[nDims-1], "before_full_end");
|
counter, endVals[nDims-1], "before_full_end");
|
||||||
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
|
ctx->BranchInst(bbSetInnerMask, bbReset[nDims-1], beforeFullEnd);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// The outer dimensions are all on, so the mask is just given by the
|
// The outer dimensions are all on, so the mask is just given by the
|
||||||
// mask for the innermost dimension
|
// mask for the innermost dimension
|
||||||
ctx->SetCurrentBasicBlock(bbSetInnerMask); {
|
ctx->SetCurrentBasicBlock(bbSetInnerMask); {
|
||||||
llvm::Value *varyingCounter =
|
llvm::Value *varyingCounter =
|
||||||
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
lUpdateVaryingCounter(nDims-1, nDims, ctx, uniformCounterPtrs[nDims-1],
|
||||||
dimVariables[nDims-1]->storagePtr, span);
|
dimVariables[nDims-1]->storagePtr, span);
|
||||||
llvm::Value *smearEnd = ctx->BroadcastValue(
|
llvm::Value *smearEnd = ctx->BroadcastValue(
|
||||||
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
endVals[nDims-1], LLVMTypes::Int32VectorType, "smear_end");
|
||||||
llvm::Value *emask =
|
llvm::Value *emask =
|
||||||
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
ctx->CmpInst(llvm::Instruction::ICmp, llvm::CmpInst::ICMP_SLT,
|
||||||
varyingCounter, smearEnd);
|
varyingCounter, smearEnd);
|
||||||
emask = ctx->I1VecToBoolVec(emask);
|
emask = ctx->I1VecToBoolVec(emask);
|
||||||
ctx->SetInternalMask(emask);
|
ctx->SetInternalMask(emask);
|
||||||
ctx->SetBlockEntryMask(emask);
|
ctx->SetBlockEntryMask(emask);
|
||||||
|
|
||||||
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
|
ctx->StoreInst(LLVMFalse, stepIndexAfterMaskedBodyPtr);
|
||||||
ctx->BranchInst(bbMaskedBody);
|
ctx->BranchInst(bbMaskedBody);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -2048,34 +2073,34 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// mask known to be all-on, which in turn leads to more efficient code
|
// mask known to be all-on, which in turn leads to more efficient code
|
||||||
// for that case.
|
// for that case.
|
||||||
llvm::BasicBlock *bbStepInnerIndex =
|
llvm::BasicBlock *bbStepInnerIndex =
|
||||||
ctx->CreateBasicBlock("step_inner_index");
|
ctx->CreateBasicBlock("step_inner_index");
|
||||||
llvm::BasicBlock *bbMaskedBodyContinue =
|
llvm::BasicBlock *bbMaskedBodyContinue =
|
||||||
ctx->CreateBasicBlock("foreach_masked_continue");
|
ctx->CreateBasicBlock("foreach_masked_continue");
|
||||||
ctx->SetCurrentBasicBlock(bbMaskedBody); {
|
ctx->SetCurrentBasicBlock(bbMaskedBody); {
|
||||||
ctx->AddInstrumentationPoint("foreach loop body (masked)");
|
ctx->AddInstrumentationPoint("foreach loop body (masked)");
|
||||||
ctx->SetContinueTarget(bbMaskedBodyContinue);
|
ctx->SetContinueTarget(bbMaskedBodyContinue);
|
||||||
ctx->DisableGatherScatterWarnings();
|
ctx->DisableGatherScatterWarnings();
|
||||||
ctx->SetBlockEntryMask(ctx->GetFullMask());
|
ctx->SetBlockEntryMask(ctx->GetFullMask());
|
||||||
stmts->EmitCode(ctx);
|
stmts->EmitCode(ctx);
|
||||||
ctx->EnableGatherScatterWarnings();
|
ctx->EnableGatherScatterWarnings();
|
||||||
ctx->BranchInst(bbMaskedBodyContinue);
|
ctx->BranchInst(bbMaskedBodyContinue);
|
||||||
}
|
}
|
||||||
ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
|
ctx->SetCurrentBasicBlock(bbMaskedBodyContinue); {
|
||||||
ctx->RestoreContinuedLanes();
|
ctx->RestoreContinuedLanes();
|
||||||
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
|
llvm::Value *stepIndex = ctx->LoadInst(stepIndexAfterMaskedBodyPtr);
|
||||||
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
|
ctx->BranchInst(bbStepInnerIndex, bbReset[nDims-1], stepIndex);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// step the innermost index, for the case where we're doing the
|
// step the innermost index, for the case where we're doing the
|
||||||
// innermost for loop over full vectors.
|
// innermost for loop over full vectors.
|
||||||
ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
|
ctx->SetCurrentBasicBlock(bbStepInnerIndex); {
|
||||||
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
|
llvm::Value *counter = ctx->LoadInst(uniformCounterPtrs[nDims-1]);
|
||||||
llvm::Value *newCounter =
|
llvm::Value *newCounter =
|
||||||
ctx->BinaryOperator(llvm::Instruction::Add, counter,
|
ctx->BinaryOperator(llvm::Instruction::Add, counter,
|
||||||
LLVMInt32(span[nDims-1]), "new_counter");
|
LLVMInt32(span[nDims-1]), "new_counter");
|
||||||
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
|
ctx->StoreInst(newCounter, uniformCounterPtrs[nDims-1]);
|
||||||
ctx->BranchInst(bbOuterInExtras);
|
ctx->BranchInst(bbOuterInExtras);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -2262,8 +2287,12 @@ ForeachActiveStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// math...)
|
// math...)
|
||||||
|
|
||||||
// Get the "program index" vector value
|
// Get the "program index" vector value
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
|
llvm::Value *programIndex = g->target->getISA() == Target::NVPTX ?
|
||||||
ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
|
ctx->ProgramIndexVectorPTX() : ctx->ProgramIndexVector();
|
||||||
|
#else /* ISPC_NVPTX_ENABLED */
|
||||||
|
llvm::Value *programIndex = ctx->ProgramIndexVector();
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
|
||||||
// And smear the current lane out to a vector
|
// And smear the current lane out to a vector
|
||||||
llvm::Value *firstSet32 =
|
llvm::Value *firstSet32 =
|
||||||
@@ -2460,19 +2489,22 @@ ForeachUniqueStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
// And load the corresponding element value from the temporary
|
// And load the corresponding element value from the temporary
|
||||||
// memory storing the value of the varying expr.
|
// memory storing the value of the varying expr.
|
||||||
llvm::Value *uniqueValue;
|
llvm::Value *uniqueValue;
|
||||||
if (g->target->getISA() != Target::NVPTX)
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
{
|
if (g->target->getISA() == Target::NVPTX)
|
||||||
llvm::Value *uniqueValuePtr =
|
|
||||||
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
|
|
||||||
"unique_index_ptr");
|
|
||||||
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
|
|
||||||
}
|
|
||||||
else /* in case of PTX target, use __shfl PTX intrinsics via __insert/__extract function */
|
|
||||||
{
|
{
|
||||||
llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
|
llvm::Value *firstSet32 = ctx->TruncInst(firstSet, LLVMTypes::Int32Type);
|
||||||
uniqueValue = ctx->Extract(exprValue, firstSet32);
|
uniqueValue = ctx->Extract(exprValue, firstSet32);
|
||||||
}
|
}
|
||||||
|
else
|
||||||
|
{
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
|
llvm::Value *uniqueValuePtr =
|
||||||
|
ctx->GetElementPtrInst(exprMem, LLVMInt64(0), firstSet, exprPtrType,
|
||||||
|
"unique_index_ptr");
|
||||||
|
uniqueValue = ctx->LoadInst(uniqueValuePtr, "unique_value");
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
}
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
// If it's a varying pointer type, need to convert from the int
|
// If it's a varying pointer type, need to convert from the int
|
||||||
// type we store in the vector to the actual pointer type
|
// type we store in the vector to the actual pointer type
|
||||||
if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
|
if (llvm::dyn_cast<llvm::PointerType>(symType) != NULL)
|
||||||
@@ -3379,8 +3411,12 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
|
|||||||
}
|
}
|
||||||
|
|
||||||
// Now we can emit code to call __do_print()
|
// Now we can emit code to call __do_print()
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
|
llvm::Function *printFunc = g->target->getISA() != Target::NVPTX ?
|
||||||
m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
|
m->module->getFunction("__do_print") : m->module->getFunction("__do_print_nvptx");
|
||||||
|
#else /* ISPC_NVPTX_ENABLED */
|
||||||
|
llvm::Function *printFunc = m->module->getFunction("__do_print");
|
||||||
|
#endif /* ISPC_NVPTX_ENABLED */
|
||||||
AssertPos(pos, printFunc);
|
AssertPos(pos, printFunc);
|
||||||
|
|
||||||
llvm::Value *mask = ctx->GetFullMask();
|
llvm::Value *mask = ctx->GetFullMask();
|
||||||
|
|||||||
14
type.cpp
14
type.cpp
@@ -751,7 +751,7 @@ EnumType::Mangle() const {
|
|||||||
std::string ret;
|
std::string ret;
|
||||||
if (isConst) ret += "C";
|
if (isConst) ret += "C";
|
||||||
ret += variability.MangleString();
|
ret += variability.MangleString();
|
||||||
ret += std::string("enum_5B_") + name + std::string("_5C_");
|
ret += std::string("enum[") + name + std::string("]");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -1433,7 +1433,7 @@ ArrayType::Mangle() const {
|
|||||||
sprintf(buf, "%d", numElements);
|
sprintf(buf, "%d", numElements);
|
||||||
else
|
else
|
||||||
buf[0] = '\0';
|
buf[0] = '\0';
|
||||||
return s + "_5B_" + buf + "_5C_";
|
return s + "[" + buf + "]";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -2106,12 +2106,12 @@ lMangleStruct(Variability variability, bool isConst, const std::string &name) {
|
|||||||
Assert(variability != Variability::Unbound);
|
Assert(variability != Variability::Unbound);
|
||||||
|
|
||||||
std::string ret;
|
std::string ret;
|
||||||
ret += "s_5B_";
|
ret += "s[";
|
||||||
if (isConst)
|
if (isConst)
|
||||||
ret += "_c_";
|
ret += "_c_";
|
||||||
ret += variability.MangleString();
|
ret += variability.MangleString();
|
||||||
|
|
||||||
ret += name + std::string("_5C_");
|
ret += name + std::string("]");
|
||||||
return ret;
|
return ret;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -3057,7 +3057,11 @@ FunctionType::LLVMFunctionType(llvm::LLVMContext *ctx, bool removeMask) const {
|
|||||||
llvmArgTypes.push_back(LLVMTypes::MaskType);
|
llvmArgTypes.push_back(LLVMTypes::MaskType);
|
||||||
|
|
||||||
std::vector<llvm::Type *> callTypes;
|
std::vector<llvm::Type *> callTypes;
|
||||||
if (isTask && g->target->getISA() != Target::NVPTX) {
|
if (isTask
|
||||||
|
#ifdef ISPC_NVPTX_ENABLED
|
||||||
|
&& (g->target->getISA() != Target::NVPTX)
|
||||||
|
#endif
|
||||||
|
){
|
||||||
// Tasks take three arguments: a pointer to a struct that holds the
|
// Tasks take three arguments: a pointer to a struct that holds the
|
||||||
// actual task arguments, the thread index, and the total number of
|
// actual task arguments, the thread index, and the total number of
|
||||||
// threads the tasks system has running. (Task arguments are
|
// threads the tasks system has running. (Task arguments are
|
||||||
|
|||||||
Reference in New Issue
Block a user