Merge branch 'master' into arm
Conflicts: Makefile builtins.cpp ispc.cpp ispc.h ispc.vcxproj opt.cpp
This commit is contained in:
27
Makefile
27
Makefile
@@ -39,6 +39,10 @@
|
|||||||
LLVM_CONFIG=$(shell which llvm-config)
|
LLVM_CONFIG=$(shell which llvm-config)
|
||||||
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
||||||
|
|
||||||
|
# Enable ARM by request
|
||||||
|
# To enable: make ARM_ENABLED=1
|
||||||
|
ARM_ENABLED=0
|
||||||
|
|
||||||
# Add llvm bin to the path so any scripts run will go to the right llvm-config
|
# Add llvm bin to the path so any scripts run will go to the right llvm-config
|
||||||
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
|
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
|
||||||
export PATH:=$(LLVM_BIN):$(PATH)
|
export PATH:=$(LLVM_BIN):$(PATH)
|
||||||
@@ -55,12 +59,15 @@ LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
|||||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
|
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
|
||||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||||
|
|
||||||
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker arm
|
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
|
||||||
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
|
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
|
||||||
# We check if it's available before adding it (to not break 3.2 and earlier).
|
# We check if it's available before adding it (to not break 3.2 and earlier).
|
||||||
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
|
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
|
||||||
LLVM_COMPONENTS+=option
|
LLVM_COMPONENTS+=option
|
||||||
endif
|
endif
|
||||||
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
|
LLVM_COMPONENTS+=arm
|
||||||
|
endif
|
||||||
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
|
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
|
||||||
|
|
||||||
CLANG=clang
|
CLANG=clang
|
||||||
@@ -104,6 +111,9 @@ OPT=-O2
|
|||||||
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \
|
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \
|
||||||
-Wall $(LLVM_VERSION_DEF) \
|
-Wall $(LLVM_VERSION_DEF) \
|
||||||
-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""
|
-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""
|
||||||
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
|
CXXFLAGS+=-DISPC_ARM_ENABLED
|
||||||
|
endif
|
||||||
|
|
||||||
LDFLAGS=
|
LDFLAGS=
|
||||||
ifeq ($(ARCH_OS),Linux)
|
ifeq ($(ARCH_OS),Linux)
|
||||||
@@ -122,10 +132,12 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
|||||||
type.cpp util.cpp
|
type.cpp util.cpp
|
||||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||||
opt.h stmt.h sym.h type.h util.h
|
opt.h stmt.h sym.h type.h util.h
|
||||||
TARGETS=neon-32 neon-16 neon-8 \
|
TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
||||||
avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
|
sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
|
||||||
sse2 sse2-x2 sse4 sse4-x2 sse4-8 sse4-16 \
|
generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
|
||||||
generic-1 generic-4 generic-8 generic-16 generic-32 generic-64
|
ifneq ($(ARM_ENABLED), 0)
|
||||||
|
TARGETS+=neon-32 neon-16 neon-8
|
||||||
|
endif
|
||||||
# These files need to be compiled in two versions - 32 and 64 bits.
|
# These files need to be compiled in two versions - 32 and 64 bits.
|
||||||
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
|
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
|
||||||
# These are files to be compiled in single version.
|
# These are files to be compiled in single version.
|
||||||
@@ -134,12 +146,12 @@ BUILTINS_OBJS_32=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-32bi
|
|||||||
BUILTINS_OBJS_64=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-64bit.o)))
|
BUILTINS_OBJS_64=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-64bit.o)))
|
||||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_COMMON:.ll=.o))) \
|
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_COMMON:.ll=.o))) \
|
||||||
$(BUILTINS_OBJS_32) $(BUILTINS_OBJS_64) \
|
$(BUILTINS_OBJS_32) $(BUILTINS_OBJS_64) \
|
||||||
builtins-c-32.cpp builtins-c-64.cpp
|
builtins-c-32.cpp builtins-c-64.cpp
|
||||||
BISON_SRC=parse.yy
|
BISON_SRC=parse.yy
|
||||||
FLEX_SRC=lex.ll
|
FLEX_SRC=lex.ll
|
||||||
|
|
||||||
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
|
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
|
||||||
stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o \
|
stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o \
|
||||||
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
|
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
|
||||||
|
|
||||||
default: ispc
|
default: ispc
|
||||||
@@ -264,4 +276,3 @@ objs/stdlib_mask32_ispc.cpp: stdlib.ispc
|
|||||||
@echo Creating C++ source from $< for mask32
|
@echo Creating C++ source from $< for mask32
|
||||||
@$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
@$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
||||||
python stdlib2cpp.py mask32 > $@
|
python stdlib2cpp.py mask32 > $@
|
||||||
|
|
||||||
|
|||||||
@@ -631,7 +631,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
|||||||
llvm::Triple bcTriple(bcModule->getTargetTriple());
|
llvm::Triple bcTriple(bcModule->getTargetTriple());
|
||||||
Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n",
|
Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n",
|
||||||
mTriple.str().c_str(), bcTriple.str().c_str());
|
mTriple.str().c_str(), bcTriple.str().c_str());
|
||||||
#ifndef __arm__
|
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
|
||||||
// FIXME: More ugly and dangerous stuff. We really haven't set up
|
// FIXME: More ugly and dangerous stuff. We really haven't set up
|
||||||
// proper build and runtime infrastructure for ispc to do
|
// proper build and runtime infrastructure for ispc to do
|
||||||
// cross-compilation, yet it's at minimum useful to be able to emit
|
// cross-compilation, yet it's at minimum useful to be able to emit
|
||||||
@@ -812,6 +812,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
// Next, add the target's custom implementations of the various needed
|
// Next, add the target's custom implementations of the various needed
|
||||||
// builtin functions (e.g. __masked_store_32(), etc).
|
// builtin functions (e.g. __masked_store_32(), etc).
|
||||||
switch (g->target->getISA()) {
|
switch (g->target->getISA()) {
|
||||||
|
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
case Target::NEON8: {
|
case Target::NEON8: {
|
||||||
if (runtime32) {
|
if (runtime32) {
|
||||||
EXPORT_MODULE(builtins_bitcode_neon_8_32bit);
|
EXPORT_MODULE(builtins_bitcode_neon_8_32bit);
|
||||||
@@ -839,6 +841,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
case Target::SSE2: {
|
case Target::SSE2: {
|
||||||
switch (g->target->getVectorWidth()) {
|
switch (g->target->getVectorWidth()) {
|
||||||
case 4:
|
case 4:
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
|
|
||||||
EXAMPLE=mandelbrot
|
EXAMPLE=mandelbrot_tasks
|
||||||
CPP_SRC=mandelbrot.cpp mandelbrot_serial.cpp
|
CPP_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
|
||||||
ISPC_SRC=mandelbrot.ispc
|
ISPC_SRC=mandelbrot_tasks.ispc
|
||||||
ISPC_IA_TARGETS=sse2,sse4-x2,avx-x2
|
ISPC_IA_TARGETS=sse2,sse4-x2,avx-x2
|
||||||
ISPC_ARM_TARGETS=neon
|
ISPC_ARM_TARGETS=neon
|
||||||
|
|
||||||
|
|||||||
@@ -42,7 +42,7 @@
|
|||||||
#include <algorithm>
|
#include <algorithm>
|
||||||
#include <string.h>
|
#include <string.h>
|
||||||
#include "../timing.h"
|
#include "../timing.h"
|
||||||
#include "mandelbrot_ispc.h"
|
#include "mandelbrot_tasks_ispc.h"
|
||||||
using namespace ispc;
|
using namespace ispc;
|
||||||
|
|
||||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||||
@@ -21,7 +21,7 @@
|
|||||||
<PropertyGroup Label="Globals">
|
<PropertyGroup Label="Globals">
|
||||||
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
|
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
|
||||||
<Keyword>Win32Proj</Keyword>
|
<Keyword>Win32Proj</Keyword>
|
||||||
<RootNamespace>mandelbrot</RootNamespace>
|
<RootNamespace>mandelbrot_tasks</RootNamespace>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||||
@@ -65,22 +65,22 @@
|
|||||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
<LinkIncremental>true</LinkIncremental>
|
<LinkIncremental>true</LinkIncremental>
|
||||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
<TargetName>mandelbrot</TargetName>
|
<TargetName>mandelbrot_tasks</TargetName>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||||
<LinkIncremental>true</LinkIncremental>
|
<LinkIncremental>true</LinkIncremental>
|
||||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
<TargetName>mandelbrot</TargetName>
|
<TargetName>mandelbrot_tasks</TargetName>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||||
<LinkIncremental>false</LinkIncremental>
|
<LinkIncremental>false</LinkIncremental>
|
||||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
<TargetName>mandelbrot</TargetName>
|
<TargetName>mandelbrot_tasks</TargetName>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||||
<LinkIncremental>false</LinkIncremental>
|
<LinkIncremental>false</LinkIncremental>
|
||||||
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
|
||||||
<TargetName>mandelbrot</TargetName>
|
<TargetName>mandelbrot_tasks</TargetName>
|
||||||
</PropertyGroup>
|
</PropertyGroup>
|
||||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||||
<ClCompile>
|
<ClCompile>
|
||||||
@@ -153,12 +153,12 @@
|
|||||||
</Link>
|
</Link>
|
||||||
</ItemDefinitionGroup>
|
</ItemDefinitionGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<ClCompile Include="mandelbrot.cpp" />
|
<ClCompile Include="mandelbrot_tasks.cpp" />
|
||||||
<ClCompile Include="mandelbrot_serial.cpp" />
|
<ClCompile Include="mandelbrot_tasks_serial.cpp" />
|
||||||
<ClCompile Include="../tasksys.cpp" />
|
<ClCompile Include="../tasksys.cpp" />
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="mandelbrot.ispc">
|
<CustomBuild Include="mandelbrot_tasks.ispc">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
|
||||||
</Command>
|
</Command>
|
||||||
|
|||||||
@@ -22,7 +22,7 @@ mandelbrot
|
|||||||
#***
|
#***
|
||||||
Mandelbrot Set
|
Mandelbrot Set
|
||||||
mandelbrot_tasks
|
mandelbrot_tasks
|
||||||
mandelbrot
|
mandelbrot_tasks
|
||||||
^
|
^
|
||||||
#***
|
#***
|
||||||
Perlin Noise Function
|
Perlin Noise Function
|
||||||
|
|||||||
@@ -73,10 +73,19 @@ def cpu_get():
|
|||||||
#returns cpu_usage
|
#returns cpu_usage
|
||||||
def cpu_check():
|
def cpu_check():
|
||||||
if is_windows == False:
|
if is_windows == False:
|
||||||
cpu1 = cpu_get()
|
if is_mac == False:
|
||||||
time.sleep(1)
|
cpu1 = cpu_get()
|
||||||
cpu2 = cpu_get()
|
time.sleep(1)
|
||||||
cpu_percent = (float(cpu1[0] - cpu2[0])/float(cpu1[1] - cpu2[1]))*100
|
cpu2 = cpu_get()
|
||||||
|
cpu_percent = (float(cpu1[0] - cpu2[0])/float(cpu1[1] - cpu2[1]))*100
|
||||||
|
else:
|
||||||
|
os.system("sysctl -n vm.loadavg > cpu_temp")
|
||||||
|
c = open("cpu_temp", 'r')
|
||||||
|
c_line = c.readline()
|
||||||
|
c.close
|
||||||
|
os.remove("cpu_temp")
|
||||||
|
R = c_line.split(' ')
|
||||||
|
cpu_percent = float(R[1]) * 3
|
||||||
else:
|
else:
|
||||||
os.system("wmic cpu get loadpercentage /value > cpu_temp")
|
os.system("wmic cpu get loadpercentage /value > cpu_temp")
|
||||||
c = open("cpu_temp", 'r')
|
c = open("cpu_temp", 'r')
|
||||||
@@ -143,6 +152,8 @@ parser.add_option('-p', '--path', dest='path',
|
|||||||
global is_windows
|
global is_windows
|
||||||
is_windows = (platform.system() == 'Windows' or
|
is_windows = (platform.system() == 'Windows' or
|
||||||
'CYGWIN_NT' in platform.system())
|
'CYGWIN_NT' in platform.system())
|
||||||
|
global is_mac
|
||||||
|
is_mac = (platform.system() == 'Darwin')
|
||||||
|
|
||||||
# save current path
|
# save current path
|
||||||
pwd = os.getcwd()
|
pwd = os.getcwd()
|
||||||
|
|||||||
@@ -365,7 +365,7 @@ lAtomicCompareAndSwap32(volatile int32_t *v, int32_t newValue, int32_t oldValue)
|
|||||||
static inline int32_t
|
static inline int32_t
|
||||||
lAtomicAdd(volatile int32_t *v, int32_t delta) {
|
lAtomicAdd(volatile int32_t *v, int32_t delta) {
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
return InterlockedAdd((volatile LONG *)v, delta);
|
return InterlockedExchangeAdd((volatile LONG *)v, delta)+delta;
|
||||||
#else
|
#else
|
||||||
return __sync_fetch_and_add(v, delta);
|
return __sync_fetch_and_add(v, delta);
|
||||||
#endif
|
#endif
|
||||||
|
|||||||
32
ispc.cpp
32
ispc.cpp
@@ -141,10 +141,12 @@ lGetSystemISA() {
|
|||||||
|
|
||||||
|
|
||||||
static const char *supportedCPUs[] = {
|
static const char *supportedCPUs[] = {
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
// FIXME: LLVM supports a ton of different ARM CPU variants--not just
|
// FIXME: LLVM supports a ton of different ARM CPU variants--not just
|
||||||
// cortex-a9 and a15. We should be able to handle any of them that also
|
// cortex-a9 and a15. We should be able to handle any of them that also
|
||||||
// have NEON support.
|
// have NEON support.
|
||||||
"cortex-a9", "cortex-a15",
|
"cortex-a9", "cortex-a15",
|
||||||
|
#endif
|
||||||
"atom", "penryn", "core2", "corei7", "corei7-avx"
|
"atom", "penryn", "core2", "corei7", "corei7-avx"
|
||||||
#if !defined(LLVM_3_1)
|
#if !defined(LLVM_3_1)
|
||||||
, "core-avx-i", "core-avx2"
|
, "core-avx-i", "core-avx2"
|
||||||
@@ -185,9 +187,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
// possible ISA based on that.
|
// possible ISA based on that.
|
||||||
if (!strcmp(cpu, "core-avx2"))
|
if (!strcmp(cpu, "core-avx2"))
|
||||||
isa = "avx2";
|
isa = "avx2";
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (!strcmp(cpu, "cortex-a9") ||
|
else if (!strcmp(cpu, "cortex-a9") ||
|
||||||
!strcmp(cpu, "cortex-a15"))
|
!strcmp(cpu, "cortex-a15"))
|
||||||
isa = "neon-32";
|
isa = "neon-32";
|
||||||
|
#endif
|
||||||
else if (!strcmp(cpu, "core-avx-i"))
|
else if (!strcmp(cpu, "core-avx-i"))
|
||||||
isa = "avx1.1";
|
isa = "avx1.1";
|
||||||
else if (!strcmp(cpu, "sandybridge") ||
|
else if (!strcmp(cpu, "sandybridge") ||
|
||||||
@@ -211,7 +215,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
#if !defined(__arm__)
|
#if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
|
||||||
if (cpu == NULL && !strncmp(isa, "neon", 4))
|
if (cpu == NULL && !strncmp(isa, "neon", 4))
|
||||||
// If we're compiling NEON on an x86 host and the CPU wasn't
|
// If we're compiling NEON on an x86 host and the CPU wasn't
|
||||||
// supplied, don't go and set the CPU based on the host...
|
// supplied, don't go and set the CPU based on the host...
|
||||||
@@ -246,9 +250,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_cpu = cpu;
|
this->m_cpu = cpu;
|
||||||
|
|
||||||
if (arch == NULL) {
|
if (arch == NULL) {
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
if (!strncmp(isa, "neon", 4))
|
if (!strncmp(isa, "neon", 4))
|
||||||
arch = "arm";
|
arch = "arm";
|
||||||
else
|
else
|
||||||
|
#endif
|
||||||
arch = "x86-64";
|
arch = "x86-64";
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -461,6 +467,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_hasGather = true;
|
this->m_hasGather = true;
|
||||||
#endif
|
#endif
|
||||||
}
|
}
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
else if (!strcasecmp(isa, "neon-8")) {
|
else if (!strcasecmp(isa, "neon-8")) {
|
||||||
this->m_isa = Target::NEON8;
|
this->m_isa = Target::NEON8;
|
||||||
this->m_nativeVectorWidth = 16;
|
this->m_nativeVectorWidth = 16;
|
||||||
@@ -488,6 +495,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
this->m_maskingIsFree = false;
|
this->m_maskingIsFree = false;
|
||||||
this->m_maskBitCount = 32;
|
this->m_maskBitCount = 32;
|
||||||
}
|
}
|
||||||
|
#endif
|
||||||
else {
|
else {
|
||||||
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
|
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
|
||||||
isa, SupportedTargetISAs());
|
isa, SupportedTargetISAs());
|
||||||
@@ -502,9 +510,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
|
|||||||
llvm::Reloc::Default;
|
llvm::Reloc::Default;
|
||||||
std::string featuresString = m_attributes;
|
std::string featuresString = m_attributes;
|
||||||
llvm::TargetOptions options;
|
llvm::TargetOptions options;
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
|
if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
|
||||||
m_isa == Target::NEON32)
|
m_isa == Target::NEON32)
|
||||||
options.FloatABIType = llvm::FloatABI::Hard;
|
options.FloatABIType = llvm::FloatABI::Hard;
|
||||||
|
#endif
|
||||||
#if !defined(LLVM_3_1)
|
#if !defined(LLVM_3_1)
|
||||||
if (g->opt.disableFMA == false)
|
if (g->opt.disableFMA == false)
|
||||||
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
|
||||||
@@ -596,13 +606,21 @@ Target::SupportedTargetCPUs() {
|
|||||||
|
|
||||||
const char *
|
const char *
|
||||||
Target::SupportedTargetArchs() {
|
Target::SupportedTargetArchs() {
|
||||||
return "arm, x86, x86-64";
|
return
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
|
"arm, "
|
||||||
|
#endif
|
||||||
|
"x86, x86-64";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
const char *
|
const char *
|
||||||
Target::SupportedTargetISAs() {
|
Target::SupportedTargetISAs() {
|
||||||
return "neon-8, neon-16, neon-32, sse2, sse2-x2, sse4, sse4-8, sse4-16, sse4-x2, "
|
return
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
|
"neon-8, neon-16, neon-32, "
|
||||||
|
#endif
|
||||||
|
"sse2, sse2-x2, sse4, sse4-8, sse4-16, sse4-x2, "
|
||||||
"avx, avx-x2, avx1.1, avx1.1-x2, avx2, avx2-x2, "
|
"avx, avx-x2, avx1.1, avx1.1-x2, avx2, avx2-x2, "
|
||||||
"generic-1, generic-4, generic-8, generic-16, generic-32";
|
"generic-1, generic-4, generic-8, generic-16, generic-32";
|
||||||
}
|
}
|
||||||
@@ -611,10 +629,13 @@ Target::SupportedTargetISAs() {
|
|||||||
std::string
|
std::string
|
||||||
Target::GetTripleString() const {
|
Target::GetTripleString() const {
|
||||||
llvm::Triple triple;
|
llvm::Triple triple;
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
if (m_arch == "arm") {
|
if (m_arch == "arm") {
|
||||||
triple.setTriple("armv7-eabi");
|
triple.setTriple("armv7-eabi");
|
||||||
}
|
}
|
||||||
else {
|
else
|
||||||
|
#endif
|
||||||
|
{
|
||||||
// Start with the host triple as the default
|
// Start with the host triple as the default
|
||||||
triple.setTriple(llvm::sys::getDefaultTargetTriple());
|
triple.setTriple(llvm::sys::getDefaultTargetTriple());
|
||||||
|
|
||||||
@@ -637,12 +658,14 @@ Target::GetTripleString() const {
|
|||||||
const char *
|
const char *
|
||||||
Target::ISAToString(ISA isa) {
|
Target::ISAToString(ISA isa) {
|
||||||
switch (isa) {
|
switch (isa) {
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
case Target::NEON8:
|
case Target::NEON8:
|
||||||
return "neon-8";
|
return "neon-8";
|
||||||
case Target::NEON16:
|
case Target::NEON16:
|
||||||
return "neon-16";
|
return "neon-16";
|
||||||
case Target::NEON32:
|
case Target::NEON32:
|
||||||
return "neon-32";
|
return "neon-32";
|
||||||
|
#endif
|
||||||
case Target::SSE2:
|
case Target::SSE2:
|
||||||
return "sse2";
|
return "sse2";
|
||||||
case Target::SSE4:
|
case Target::SSE4:
|
||||||
@@ -813,6 +836,7 @@ Globals::Globals() {
|
|||||||
includeStdlib = true;
|
includeStdlib = true;
|
||||||
runCPP = true;
|
runCPP = true;
|
||||||
debugPrint = false;
|
debugPrint = false;
|
||||||
|
debugIR = -1;
|
||||||
disableWarnings = false;
|
disableWarnings = false;
|
||||||
warningsAsErrors = false;
|
warningsAsErrors = false;
|
||||||
quiet = false;
|
quiet = false;
|
||||||
|
|||||||
20
ispc.h
20
ispc.h
@@ -59,6 +59,7 @@
|
|||||||
#include <stdlib.h>
|
#include <stdlib.h>
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
#include <vector>
|
#include <vector>
|
||||||
|
#include <set>
|
||||||
#include <string>
|
#include <string>
|
||||||
|
|
||||||
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
||||||
@@ -66,6 +67,9 @@
|
|||||||
*/
|
*/
|
||||||
#define ISPC_MAX_NVEC 64
|
#define ISPC_MAX_NVEC 64
|
||||||
|
|
||||||
|
// Number of final optimization phase
|
||||||
|
#define LAST_OPT_NUMBER 1000
|
||||||
|
|
||||||
// Forward declarations of a number of widely-used LLVM types
|
// Forward declarations of a number of widely-used LLVM types
|
||||||
namespace llvm {
|
namespace llvm {
|
||||||
class AttributeSet;
|
class AttributeSet;
|
||||||
@@ -175,7 +179,11 @@ public:
|
|||||||
flexible/performant of them will appear last in the enumerant. Note
|
flexible/performant of them will appear last in the enumerant. Note
|
||||||
also that __best_available_isa() needs to be updated if ISAs are
|
also that __best_available_isa() needs to be updated if ISAs are
|
||||||
added or the enumerant values are reordered. */
|
added or the enumerant values are reordered. */
|
||||||
enum ISA { NEON32, NEON16, NEON8, SSE2, SSE4, AVX, AVX11, AVX2, GENERIC,
|
enum ISA {
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
|
NEON32, NEON16, NEON8,
|
||||||
|
#endif
|
||||||
|
SSE2, SSE4, AVX, AVX11, AVX2, GENERIC,
|
||||||
NUM_ISAS };
|
NUM_ISAS };
|
||||||
|
|
||||||
/** Initializes the given Target pointer for a target of the given
|
/** Initializes the given Target pointer for a target of the given
|
||||||
@@ -495,6 +503,16 @@ struct Globals {
|
|||||||
ispc's execution. */
|
ispc's execution. */
|
||||||
bool debugPrint;
|
bool debugPrint;
|
||||||
|
|
||||||
|
/** Indicates which stages of optimization we want to dump. */
|
||||||
|
std::set<int> debug_stages;
|
||||||
|
|
||||||
|
/** Indicates after which optimization we want to generate
|
||||||
|
DebugIR information. */
|
||||||
|
int debugIR;
|
||||||
|
|
||||||
|
/** Indicates which phases of optimization we want to switch off. */
|
||||||
|
std::set<int> off_stages;
|
||||||
|
|
||||||
/** Indicates whether all warning messages should be suppressed. */
|
/** Indicates whether all warning messages should be suppressed. */
|
||||||
bool disableWarnings;
|
bool disableWarnings;
|
||||||
|
|
||||||
|
|||||||
82
ispc.vcxproj
82
ispc.vcxproj
@@ -1,4 +1,4 @@
|
|||||||
<?xml version="1.0" encoding="utf-8"?>
|
<?xml version="1.0" encoding="utf-8"?>
|
||||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||||
<ItemGroup Label="ProjectConfigurations">
|
<ItemGroup Label="ProjectConfigurations">
|
||||||
<ProjectConfiguration Include="Debug|Win32">
|
<ProjectConfiguration Include="Debug|Win32">
|
||||||
@@ -45,12 +45,6 @@
|
|||||||
<ClCompile Include="$(Configuration)\gen-bitcode-generic-32-64bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-generic-32-64bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-generic-64-32bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-generic-64-32bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-generic-64-64bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-generic-64-64bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-neon-8-32bit.cpp" />
|
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-neon-8-64bit.cpp" />
|
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-neon-16-32bit.cpp" />
|
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-neon-16-64bit.cpp" />
|
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-neon-32-32bit.cpp" />
|
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-neon-32-64bit.cpp" />
|
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-32bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-32bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-64bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-64bit.cpp" />
|
||||||
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-x2-32bit.cpp" />
|
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-x2-32bit.cpp" />
|
||||||
@@ -191,60 +185,6 @@
|
|||||||
<Message>Building gen-bitcode-sse2-x2-64bit.cpp</Message>
|
<Message>Building gen-bitcode-sse2-x2-64bit.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
|
||||||
<CustomBuild Include="builtins\target-neon-8.ll">
|
|
||||||
<FileType>Document</FileType>
|
|
||||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-neon-8.ll | python bitcode2cpp.py builtins\target-neon-8.ll 32bit > $(Configuration)/gen-bitcode-neon-8-32bit.cpp</Command>
|
|
||||||
<Outputs>$(Configuration)/gen-bitcode-neon-8-32bit.cpp</Outputs>
|
|
||||||
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
|
|
||||||
<Message>Building gen-bitcode-neon-8-32bit.cpp</Message>
|
|
||||||
</CustomBuild>
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CustomBuild Include="builtins\target-neon-8.ll">
|
|
||||||
<FileType>Document</FileType>
|
|
||||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-neon-8.ll | python bitcode2cpp.py builtins\target-neon-8.ll 64bit > $(Configuration)/gen-bitcode-neon-8-64bit.cpp</Command>
|
|
||||||
<Outputs>$(Configuration)/gen-bitcode-neon-8-64bit.cpp</Outputs>
|
|
||||||
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
|
|
||||||
<Message>Building gen-bitcode-neon-8-64bit.cpp</Message>
|
|
||||||
</CustomBuild>
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CustomBuild Include="builtins\target-neon-16.ll">
|
|
||||||
<FileType>Document</FileType>
|
|
||||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-neon-16.ll | python bitcode2cpp.py builtins\target-neon-16.ll 32bit > $(Configuration)/gen-bitcode-neon-16-32bit.cpp</Command>
|
|
||||||
<Outputs>$(Configuration)/gen-bitcode-neon-16-32bit.cpp</Outputs>
|
|
||||||
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
|
|
||||||
<Message>Building gen-bitcode-neon-16-32bit.cpp</Message>
|
|
||||||
</CustomBuild>
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CustomBuild Include="builtins\target-neon-16.ll">
|
|
||||||
<FileType>Document</FileType>
|
|
||||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-neon-16.ll | python bitcode2cpp.py builtins\target-neon-16.ll 64bit > $(Configuration)/gen-bitcode-neon-16-64bit.cpp</Command>
|
|
||||||
<Outputs>$(Configuration)/gen-bitcode-neon-16-64bit.cpp</Outputs>
|
|
||||||
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
|
|
||||||
<Message>Building gen-bitcode-neon-16-64bit.cpp</Message>
|
|
||||||
</CustomBuild>
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CustomBuild Include="builtins\target-neon-32.ll">
|
|
||||||
<FileType>Document</FileType>
|
|
||||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-neon-32.ll | python bitcode2cpp.py builtins\target-neon-32.ll 32bit > $(Configuration)/gen-bitcode-neon-32-32bit.cpp</Command>
|
|
||||||
<Outputs>$(Configuration)/gen-bitcode-neon-32-32bit.cpp</Outputs>
|
|
||||||
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
|
|
||||||
<Message>Building gen-bitcode-neon-32-32bit.cpp</Message>
|
|
||||||
</CustomBuild>
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CustomBuild Include="builtins\target-neon-32.ll">
|
|
||||||
<FileType>Document</FileType>
|
|
||||||
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-neon-32.ll | python bitcode2cpp.py builtins\target-neon-32.ll 64bit > $(Configuration)/gen-bitcode-neon-32-64bit.cpp</Command>
|
|
||||||
<Outputs>$(Configuration)/gen-bitcode-neon-32-64bit.cpp</Outputs>
|
|
||||||
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
|
|
||||||
<Message>Building gen-bitcode-neon-32-64bit.cpp</Message>
|
|
||||||
</CustomBuild>
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins\target-avx1.ll">
|
<CustomBuild Include="builtins\target-avx1.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
@@ -263,6 +203,26 @@
|
|||||||
<Message>Building gen-bitcode-avx1-64bit.cpp</Message>
|
<Message>Building gen-bitcode-avx1-64bit.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
=======
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx1.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 32bit > $(Configuration)/gen-bitcode-avx1-32bit.cpp</Command>
|
||||||
|
<Outputs>$(Configuration)/gen-bitcode-avx1-32bit.cpp</Outputs>
|
||||||
|
<AdditionalInputs>builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||||
|
<Message>Building gen-bitcode-avx1-32bit.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-avx1.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll 64bit > $(Configuration)/gen-bitcode-avx1-64bit.cpp</Command>
|
||||||
|
<Outputs>$(Configuration)/gen-bitcode-avx1-64bit.cpp</Outputs>
|
||||||
|
<AdditionalInputs>builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll</AdditionalInputs>
|
||||||
|
<Message>Building gen-bitcode-avx1-64bit.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
>>>>>>> master
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins\target-avx1-x2.ll">
|
<CustomBuild Include="builtins\target-avx1-x2.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
|
|||||||
63
main.cpp
63
main.cpp
@@ -155,6 +155,11 @@ devUsage(int ret) {
|
|||||||
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
|
||||||
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
|
||||||
printf(" [--yydebug]\t\t\t\tPrint debugging information during parsing\n");
|
printf(" [--yydebug]\t\t\t\tPrint debugging information during parsing\n");
|
||||||
|
printf(" [--debug-phase=<value>]\t\tSet optimization phases to dump. --debug-phase=first,210:220,300,305,310:last\n");
|
||||||
|
#ifdef LLVM_3_4
|
||||||
|
printf(" [--debug-ir=<value>]\t\tSet optimization phase to generate debugIR after it\n");
|
||||||
|
#endif
|
||||||
|
printf(" [--off-phase=<value>]\t\tSwitch off optimization phases. --off-phase=first,210:220,300,305,310:last\n");
|
||||||
exit(ret);
|
exit(ret);
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -211,6 +216,47 @@ lSignal(void *) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static int ParsingPhaseName(char * stage) {
|
||||||
|
if (strncmp(stage, "first", 5) == 0) {
|
||||||
|
return 0;
|
||||||
|
}
|
||||||
|
else if (strncmp(stage, "last", 4) == 0) {
|
||||||
|
return LAST_OPT_NUMBER;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
int t = atoi(stage);
|
||||||
|
if (t < 0 || t > LAST_OPT_NUMBER) {
|
||||||
|
fprintf(stderr, "Phases must be from 0 to %d. %s is incorrect.\n", LAST_OPT_NUMBER, stage);
|
||||||
|
exit(0);
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
return t;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
static std::set<int> ParsingPhases(char * stages) {
|
||||||
|
std::set<int> phases;
|
||||||
|
int begin = ParsingPhaseName(stages);
|
||||||
|
int end = begin;
|
||||||
|
|
||||||
|
for (unsigned i = 0; i < strlen(stages); i++) {
|
||||||
|
if ((stages[i] == ',') || (i == strlen(stages) - 1)) {
|
||||||
|
for (int j = begin; j < end + 1; j++) {
|
||||||
|
phases.insert(j);
|
||||||
|
}
|
||||||
|
begin = ParsingPhaseName(stages + i + 1);
|
||||||
|
end = begin;
|
||||||
|
}
|
||||||
|
else if (stages[i] == ':') {
|
||||||
|
end = ParsingPhaseName(stages + i + 1);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
return phases;
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
static void
|
static void
|
||||||
lParseInclude(const char *path) {
|
lParseInclude(const char *path) {
|
||||||
#ifdef ISPC_IS_WINDOWS
|
#ifdef ISPC_IS_WINDOWS
|
||||||
@@ -253,6 +299,8 @@ int main(int Argc, char *Argv[]) {
|
|||||||
LLVMInitializeX86Disassembler();
|
LLVMInitializeX86Disassembler();
|
||||||
LLVMInitializeX86TargetMC();
|
LLVMInitializeX86TargetMC();
|
||||||
#endif // !__ARM__
|
#endif // !__ARM__
|
||||||
|
|
||||||
|
#ifdef ISPC_ARM_ENABLED
|
||||||
// Generating ARM from x86 is more likely to be useful, though.
|
// Generating ARM from x86 is more likely to be useful, though.
|
||||||
LLVMInitializeARMTargetInfo();
|
LLVMInitializeARMTargetInfo();
|
||||||
LLVMInitializeARMTarget();
|
LLVMInitializeARMTarget();
|
||||||
@@ -260,6 +308,7 @@ int main(int Argc, char *Argv[]) {
|
|||||||
LLVMInitializeARMAsmParser();
|
LLVMInitializeARMAsmParser();
|
||||||
LLVMInitializeARMDisassembler();
|
LLVMInitializeARMDisassembler();
|
||||||
LLVMInitializeARMTargetMC();
|
LLVMInitializeARMTargetMC();
|
||||||
|
#endif
|
||||||
|
|
||||||
char *file = NULL;
|
char *file = NULL;
|
||||||
const char *headerFileName = NULL;
|
const char *headerFileName = NULL;
|
||||||
@@ -486,6 +535,20 @@ int main(int Argc, char *Argv[]) {
|
|||||||
}
|
}
|
||||||
hostStubFileName = argv[i];
|
hostStubFileName = argv[i];
|
||||||
}
|
}
|
||||||
|
else if (strncmp(argv[i], "--debug-phase=", 14) == 0) {
|
||||||
|
fprintf(stderr, "WARNING: Adding debug phases may change the way PassManager"
|
||||||
|
"handles the phases and it may possibly make some bugs go"
|
||||||
|
"away or introduce the new ones.\n");
|
||||||
|
g->debug_stages = ParsingPhases(argv[i] + strlen("--debug-phase="));
|
||||||
|
}
|
||||||
|
#ifdef LLVM_3_4
|
||||||
|
else if (strncmp(argv[i], "--debug-ir=", 11) == 0) {
|
||||||
|
g->debugIR = ParsingPhaseName(argv[i] + strlen("--debug-ir="));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
else if (strncmp(argv[i], "--off-phase=", 12) == 0) {
|
||||||
|
g->off_stages = ParsingPhases(argv[i] + strlen("--off-phase="));
|
||||||
|
}
|
||||||
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
||||||
lPrintVersion();
|
lPrintVersion();
|
||||||
return 0;
|
return 0;
|
||||||
|
|||||||
145
opt.cpp
145
opt.cpp
@@ -63,6 +63,9 @@
|
|||||||
#include <llvm/IR/BasicBlock.h>
|
#include <llvm/IR/BasicBlock.h>
|
||||||
#include <llvm/IR/Constants.h>
|
#include <llvm/IR/Constants.h>
|
||||||
#endif
|
#endif
|
||||||
|
#if defined (LLVM_3_4)
|
||||||
|
#include <llvm/Transforms/Instrumentation.h>
|
||||||
|
#endif
|
||||||
#include <llvm/PassManager.h>
|
#include <llvm/PassManager.h>
|
||||||
#include <llvm/PassRegistry.h>
|
#include <llvm/PassRegistry.h>
|
||||||
#include <llvm/Assembly/PrintModulePass.h>
|
#include <llvm/Assembly/PrintModulePass.h>
|
||||||
@@ -119,6 +122,8 @@ static llvm::Pass *CreateReplacePseudoMemoryOpsPass();
|
|||||||
static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry);
|
static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry);
|
||||||
static llvm::Pass *CreateMakeInternalFuncsStaticPass();
|
static llvm::Pass *CreateMakeInternalFuncsStaticPass();
|
||||||
|
|
||||||
|
static llvm::Pass *CreateDebugPass(char * output);
|
||||||
|
|
||||||
#define DEBUG_START_PASS(NAME) \
|
#define DEBUG_START_PASS(NAME) \
|
||||||
if (g->debugPrint && \
|
if (g->debugPrint && \
|
||||||
(getenv("FUNC") == NULL || \
|
(getenv("FUNC") == NULL || \
|
||||||
@@ -395,6 +400,54 @@ lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
// This is a wrap over class llvm::PassManager. This duplicates PassManager function run()
|
||||||
|
// and change PassManager function add by adding some checks and debug passes.
|
||||||
|
// This wrap can control:
|
||||||
|
// - If we want to switch off optimization with given number.
|
||||||
|
// - If we want to dump LLVM IR after optimization with given number.
|
||||||
|
// - If we want to generate LLVM IR debug for gdb after optimization with given number.
|
||||||
|
class DebugPassManager {
|
||||||
|
public:
|
||||||
|
DebugPassManager():number(0){}
|
||||||
|
void add(llvm::Pass * P, int stage);
|
||||||
|
bool run(llvm::Module& M) {return PM.run(M);}
|
||||||
|
llvm::PassManager& getPM() {return PM;}
|
||||||
|
|
||||||
|
private:
|
||||||
|
llvm::PassManager PM;
|
||||||
|
int number;
|
||||||
|
};
|
||||||
|
|
||||||
|
void
|
||||||
|
DebugPassManager::add(llvm::Pass * P, int stage = -1) {
|
||||||
|
// taking number of optimization
|
||||||
|
if (stage == -1) {
|
||||||
|
number++;
|
||||||
|
}
|
||||||
|
else {
|
||||||
|
number = stage;
|
||||||
|
}
|
||||||
|
if (g->off_stages.find(number) == g->off_stages.end()) {
|
||||||
|
// adding optimization (not switched off)
|
||||||
|
PM.add(P);
|
||||||
|
if (g->debug_stages.find(number) != g->debug_stages.end()) {
|
||||||
|
// adding dump of LLVM IR after optimization
|
||||||
|
char buf[100];
|
||||||
|
sprintf(buf, "\n\n*****LLVM IR after phase %d: %s*****\n\n",
|
||||||
|
number, P->getPassName());
|
||||||
|
PM.add(CreateDebugPass(buf));
|
||||||
|
}
|
||||||
|
#ifdef LLVM_3_4
|
||||||
|
if (g->debugIR == number) {
|
||||||
|
// adding generating of LLVM IR debug after optimization
|
||||||
|
char buf[100];
|
||||||
|
sprintf(buf, "Debug_IR_after_%d_phase.bc", number);
|
||||||
|
PM.add(llvm::createDebugIRPass(true, true, ".", buf));
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
}
|
||||||
|
}
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
|
|
||||||
void
|
void
|
||||||
@@ -403,14 +456,8 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
printf("*** Code going into optimization ***\n");
|
printf("*** Code going into optimization ***\n");
|
||||||
module->dump();
|
module->dump();
|
||||||
}
|
}
|
||||||
|
DebugPassManager optPM;
|
||||||
llvm::PassManager optPM;
|
optPM.add(llvm::createVerifierPass(),0);
|
||||||
optPM.add(llvm::createVerifierPass());
|
|
||||||
|
|
||||||
#if 0
|
|
||||||
std::string err;
|
|
||||||
optPM.add(llvm::createPrintModulePass(new llvm::raw_fd_ostream("-", err)));
|
|
||||||
#endif
|
|
||||||
|
|
||||||
llvm::TargetLibraryInfo *targetLibraryInfo =
|
llvm::TargetLibraryInfo *targetLibraryInfo =
|
||||||
new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
|
new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
|
||||||
@@ -427,7 +474,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(),
|
optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(),
|
||||||
targetMachine->getVectorTargetTransformInfo()));
|
targetMachine->getVectorTargetTransformInfo()));
|
||||||
#else // LLVM 3.3+
|
#else // LLVM 3.3+
|
||||||
targetMachine->addAnalysisPasses(optPM);
|
targetMachine->addAnalysisPasses(optPM.getPM());
|
||||||
#endif
|
#endif
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
@@ -439,11 +486,11 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
// run absolutely no optimizations, since the front-end needs us to
|
// run absolutely no optimizations, since the front-end needs us to
|
||||||
// take the various __pseudo_* functions it has emitted and turn
|
// take the various __pseudo_* functions it has emitted and turn
|
||||||
// them into something that can actually execute.
|
// them into something that can actually execute.
|
||||||
optPM.add(CreateImproveMemoryOpsPass());
|
optPM.add(CreateImproveMemoryOpsPass(), 100);
|
||||||
if (g->opt.disableHandlePseudoMemoryOps == false)
|
if (g->opt.disableHandlePseudoMemoryOps == false)
|
||||||
optPM.add(CreateReplacePseudoMemoryOpsPass());
|
optPM.add(CreateReplacePseudoMemoryOpsPass());
|
||||||
|
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
optPM.add(CreateIntrinsicsOptPass(), 102);
|
||||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
optPM.add(CreateIsCompileTimeConstantPass(true));
|
||||||
optPM.add(llvm::createFunctionInliningPass());
|
optPM.add(llvm::createFunctionInliningPass());
|
||||||
optPM.add(CreateMakeInternalFuncsStaticPass());
|
optPM.add(CreateMakeInternalFuncsStaticPass());
|
||||||
@@ -462,7 +509,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
llvm::initializeInstrumentation(*registry);
|
llvm::initializeInstrumentation(*registry);
|
||||||
llvm::initializeTarget(*registry);
|
llvm::initializeTarget(*registry);
|
||||||
|
|
||||||
optPM.add(llvm::createGlobalDCEPass());
|
optPM.add(llvm::createGlobalDCEPass(), 200);
|
||||||
|
|
||||||
// Early optimizations to try to reduce the total amount of code to
|
// Early optimizations to try to reduce the total amount of code to
|
||||||
// work with if we can
|
// work with if we can
|
||||||
@@ -476,14 +523,14 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
|
|
||||||
if (g->opt.disableGatherScatterOptimizations == false &&
|
if (g->opt.disableGatherScatterOptimizations == false &&
|
||||||
g->target->getVectorWidth() > 1) {
|
g->target->getVectorWidth() > 1) {
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass(), 210);
|
||||||
optPM.add(CreateImproveMemoryOpsPass());
|
optPM.add(CreateImproveMemoryOpsPass());
|
||||||
}
|
}
|
||||||
if (!g->opt.disableMaskAllOnOptimizations) {
|
if (!g->opt.disableMaskAllOnOptimizations) {
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
optPM.add(CreateIntrinsicsOptPass(), 215);
|
||||||
optPM.add(CreateInstructionSimplifyPass());
|
optPM.add(CreateInstructionSimplifyPass());
|
||||||
}
|
}
|
||||||
optPM.add(llvm::createDeadInstEliminationPass());
|
optPM.add(llvm::createDeadInstEliminationPass(), 220);
|
||||||
|
|
||||||
// Max struct size threshold for scalar replacement is
|
// Max struct size threshold for scalar replacement is
|
||||||
// 1) 4 fields (r,g,b,w)
|
// 1) 4 fields (r,g,b,w)
|
||||||
@@ -513,10 +560,10 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
#if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3)
|
#if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3)
|
||||||
// Starting from 3.4 this functionality was moved to
|
// Starting from 3.4 this functionality was moved to
|
||||||
// InstructionCombiningPass. See r184459 for details.
|
// InstructionCombiningPass. See r184459 for details.
|
||||||
optPM.add(llvm::createSimplifyLibCallsPass());
|
optPM.add(llvm::createSimplifyLibCallsPass(), 240);
|
||||||
#endif
|
#endif
|
||||||
optPM.add(llvm::createAggressiveDCEPass());
|
optPM.add(llvm::createAggressiveDCEPass());
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass(), 241);
|
||||||
optPM.add(llvm::createJumpThreadingPass());
|
optPM.add(llvm::createJumpThreadingPass());
|
||||||
optPM.add(llvm::createCFGSimplificationPass());
|
optPM.add(llvm::createCFGSimplificationPass());
|
||||||
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
|
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
|
||||||
@@ -524,44 +571,45 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(llvm::createTailCallEliminationPass());
|
optPM.add(llvm::createTailCallEliminationPass());
|
||||||
|
|
||||||
if (!g->opt.disableMaskAllOnOptimizations) {
|
if (!g->opt.disableMaskAllOnOptimizations) {
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
optPM.add(CreateIntrinsicsOptPass(), 250);
|
||||||
optPM.add(CreateInstructionSimplifyPass());
|
optPM.add(CreateInstructionSimplifyPass());
|
||||||
}
|
}
|
||||||
|
|
||||||
if (g->opt.disableGatherScatterOptimizations == false &&
|
if (g->opt.disableGatherScatterOptimizations == false &&
|
||||||
g->target->getVectorWidth() > 1) {
|
g->target->getVectorWidth() > 1) {
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass(), 255);
|
||||||
optPM.add(CreateImproveMemoryOpsPass());
|
optPM.add(CreateImproveMemoryOpsPass());
|
||||||
|
|
||||||
if (g->opt.disableCoalescing == false &&
|
if (g->opt.disableCoalescing == false &&
|
||||||
g->target->getISA() != Target::GENERIC) {
|
g->target->getISA() != Target::GENERIC) {
|
||||||
// It is important to run this here to make it easier to
|
// It is important to run this here to make it easier to
|
||||||
// finding matching gathers we can coalesce..
|
// finding matching gathers we can coalesce..
|
||||||
optPM.add(llvm::createEarlyCSEPass());
|
optPM.add(llvm::createEarlyCSEPass(), 260);
|
||||||
optPM.add(CreateGatherCoalescePass());
|
optPM.add(CreateGatherCoalescePass());
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
optPM.add(llvm::createFunctionInliningPass());
|
optPM.add(llvm::createFunctionInliningPass(), 265);
|
||||||
optPM.add(llvm::createConstantPropagationPass());
|
optPM.add(llvm::createConstantPropagationPass());
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
optPM.add(CreateIntrinsicsOptPass());
|
||||||
optPM.add(CreateInstructionSimplifyPass());
|
optPM.add(CreateInstructionSimplifyPass());
|
||||||
|
|
||||||
if (g->opt.disableGatherScatterOptimizations == false &&
|
if (g->opt.disableGatherScatterOptimizations == false &&
|
||||||
g->target->getVectorWidth() > 1) {
|
g->target->getVectorWidth() > 1) {
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass(), 270);
|
||||||
optPM.add(CreateImproveMemoryOpsPass());
|
optPM.add(CreateImproveMemoryOpsPass());
|
||||||
}
|
}
|
||||||
|
|
||||||
optPM.add(llvm::createIPSCCPPass());
|
optPM.add(llvm::createIPSCCPPass(), 275);
|
||||||
optPM.add(llvm::createDeadArgEliminationPass());
|
optPM.add(llvm::createDeadArgEliminationPass());
|
||||||
optPM.add(llvm::createAggressiveDCEPass());
|
optPM.add(llvm::createAggressiveDCEPass());
|
||||||
optPM.add(llvm::createInstructionCombiningPass());
|
optPM.add(llvm::createInstructionCombiningPass());
|
||||||
optPM.add(llvm::createCFGSimplificationPass());
|
optPM.add(llvm::createCFGSimplificationPass());
|
||||||
|
|
||||||
if (g->opt.disableHandlePseudoMemoryOps == false)
|
if (g->opt.disableHandlePseudoMemoryOps == false) {
|
||||||
optPM.add(CreateReplacePseudoMemoryOpsPass());
|
optPM.add(CreateReplacePseudoMemoryOpsPass(),280);
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
}
|
||||||
|
optPM.add(CreateIntrinsicsOptPass(),281);
|
||||||
optPM.add(CreateInstructionSimplifyPass());
|
optPM.add(CreateInstructionSimplifyPass());
|
||||||
|
|
||||||
optPM.add(llvm::createFunctionInliningPass());
|
optPM.add(llvm::createFunctionInliningPass());
|
||||||
@@ -579,9 +627,10 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
optPM.add(llvm::createIndVarSimplifyPass());
|
optPM.add(llvm::createIndVarSimplifyPass());
|
||||||
optPM.add(llvm::createLoopIdiomPass());
|
optPM.add(llvm::createLoopIdiomPass());
|
||||||
optPM.add(llvm::createLoopDeletionPass());
|
optPM.add(llvm::createLoopDeletionPass());
|
||||||
if (g->opt.unrollLoops)
|
if (g->opt.unrollLoops) {
|
||||||
optPM.add(llvm::createLoopUnrollPass());
|
optPM.add(llvm::createLoopUnrollPass(), 300);
|
||||||
optPM.add(llvm::createGVNPass());
|
}
|
||||||
|
optPM.add(llvm::createGVNPass(), 301);
|
||||||
|
|
||||||
optPM.add(CreateIsCompileTimeConstantPass(true));
|
optPM.add(CreateIsCompileTimeConstantPass(true));
|
||||||
optPM.add(CreateIntrinsicsOptPass());
|
optPM.add(CreateIntrinsicsOptPass());
|
||||||
@@ -609,7 +658,7 @@ Optimize(llvm::Module *module, int optLevel) {
|
|||||||
|
|
||||||
// Finish up by making sure we didn't mess anything up in the IR along
|
// Finish up by making sure we didn't mess anything up in the IR along
|
||||||
// the way.
|
// the way.
|
||||||
optPM.add(llvm::createVerifierPass());
|
optPM.add(llvm::createVerifierPass(), LAST_OPT_NUMBER);
|
||||||
optPM.run(*module);
|
optPM.run(*module);
|
||||||
|
|
||||||
if (g->debugPrint) {
|
if (g->debugPrint) {
|
||||||
@@ -4330,6 +4379,42 @@ CreateIsCompileTimeConstantPass(bool isLastTry) {
|
|||||||
return new IsCompileTimeConstantPass(isLastTry);
|
return new IsCompileTimeConstantPass(isLastTry);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
//////////////////////////////////////////////////////////////////////////
|
||||||
|
// DebugPass
|
||||||
|
|
||||||
|
/** This pass is added in list of passes after optimizations which
|
||||||
|
we want to debug and print dump of LLVM IR in stderr. Also it
|
||||||
|
prints name and number of previous optimization.
|
||||||
|
*/
|
||||||
|
class DebugPass : public llvm::ModulePass {
|
||||||
|
public:
|
||||||
|
static char ID;
|
||||||
|
DebugPass(char * output) : ModulePass(ID) {
|
||||||
|
sprintf(str_output, "%s", output);
|
||||||
|
}
|
||||||
|
|
||||||
|
const char *getPassName() const { return "Dump LLVM IR"; }
|
||||||
|
bool runOnModule(llvm::Module &m);
|
||||||
|
|
||||||
|
private:
|
||||||
|
char str_output[100];
|
||||||
|
};
|
||||||
|
|
||||||
|
char DebugPass::ID = 0;
|
||||||
|
|
||||||
|
bool
|
||||||
|
DebugPass::runOnModule(llvm::Module &module) {
|
||||||
|
fprintf(stderr, "%s", str_output);
|
||||||
|
fflush(stderr);
|
||||||
|
module.dump();
|
||||||
|
return true;
|
||||||
|
}
|
||||||
|
|
||||||
|
/** Creates a DebugPass that prints `output` as its banner.

    @param output  NUL-terminated banner text handed to the DebugPass
                   constructor.
    @return        The newly allocated pass.
 */
static llvm::Pass *
CreateDebugPass(char * output) {
    llvm::Pass *dumper = new DebugPass(output);
    return dumper;
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// MakeInternalFuncsStaticPass
|
// MakeInternalFuncsStaticPass
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user