Merge branch 'master' into arm

Conflicts:
	Makefile
	builtins.cpp
	ispc.cpp
	ispc.h
	ispc.vcxproj
	opt.cpp
This commit is contained in:
Matt Pharr
2013-08-06 17:09:48 -07:00
15 changed files with 298 additions and 123 deletions

View File

@@ -39,6 +39,10 @@
LLVM_CONFIG=$(shell which llvm-config) LLVM_CONFIG=$(shell which llvm-config)
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir) CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
# Enable ARM by request
# To enable: make ARM_ENABLED=1
ARM_ENABLED=0
# Add llvm bin to the path so any scripts run will go to the right llvm-config # Add llvm bin to the path so any scripts run will go to the right llvm-config
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir) LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
export PATH:=$(LLVM_BIN):$(PATH) export PATH:=$(LLVM_BIN):$(PATH)
@@ -55,12 +59,15 @@ LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//) LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
LLVM_VERSION_DEF=-D$(LLVM_VERSION) LLVM_VERSION_DEF=-D$(LLVM_VERSION)
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker arm LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step. # Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
# We check if it's available before adding it (to not break 3.2 and earlier). # We check if it's available before adding it (to not break 3.2 and earlier).
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1) ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
LLVM_COMPONENTS+=option LLVM_COMPONENTS+=option
endif endif
ifneq ($(ARM_ENABLED), 0)
LLVM_COMPONENTS+=arm
endif
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS)) LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
CLANG=clang CLANG=clang
@@ -104,6 +111,9 @@ OPT=-O2
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \ CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \
-Wall $(LLVM_VERSION_DEF) \ -Wall $(LLVM_VERSION_DEF) \
-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\"" -DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""
ifneq ($(ARM_ENABLED), 0)
CXXFLAGS+=-DISPC_ARM_ENABLED
endif
LDFLAGS= LDFLAGS=
ifeq ($(ARCH_OS),Linux) ifeq ($(ARCH_OS),Linux)
@@ -122,10 +132,12 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
type.cpp util.cpp type.cpp util.cpp
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h opt.h stmt.h sym.h type.h util.h
TARGETS=neon-32 neon-16 neon-8 \ TARGETS=avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \
avx1 avx1-x2 avx11 avx11-x2 avx2 avx2-x2 \ sse2 sse2-x2 sse4-8 sse4-16 sse4 sse4-x2 \
sse2 sse2-x2 sse4 sse4-x2 sse4-8 sse4-16 \ generic-4 generic-8 generic-16 generic-32 generic-64 generic-1
generic-1 generic-4 generic-8 generic-16 generic-32 generic-64 ifneq ($(ARM_ENABLED), 0)
TARGETS+=neon-32 neon-16 neon-8
endif
# These files need to be compiled in two versions - 32 and 64 bits. # These files need to be compiled in two versions - 32 and 64 bits.
BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) BUILTINS_SRC_TARGET=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS)))
# These are files to be compiled in single version. # These are files to be compiled in single version.
@@ -134,12 +146,12 @@ BUILTINS_OBJS_32=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-32bi
BUILTINS_OBJS_64=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-64bit.o))) BUILTINS_OBJS_64=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_TARGET:.ll=-64bit.o)))
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_COMMON:.ll=.o))) \ BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC_COMMON:.ll=.o))) \
$(BUILTINS_OBJS_32) $(BUILTINS_OBJS_64) \ $(BUILTINS_OBJS_32) $(BUILTINS_OBJS_64) \
builtins-c-32.cpp builtins-c-64.cpp builtins-c-32.cpp builtins-c-64.cpp
BISON_SRC=parse.yy BISON_SRC=parse.yy
FLEX_SRC=lex.ll FLEX_SRC=lex.ll
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \ OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o \ stdlib_mask1_ispc.o stdlib_mask8_ispc.o stdlib_mask16_ispc.o stdlib_mask32_ispc.o \
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o)) $(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
default: ispc default: ispc
@@ -264,4 +276,3 @@ objs/stdlib_mask32_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for mask32 @echo Creating C++ source from $< for mask32
@$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.1415926536 $< -o - | \ @$(CLANG) -E -x c -DISPC_MASK_BITS=32 -DISPC=1 -DPI=3.1415926536 $< -o - | \
python stdlib2cpp.py mask32 > $@ python stdlib2cpp.py mask32 > $@

View File

@@ -631,7 +631,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
llvm::Triple bcTriple(bcModule->getTargetTriple()); llvm::Triple bcTriple(bcModule->getTargetTriple());
Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n", Debug(SourcePos(), "module triple: %s\nbitcode triple: %s\n",
mTriple.str().c_str(), bcTriple.str().c_str()); mTriple.str().c_str(), bcTriple.str().c_str());
#ifndef __arm__ #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
// FIXME: More ugly and dangerous stuff. We really haven't set up // FIXME: More ugly and dangerous stuff. We really haven't set up
// proper build and runtime infrastructure for ispc to do // proper build and runtime infrastructure for ispc to do
// cross-compilation, yet it's at minimum useful to be able to emit // cross-compilation, yet it's at minimum useful to be able to emit
@@ -812,6 +812,8 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
// Next, add the target's custom implementations of the various needed // Next, add the target's custom implementations of the various needed
// builtin functions (e.g. __masked_store_32(), etc). // builtin functions (e.g. __masked_store_32(), etc).
switch (g->target->getISA()) { switch (g->target->getISA()) {
#ifdef ISPC_ARM_ENABLED
case Target::NEON8: { case Target::NEON8: {
if (runtime32) { if (runtime32) {
EXPORT_MODULE(builtins_bitcode_neon_8_32bit); EXPORT_MODULE(builtins_bitcode_neon_8_32bit);
@@ -839,6 +841,7 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
} }
break; break;
} }
#endif
case Target::SSE2: { case Target::SSE2: {
switch (g->target->getVectorWidth()) { switch (g->target->getVectorWidth()) {
case 4: case 4:

View File

@@ -1,7 +1,7 @@
EXAMPLE=mandelbrot EXAMPLE=mandelbrot_tasks
CPP_SRC=mandelbrot.cpp mandelbrot_serial.cpp CPP_SRC=mandelbrot_tasks.cpp mandelbrot_tasks_serial.cpp
ISPC_SRC=mandelbrot.ispc ISPC_SRC=mandelbrot_tasks.ispc
ISPC_IA_TARGETS=sse2,sse4-x2,avx-x2 ISPC_IA_TARGETS=sse2,sse4-x2,avx-x2
ISPC_ARM_TARGETS=neon ISPC_ARM_TARGETS=neon

View File

@@ -42,7 +42,7 @@
#include <algorithm> #include <algorithm>
#include <string.h> #include <string.h>
#include "../timing.h" #include "../timing.h"
#include "mandelbrot_ispc.h" #include "mandelbrot_tasks_ispc.h"
using namespace ispc; using namespace ispc;
extern void mandelbrot_serial(float x0, float y0, float x1, float y1, extern void mandelbrot_serial(float x0, float y0, float x1, float y1,

View File

@@ -21,7 +21,7 @@
<PropertyGroup Label="Globals"> <PropertyGroup Label="Globals">
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid> <ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
<Keyword>Win32Proj</Keyword> <Keyword>Win32Proj</Keyword>
<RootNamespace>mandelbrot</RootNamespace> <RootNamespace>mandelbrot_tasks</RootNamespace>
</PropertyGroup> </PropertyGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" /> <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
@@ -65,22 +65,22 @@
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<LinkIncremental>true</LinkIncremental> <LinkIncremental>true</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath> <ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot</TargetName> <TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
<LinkIncremental>true</LinkIncremental> <LinkIncremental>true</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath> <ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot</TargetName> <TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath> <ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot</TargetName> <TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup> </PropertyGroup>
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'"> <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
<LinkIncremental>false</LinkIncremental> <LinkIncremental>false</LinkIncremental>
<ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath> <ExecutablePath>$(ProjectDir)..\..;$(ExecutablePath)</ExecutablePath>
<TargetName>mandelbrot</TargetName> <TargetName>mandelbrot_tasks</TargetName>
</PropertyGroup> </PropertyGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'"> <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
<ClCompile> <ClCompile>
@@ -153,12 +153,12 @@
</Link> </Link>
</ItemDefinitionGroup> </ItemDefinitionGroup>
<ItemGroup> <ItemGroup>
<ClCompile Include="mandelbrot.cpp" /> <ClCompile Include="mandelbrot_tasks.cpp" />
<ClCompile Include="mandelbrot_serial.cpp" /> <ClCompile Include="mandelbrot_tasks_serial.cpp" />
<ClCompile Include="../tasksys.cpp" /> <ClCompile Include="../tasksys.cpp" />
</ItemGroup> </ItemGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Include="mandelbrot.ispc"> <CustomBuild Include="mandelbrot_tasks.ispc">
<FileType>Document</FileType> <FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2 <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc -O2 %(Filename).ispc -o $(TargetDir)%(Filename).obj -h $(TargetDir)%(Filename)_ispc.h --arch=x86 --target=sse2,sse4-x2,avx-x2
</Command> </Command>

View File

@@ -22,7 +22,7 @@ mandelbrot
#*** #***
Mandelbrot Set Mandelbrot Set
mandelbrot_tasks mandelbrot_tasks
mandelbrot mandelbrot_tasks
^ ^
#*** #***
Perlin Noise Function Perlin Noise Function

View File

@@ -73,10 +73,19 @@ def cpu_get():
#returns cpu_usage #returns cpu_usage
def cpu_check(): def cpu_check():
if is_windows == False: if is_windows == False:
cpu1 = cpu_get() if is_mac == False:
time.sleep(1) cpu1 = cpu_get()
cpu2 = cpu_get() time.sleep(1)
cpu_percent = (float(cpu1[0] - cpu2[0])/float(cpu1[1] - cpu2[1]))*100 cpu2 = cpu_get()
cpu_percent = (float(cpu1[0] - cpu2[0])/float(cpu1[1] - cpu2[1]))*100
else:
os.system("sysctl -n vm.loadavg > cpu_temp")
c = open("cpu_temp", 'r')
c_line = c.readline()
c.close
os.remove("cpu_temp")
R = c_line.split(' ')
cpu_percent = float(R[1]) * 3
else: else:
os.system("wmic cpu get loadpercentage /value > cpu_temp") os.system("wmic cpu get loadpercentage /value > cpu_temp")
c = open("cpu_temp", 'r') c = open("cpu_temp", 'r')
@@ -143,6 +152,8 @@ parser.add_option('-p', '--path', dest='path',
global is_windows global is_windows
is_windows = (platform.system() == 'Windows' or is_windows = (platform.system() == 'Windows' or
'CYGWIN_NT' in platform.system()) 'CYGWIN_NT' in platform.system())
global is_mac
is_mac = (platform.system() == 'Darwin')
# save current path # save current path
pwd = os.getcwd() pwd = os.getcwd()

View File

@@ -365,7 +365,7 @@ lAtomicCompareAndSwap32(volatile int32_t *v, int32_t newValue, int32_t oldValue)
static inline int32_t static inline int32_t
lAtomicAdd(volatile int32_t *v, int32_t delta) { lAtomicAdd(volatile int32_t *v, int32_t delta) {
#ifdef ISPC_IS_WINDOWS #ifdef ISPC_IS_WINDOWS
return InterlockedAdd((volatile LONG *)v, delta); return InterlockedExchangeAdd((volatile LONG *)v, delta)+delta;
#else #else
return __sync_fetch_and_add(v, delta); return __sync_fetch_and_add(v, delta);
#endif #endif

View File

@@ -141,10 +141,12 @@ lGetSystemISA() {
static const char *supportedCPUs[] = { static const char *supportedCPUs[] = {
#ifdef ISPC_ARM_ENABLED
// FIXME: LLVM supports a ton of different ARM CPU variants--not just // FIXME: LLVM supports a ton of different ARM CPU variants--not just
// cortex-a9 and a15. We should be able to handle any of them that also // cortex-a9 and a15. We should be able to handle any of them that also
// have NEON support. // have NEON support.
"cortex-a9", "cortex-a15", "cortex-a9", "cortex-a15",
#endif
"atom", "penryn", "core2", "corei7", "corei7-avx" "atom", "penryn", "core2", "corei7", "corei7-avx"
#if !defined(LLVM_3_1) #if !defined(LLVM_3_1)
, "core-avx-i", "core-avx2" , "core-avx-i", "core-avx2"
@@ -185,9 +187,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
// possible ISA based on that. // possible ISA based on that.
if (!strcmp(cpu, "core-avx2")) if (!strcmp(cpu, "core-avx2"))
isa = "avx2"; isa = "avx2";
#ifdef ISPC_ARM_ENABLED
else if (!strcmp(cpu, "cortex-a9") || else if (!strcmp(cpu, "cortex-a9") ||
!strcmp(cpu, "cortex-a15")) !strcmp(cpu, "cortex-a15"))
isa = "neon-32"; isa = "neon-32";
#endif
else if (!strcmp(cpu, "core-avx-i")) else if (!strcmp(cpu, "core-avx-i"))
isa = "avx1.1"; isa = "avx1.1";
else if (!strcmp(cpu, "sandybridge") || else if (!strcmp(cpu, "sandybridge") ||
@@ -211,7 +215,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
} }
} }
#if !defined(__arm__) #if defined(ISPC_ARM_ENABLED) && !defined(__arm__)
if (cpu == NULL && !strncmp(isa, "neon", 4)) if (cpu == NULL && !strncmp(isa, "neon", 4))
// If we're compiling NEON on an x86 host and the CPU wasn't // If we're compiling NEON on an x86 host and the CPU wasn't
// supplied, don't go and set the CPU based on the host... // supplied, don't go and set the CPU based on the host...
@@ -246,9 +250,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_cpu = cpu; this->m_cpu = cpu;
if (arch == NULL) { if (arch == NULL) {
#ifdef ISPC_ARM_ENABLED
if (!strncmp(isa, "neon", 4)) if (!strncmp(isa, "neon", 4))
arch = "arm"; arch = "arm";
else else
#endif
arch = "x86-64"; arch = "x86-64";
} }
@@ -461,6 +467,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_hasGather = true; this->m_hasGather = true;
#endif #endif
} }
#ifdef ISPC_ARM_ENABLED
else if (!strcasecmp(isa, "neon-8")) { else if (!strcasecmp(isa, "neon-8")) {
this->m_isa = Target::NEON8; this->m_isa = Target::NEON8;
this->m_nativeVectorWidth = 16; this->m_nativeVectorWidth = 16;
@@ -488,6 +495,7 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
this->m_maskingIsFree = false; this->m_maskingIsFree = false;
this->m_maskBitCount = 32; this->m_maskBitCount = 32;
} }
#endif
else { else {
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n", fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
isa, SupportedTargetISAs()); isa, SupportedTargetISAs());
@@ -502,9 +510,11 @@ Target::Target(const char *arch, const char *cpu, const char *isa, bool pic) :
llvm::Reloc::Default; llvm::Reloc::Default;
std::string featuresString = m_attributes; std::string featuresString = m_attributes;
llvm::TargetOptions options; llvm::TargetOptions options;
#ifdef ISPC_ARM_ENABLED
if (m_isa == Target::NEON8 || m_isa == Target::NEON16 || if (m_isa == Target::NEON8 || m_isa == Target::NEON16 ||
m_isa == Target::NEON32) m_isa == Target::NEON32)
options.FloatABIType = llvm::FloatABI::Hard; options.FloatABIType = llvm::FloatABI::Hard;
#endif
#if !defined(LLVM_3_1) #if !defined(LLVM_3_1)
if (g->opt.disableFMA == false) if (g->opt.disableFMA == false)
options.AllowFPOpFusion = llvm::FPOpFusion::Fast; options.AllowFPOpFusion = llvm::FPOpFusion::Fast;
@@ -596,13 +606,21 @@ Target::SupportedTargetCPUs() {
const char * const char *
Target::SupportedTargetArchs() { Target::SupportedTargetArchs() {
return "arm, x86, x86-64"; return
#ifdef ISPC_ARM_ENABLED
"arm, "
#endif
"x86, x86-64";
} }
const char * const char *
Target::SupportedTargetISAs() { Target::SupportedTargetISAs() {
return "neon-8, neon-16, neon-32, sse2, sse2-x2, sse4, sse4-8, sse4-16, sse4-x2, " return
#ifdef ISPC_ARM_ENABLED
"neon-8, neon-16, neon-32, "
#endif
"sse2, sse2-x2, sse4, sse4-8, sse4-16, sse4-x2, "
"avx, avx-x2, avx1.1, avx1.1-x2, avx2, avx2-x2, " "avx, avx-x2, avx1.1, avx1.1-x2, avx2, avx2-x2, "
"generic-1, generic-4, generic-8, generic-16, generic-32"; "generic-1, generic-4, generic-8, generic-16, generic-32";
} }
@@ -611,10 +629,13 @@ Target::SupportedTargetISAs() {
std::string std::string
Target::GetTripleString() const { Target::GetTripleString() const {
llvm::Triple triple; llvm::Triple triple;
#ifdef ISPC_ARM_ENABLED
if (m_arch == "arm") { if (m_arch == "arm") {
triple.setTriple("armv7-eabi"); triple.setTriple("armv7-eabi");
} }
else { else
#endif
{
// Start with the host triple as the default // Start with the host triple as the default
triple.setTriple(llvm::sys::getDefaultTargetTriple()); triple.setTriple(llvm::sys::getDefaultTargetTriple());
@@ -637,12 +658,14 @@ Target::GetTripleString() const {
const char * const char *
Target::ISAToString(ISA isa) { Target::ISAToString(ISA isa) {
switch (isa) { switch (isa) {
#ifdef ISPC_ARM_ENABLED
case Target::NEON8: case Target::NEON8:
return "neon-8"; return "neon-8";
case Target::NEON16: case Target::NEON16:
return "neon-16"; return "neon-16";
case Target::NEON32: case Target::NEON32:
return "neon-32"; return "neon-32";
#endif
case Target::SSE2: case Target::SSE2:
return "sse2"; return "sse2";
case Target::SSE4: case Target::SSE4:
@@ -813,6 +836,7 @@ Globals::Globals() {
includeStdlib = true; includeStdlib = true;
runCPP = true; runCPP = true;
debugPrint = false; debugPrint = false;
debugIR = -1;
disableWarnings = false; disableWarnings = false;
warningsAsErrors = false; warningsAsErrors = false;
quiet = false; quiet = false;

20
ispc.h
View File

@@ -59,6 +59,7 @@
#include <stdlib.h> #include <stdlib.h>
#include <stdio.h> #include <stdio.h>
#include <vector> #include <vector>
#include <set>
#include <string> #include <string>
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation /** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
@@ -66,6 +67,9 @@
*/ */
#define ISPC_MAX_NVEC 64 #define ISPC_MAX_NVEC 64
// Number of final optimization phase
#define LAST_OPT_NUMBER 1000
// Forward declarations of a number of widely-used LLVM types // Forward declarations of a number of widely-used LLVM types
namespace llvm { namespace llvm {
class AttributeSet; class AttributeSet;
@@ -175,7 +179,11 @@ public:
flexible/performant of them will appear last in the enumerant. Note flexible/performant of them will appear last in the enumerant. Note
also that __best_available_isa() needs to be updated if ISAs are also that __best_available_isa() needs to be updated if ISAs are
added or the enumerant values are reordered. */ added or the enumerant values are reordered. */
enum ISA { NEON32, NEON16, NEON8, SSE2, SSE4, AVX, AVX11, AVX2, GENERIC, enum ISA {
#ifdef ISPC_ARM_ENABLED
NEON32, NEON16, NEON8,
#endif
SSE2, SSE4, AVX, AVX11, AVX2, GENERIC,
NUM_ISAS }; NUM_ISAS };
/** Initializes the given Target pointer for a target of the given /** Initializes the given Target pointer for a target of the given
@@ -495,6 +503,16 @@ struct Globals {
ispc's execution. */ ispc's execution. */
bool debugPrint; bool debugPrint;
/** Indicates which stages of optimization we want to dump. */
std::set<int> debug_stages;
/** Indicates after which optimization we want to generate
DebugIR information. */
int debugIR;
/** Indicates which phases of optimization we want to switch off. */
std::set<int> off_stages;
/** Indicates whether all warning messages should be suppressed. */ /** Indicates whether all warning messages should be suppressed. */
bool disableWarnings; bool disableWarnings;

View File

@@ -1,4 +1,4 @@
<?xml version="1.0" encoding="utf-8"?> <?xml version="1.0" encoding="utf-8"?>
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003"> <Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Label="ProjectConfigurations"> <ItemGroup Label="ProjectConfigurations">
<ProjectConfiguration Include="Debug|Win32"> <ProjectConfiguration Include="Debug|Win32">
@@ -45,12 +45,6 @@
<ClCompile Include="$(Configuration)\gen-bitcode-generic-32-64bit.cpp" /> <ClCompile Include="$(Configuration)\gen-bitcode-generic-32-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-generic-64-32bit.cpp" /> <ClCompile Include="$(Configuration)\gen-bitcode-generic-64-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-generic-64-64bit.cpp" /> <ClCompile Include="$(Configuration)\gen-bitcode-generic-64-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-neon-8-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-neon-8-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-neon-16-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-neon-16-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-neon-32-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-neon-32-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-32bit.cpp" /> <ClCompile Include="$(Configuration)\gen-bitcode-sse2-32bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-64bit.cpp" /> <ClCompile Include="$(Configuration)\gen-bitcode-sse2-64bit.cpp" />
<ClCompile Include="$(Configuration)\gen-bitcode-sse2-x2-32bit.cpp" /> <ClCompile Include="$(Configuration)\gen-bitcode-sse2-x2-32bit.cpp" />
@@ -191,60 +185,6 @@
<Message>Building gen-bitcode-sse2-x2-64bit.cpp</Message> <Message>Building gen-bitcode-sse2-x2-64bit.cpp</Message>
</CustomBuild> </CustomBuild>
</ItemGroup> </ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-neon-8.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-neon-8.ll | python bitcode2cpp.py builtins\target-neon-8.ll 32bit &gt; $(Configuration)/gen-bitcode-neon-8-32bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-neon-8-32bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
<Message>Building gen-bitcode-neon-8-32bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-neon-8.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-neon-8.ll | python bitcode2cpp.py builtins\target-neon-8.ll 64bit &gt; $(Configuration)/gen-bitcode-neon-8-64bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-neon-8-64bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
<Message>Building gen-bitcode-neon-8-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-neon-16.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-neon-16.ll | python bitcode2cpp.py builtins\target-neon-16.ll 32bit &gt; $(Configuration)/gen-bitcode-neon-16-32bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-neon-16-32bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
<Message>Building gen-bitcode-neon-16-32bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-neon-16.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-neon-16.ll | python bitcode2cpp.py builtins\target-neon-16.ll 64bit &gt; $(Configuration)/gen-bitcode-neon-16-64bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-neon-16-64bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
<Message>Building gen-bitcode-neon-16-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-neon-32.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=32 builtins/target-neon-32.ll | python bitcode2cpp.py builtins\target-neon-32.ll 32bit &gt; $(Configuration)/gen-bitcode-neon-32-32bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-neon-32-32bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
<Message>Building gen-bitcode-neon-32-32bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-neon-32.ll">
<FileType>Document</FileType>
<Command>m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% -DBUILD_OS=WINDOWS -DRUNTIME=64 builtins/target-neon-32.ll | python bitcode2cpp.py builtins\target-neon-32.ll 64bit &gt; $(Configuration)/gen-bitcode-neon-32-64bit.cpp</Command>
<Outputs>$(Configuration)/gen-bitcode-neon-32-64bit.cpp</Outputs>
<AdditionalInputs>builtins\util.m4;builtins\target-neon-common.ll</AdditionalInputs>
<Message>Building gen-bitcode-neon-32-64bit.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup> <ItemGroup>
<CustomBuild Include="builtins\target-avx1.ll"> <CustomBuild Include="builtins\target-avx1.ll">
<FileType>Document</FileType> <FileType>Document</FileType>
@@ -263,6 +203,26 @@
<Message>Building gen-bitcode-avx1-64bit.cpp</Message> <Message>Building gen-bitcode-avx1-64bit.cpp</Message>
</CustomBuild> </CustomBuild>
</ItemGroup> </ItemGroup>
<!-- Unresolved merge-conflict markers and a second copy of the target-avx1 (32-bit and 64-bit) CustomBuild ItemGroups were removed here; those ItemGroups are already defined directly above. -->
<ItemGroup> <ItemGroup>
<CustomBuild Include="builtins\target-avx1-x2.ll"> <CustomBuild Include="builtins\target-avx1-x2.ll">
<FileType>Document</FileType> <FileType>Document</FileType>

View File

@@ -155,6 +155,11 @@ devUsage(int ret) {
printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n"); printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n"); printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
printf(" [--yydebug]\t\t\t\tPrint debugging information during parsing\n"); printf(" [--yydebug]\t\t\t\tPrint debugging information during parsing\n");
printf(" [--debug-phase=<value>]\t\tSet optimization phases to dump. --debug-phase=first,210:220,300,305,310:last\n");
#ifdef LLVM_3_4
printf(" [--debug-ir=<value>]\t\tSet optimization phase to generate debugIR after it\n");
#endif
printf(" [--off-phase=<value>]\t\tSwitch off optimization phases. --off-phase=first,210:220,300,305,310:last\n");
exit(ret); exit(ret);
} }
@@ -211,6 +216,47 @@ lSignal(void *) {
} }
/** Converts one optimization-phase specifier into its phase number.

    Only the leading characters of the text are examined, so the specifier
    may be followed by more text (for example ":220" or ",300").

    Recognized forms:
     - text starting with "first" -> 0
     - text starting with "last"  -> LAST_OPT_NUMBER
     - anything else              -> the result of atoi(); text that does
                                     not start with a number therefore
                                     yields 0.

    @param stage  NUL-terminated text beginning at the specifier.
    @return       The phase number, between 0 and LAST_OPT_NUMBER inclusive.

    If the parsed number lies outside that range, a message is written to
    stderr and the process terminates with a non-zero exit status. */
static int ParsingPhaseName(char * stage) {
    if (strncmp(stage, "first", 5) == 0) {
        return 0;
    }
    else if (strncmp(stage, "last", 4) == 0) {
        return LAST_OPT_NUMBER;
    }
    else {
        int t = atoi(stage);
        if (t < 0 || t > LAST_OPT_NUMBER) {
            fprintf(stderr, "Phases must be from 0 to %d. %s is incorrect.\n", LAST_OPT_NUMBER, stage);
            // An invalid phase is an error, so report failure to the
            // caller instead of exiting with the success status 0.
            exit(1);
        }
        else {
            return t;
        }
    }
}
/** Expands a comma-separated list of phases and phase ranges into the set
    of individual phase numbers.

    Each list element is either a single specifier or a range written as
    "begin:end"; every specifier is interpreted by ParsingPhaseName().  A
    range contributes all numbers from begin to end inclusive, and a range
    whose end is smaller than its begin contributes nothing.

    Example: "first,210:220,300" gives {0, 210, 211, ..., 220, 300}.

    @param stages  NUL-terminated list text (e.g. the part of a command-line
                   argument following the '=').
    @return        The set of selected phase numbers; empty when the text
                   is empty. */
static std::set<int> ParsingPhases(char * stages) {
    std::set<int> phases;

    // Measure the text once; it is not modified while it is scanned, so
    // there is no need to call strlen() again on every iteration.
    const size_t length = strlen(stages);

    int begin = ParsingPhaseName(stages);
    int end = begin;

    for (size_t i = 0; i < length; i++) {
        // A comma, or the final character of the text, closes the current
        // element: flush its range and start the next element right after.
        if ((stages[i] == ',') || (i == length - 1)) {
            for (int j = begin; j <= end; j++) {
                phases.insert(j);
            }
            begin = ParsingPhaseName(stages + i + 1);
            end = begin;
        }
        // A colon introduces the upper bound of the current range.
        else if (stages[i] == ':') {
            end = ParsingPhaseName(stages + i + 1);
        }
    }

    return phases;
}
static void static void
lParseInclude(const char *path) { lParseInclude(const char *path) {
#ifdef ISPC_IS_WINDOWS #ifdef ISPC_IS_WINDOWS
@@ -253,6 +299,8 @@ int main(int Argc, char *Argv[]) {
LLVMInitializeX86Disassembler(); LLVMInitializeX86Disassembler();
LLVMInitializeX86TargetMC(); LLVMInitializeX86TargetMC();
#endif // !__ARM__ #endif // !__ARM__
#ifdef ISPC_ARM_ENABLED
// Generating ARM from x86 is more likely to be useful, though. // Generating ARM from x86 is more likely to be useful, though.
LLVMInitializeARMTargetInfo(); LLVMInitializeARMTargetInfo();
LLVMInitializeARMTarget(); LLVMInitializeARMTarget();
@@ -260,6 +308,7 @@ int main(int Argc, char *Argv[]) {
LLVMInitializeARMAsmParser(); LLVMInitializeARMAsmParser();
LLVMInitializeARMDisassembler(); LLVMInitializeARMDisassembler();
LLVMInitializeARMTargetMC(); LLVMInitializeARMTargetMC();
#endif
char *file = NULL; char *file = NULL;
const char *headerFileName = NULL; const char *headerFileName = NULL;
@@ -486,6 +535,20 @@ int main(int Argc, char *Argv[]) {
} }
hostStubFileName = argv[i]; hostStubFileName = argv[i];
} }
else if (strncmp(argv[i], "--debug-phase=", 14) == 0) {
    // --debug-phase=<list>: record which optimization phases to dump.
    // Adjacent string literals are joined with nothing in between, so each
    // piece carries its own trailing space to keep the words apart.
    fprintf(stderr, "WARNING: Adding debug phases may change the way PassManager "
                    "handles the phases and it may possibly make some bugs go "
                    "away or introduce the new ones.\n");
    g->debug_stages = ParsingPhases(argv[i] + strlen("--debug-phase="));
}
#ifdef LLVM_3_4
else if (strncmp(argv[i], "--debug-ir=", 11) == 0) {
g->debugIR = ParsingPhaseName(argv[i] + strlen("--debug-ir="));
}
#endif
else if (strncmp(argv[i], "--off-phase=", 12) == 0) {
g->off_stages = ParsingPhases(argv[i] + strlen("--off-phase="));
}
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) { else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
lPrintVersion(); lPrintVersion();
return 0; return 0;

145
opt.cpp
View File

@@ -63,6 +63,9 @@
#include <llvm/IR/BasicBlock.h> #include <llvm/IR/BasicBlock.h>
#include <llvm/IR/Constants.h> #include <llvm/IR/Constants.h>
#endif #endif
#if defined (LLVM_3_4)
#include <llvm/Transforms/Instrumentation.h>
#endif
#include <llvm/PassManager.h> #include <llvm/PassManager.h>
#include <llvm/PassRegistry.h> #include <llvm/PassRegistry.h>
#include <llvm/Assembly/PrintModulePass.h> #include <llvm/Assembly/PrintModulePass.h>
@@ -119,6 +122,8 @@ static llvm::Pass *CreateReplacePseudoMemoryOpsPass();
static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry); static llvm::Pass *CreateIsCompileTimeConstantPass(bool isLastTry);
static llvm::Pass *CreateMakeInternalFuncsStaticPass(); static llvm::Pass *CreateMakeInternalFuncsStaticPass();
static llvm::Pass *CreateDebugPass(char * output);
#define DEBUG_START_PASS(NAME) \ #define DEBUG_START_PASS(NAME) \
if (g->debugPrint && \ if (g->debugPrint && \
(getenv("FUNC") == NULL || \ (getenv("FUNC") == NULL || \
@@ -395,6 +400,54 @@ lGetMaskStatus(llvm::Value *mask, int vecWidth = -1) {
} }
///////////////////////////////////////////////////////////////////////////
// DebugPassManager is a thin wrapper around llvm::PassManager. Its run()
// simply forwards to the wrapped manager, while its add() gives every pass
// a phase number and consults the global settings in g to decide:
//  - whether the optimization with that number is switched off
//    (g->off_stages), in which case the pass is not added at all;
//  - whether LLVM IR should be dumped after the optimization with that
//    number (g->debug_stages);
//  - whether LLVM IR debug info for gdb should be generated after the
//    optimization with that number (g->debugIR, LLVM 3.4 builds only).
class DebugPassManager {
public:
// Phase numbering starts at 0, i.e. before any pass has been added.
DebugPassManager():number(0){}
// Adds pass P as phase number `stage`. The out-of-class definition gives
// `stage` a default of -1, which means "previous phase number plus one".
void add(llvm::Pass * P, int stage);
// Runs the wrapped pass manager on M and returns its result unchanged.
bool run(llvm::Module& M) {return PM.run(M);}
// Gives direct access to the wrapped llvm::PassManager. Anything added
// through this reference goes straight to PM and therefore bypasses the
// phase numbering and the off/debug checks performed by add().
llvm::PassManager& getPM() {return PM;}
private:
// The real pass manager that all accepted passes are forwarded to.
llvm::PassManager PM;
// Phase number assigned to the pass most recently passed to add().
int number;
};
// Adds pass P to the wrapped pass manager under a phase number, honouring
// the phase controls stored in the global settings g.
//
//   P     - pass to register.
//   stage - explicit phase number for P, or -1 (the default) to use the
//           number of the previously submitted pass plus one.
//
// If the resulting phase number is listed in g->off_stages the pass is not
// added. Otherwise it is added, followed by an IR-dumping pass when the
// number is listed in g->debug_stages and (LLVM 3.4 only) by a debug-IR
// generating pass when the number equals g->debugIR.
void
DebugPassManager::add(llvm::Pass * P, int stage = -1) {
    // Work out the phase number for this pass.
    if (stage == -1) {
        number++;
    }
    else {
        number = stage;
    }

    if (g->off_stages.find(number) != g->off_stages.end()) {
        // This phase has been switched off by the user.
        // NOTE(review): P is never handed to PM on this path, so it looks
        // like it is leaked -- confirm whether it should be deleted here.
        return;
    }

    // The optimization itself (not switched off).
    PM.add(P);

    if (g->debug_stages.find(number) != g->debug_stages.end()) {
        // Dump of LLVM IR after the optimization. The fixed banner text
        // (36 chars) plus the widest possible int (11 chars) plus the
        // terminating NUL take 48 bytes, so capping the pass name at 50
        // characters guarantees the result fits into buf no matter how
        // long the name reported by the pass is.
        char buf[100];
        sprintf(buf, "\n\n*****LLVM IR after phase %d: %.50s*****\n\n",
                number, P->getPassName());
        PM.add(CreateDebugPass(buf));
    }

#ifdef LLVM_3_4
    if (g->debugIR == number) {
        // Generation of LLVM IR debug info after the optimization. The
        // file name is at most 35 characters, so buf is large enough.
        char buf[100];
        sprintf(buf, "Debug_IR_after_%d_phase.bc", number);
        PM.add(llvm::createDebugIRPass(true, true, ".", buf));
    }
#endif
}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
void void
@@ -403,14 +456,8 @@ Optimize(llvm::Module *module, int optLevel) {
printf("*** Code going into optimization ***\n"); printf("*** Code going into optimization ***\n");
module->dump(); module->dump();
} }
DebugPassManager optPM;
llvm::PassManager optPM; optPM.add(llvm::createVerifierPass(),0);
optPM.add(llvm::createVerifierPass());
#if 0
std::string err;
optPM.add(llvm::createPrintModulePass(new llvm::raw_fd_ostream("-", err)));
#endif
llvm::TargetLibraryInfo *targetLibraryInfo = llvm::TargetLibraryInfo *targetLibraryInfo =
new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple())); new llvm::TargetLibraryInfo(llvm::Triple(module->getTargetTriple()));
@@ -427,7 +474,7 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(), optPM.add(new llvm::TargetTransformInfo(targetMachine->getScalarTargetTransformInfo(),
targetMachine->getVectorTargetTransformInfo())); targetMachine->getVectorTargetTransformInfo()));
#else // LLVM 3.3+ #else // LLVM 3.3+
targetMachine->addAnalysisPasses(optPM); targetMachine->addAnalysisPasses(optPM.getPM());
#endif #endif
#endif #endif
@@ -439,11 +486,11 @@ Optimize(llvm::Module *module, int optLevel) {
// run absolutely no optimizations, since the front-end needs us to // run absolutely no optimizations, since the front-end needs us to
// take the various __pseudo_* functions it has emitted and turn // take the various __pseudo_* functions it has emitted and turn
// them into something that can actually execute. // them into something that can actually execute.
optPM.add(CreateImproveMemoryOpsPass()); optPM.add(CreateImproveMemoryOpsPass(), 100);
if (g->opt.disableHandlePseudoMemoryOps == false) if (g->opt.disableHandlePseudoMemoryOps == false)
optPM.add(CreateReplacePseudoMemoryOpsPass()); optPM.add(CreateReplacePseudoMemoryOpsPass());
optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateIntrinsicsOptPass(), 102);
optPM.add(CreateIsCompileTimeConstantPass(true)); optPM.add(CreateIsCompileTimeConstantPass(true));
optPM.add(llvm::createFunctionInliningPass()); optPM.add(llvm::createFunctionInliningPass());
optPM.add(CreateMakeInternalFuncsStaticPass()); optPM.add(CreateMakeInternalFuncsStaticPass());
@@ -462,7 +509,7 @@ Optimize(llvm::Module *module, int optLevel) {
llvm::initializeInstrumentation(*registry); llvm::initializeInstrumentation(*registry);
llvm::initializeTarget(*registry); llvm::initializeTarget(*registry);
optPM.add(llvm::createGlobalDCEPass()); optPM.add(llvm::createGlobalDCEPass(), 200);
// Early optimizations to try to reduce the total amount of code to // Early optimizations to try to reduce the total amount of code to
// work with if we can // work with if we can
@@ -476,14 +523,14 @@ Optimize(llvm::Module *module, int optLevel) {
if (g->opt.disableGatherScatterOptimizations == false && if (g->opt.disableGatherScatterOptimizations == false &&
g->target->getVectorWidth() > 1) { g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass(), 210);
optPM.add(CreateImproveMemoryOpsPass()); optPM.add(CreateImproveMemoryOpsPass());
} }
if (!g->opt.disableMaskAllOnOptimizations) { if (!g->opt.disableMaskAllOnOptimizations) {
optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateIntrinsicsOptPass(), 215);
optPM.add(CreateInstructionSimplifyPass()); optPM.add(CreateInstructionSimplifyPass());
} }
optPM.add(llvm::createDeadInstEliminationPass()); optPM.add(llvm::createDeadInstEliminationPass(), 220);
// Max struct size threshold for scalar replacement is // Max struct size threshold for scalar replacement is
// 1) 4 fields (r,g,b,w) // 1) 4 fields (r,g,b,w)
@@ -513,10 +560,10 @@ Optimize(llvm::Module *module, int optLevel) {
#if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3) #if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3)
// Starting from 3.4 this functionality was moved to // Starting from 3.4 this functionality was moved to
// InstructionCombiningPass. See r184459 for details. // InstructionCombiningPass. See r184459 for details.
optPM.add(llvm::createSimplifyLibCallsPass()); optPM.add(llvm::createSimplifyLibCallsPass(), 240);
#endif #endif
optPM.add(llvm::createAggressiveDCEPass()); optPM.add(llvm::createAggressiveDCEPass());
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass(), 241);
optPM.add(llvm::createJumpThreadingPass()); optPM.add(llvm::createJumpThreadingPass());
optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createCFGSimplificationPass());
optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold)); optPM.add(llvm::createScalarReplAggregatesPass(sr_threshold));
@@ -524,44 +571,45 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createTailCallEliminationPass()); optPM.add(llvm::createTailCallEliminationPass());
if (!g->opt.disableMaskAllOnOptimizations) { if (!g->opt.disableMaskAllOnOptimizations) {
optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateIntrinsicsOptPass(), 250);
optPM.add(CreateInstructionSimplifyPass()); optPM.add(CreateInstructionSimplifyPass());
} }
if (g->opt.disableGatherScatterOptimizations == false && if (g->opt.disableGatherScatterOptimizations == false &&
g->target->getVectorWidth() > 1) { g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass(), 255);
optPM.add(CreateImproveMemoryOpsPass()); optPM.add(CreateImproveMemoryOpsPass());
if (g->opt.disableCoalescing == false && if (g->opt.disableCoalescing == false &&
g->target->getISA() != Target::GENERIC) { g->target->getISA() != Target::GENERIC) {
// It is important to run this here to make it easier to // It is important to run this here to make it easier to
// finding matching gathers we can coalesce.. // finding matching gathers we can coalesce..
optPM.add(llvm::createEarlyCSEPass()); optPM.add(llvm::createEarlyCSEPass(), 260);
optPM.add(CreateGatherCoalescePass()); optPM.add(CreateGatherCoalescePass());
} }
} }
optPM.add(llvm::createFunctionInliningPass()); optPM.add(llvm::createFunctionInliningPass(), 265);
optPM.add(llvm::createConstantPropagationPass()); optPM.add(llvm::createConstantPropagationPass());
optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateIntrinsicsOptPass());
optPM.add(CreateInstructionSimplifyPass()); optPM.add(CreateInstructionSimplifyPass());
if (g->opt.disableGatherScatterOptimizations == false && if (g->opt.disableGatherScatterOptimizations == false &&
g->target->getVectorWidth() > 1) { g->target->getVectorWidth() > 1) {
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass(), 270);
optPM.add(CreateImproveMemoryOpsPass()); optPM.add(CreateImproveMemoryOpsPass());
} }
optPM.add(llvm::createIPSCCPPass()); optPM.add(llvm::createIPSCCPPass(), 275);
optPM.add(llvm::createDeadArgEliminationPass()); optPM.add(llvm::createDeadArgEliminationPass());
optPM.add(llvm::createAggressiveDCEPass()); optPM.add(llvm::createAggressiveDCEPass());
optPM.add(llvm::createInstructionCombiningPass()); optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createCFGSimplificationPass()); optPM.add(llvm::createCFGSimplificationPass());
if (g->opt.disableHandlePseudoMemoryOps == false) if (g->opt.disableHandlePseudoMemoryOps == false) {
optPM.add(CreateReplacePseudoMemoryOpsPass()); optPM.add(CreateReplacePseudoMemoryOpsPass(),280);
optPM.add(CreateIntrinsicsOptPass()); }
optPM.add(CreateIntrinsicsOptPass(),281);
optPM.add(CreateInstructionSimplifyPass()); optPM.add(CreateInstructionSimplifyPass());
optPM.add(llvm::createFunctionInliningPass()); optPM.add(llvm::createFunctionInliningPass());
@@ -579,9 +627,10 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createIndVarSimplifyPass()); optPM.add(llvm::createIndVarSimplifyPass());
optPM.add(llvm::createLoopIdiomPass()); optPM.add(llvm::createLoopIdiomPass());
optPM.add(llvm::createLoopDeletionPass()); optPM.add(llvm::createLoopDeletionPass());
if (g->opt.unrollLoops) if (g->opt.unrollLoops) {
optPM.add(llvm::createLoopUnrollPass()); optPM.add(llvm::createLoopUnrollPass(), 300);
optPM.add(llvm::createGVNPass()); }
optPM.add(llvm::createGVNPass(), 301);
optPM.add(CreateIsCompileTimeConstantPass(true)); optPM.add(CreateIsCompileTimeConstantPass(true));
optPM.add(CreateIntrinsicsOptPass()); optPM.add(CreateIntrinsicsOptPass());
@@ -609,7 +658,7 @@ Optimize(llvm::Module *module, int optLevel) {
// Finish up by making sure we didn't mess anything up in the IR along // Finish up by making sure we didn't mess anything up in the IR along
// the way. // the way.
optPM.add(llvm::createVerifierPass()); optPM.add(llvm::createVerifierPass(), LAST_OPT_NUMBER);
optPM.run(*module); optPM.run(*module);
if (g->debugPrint) { if (g->debugPrint) {
@@ -4330,6 +4379,42 @@ CreateIsCompileTimeConstantPass(bool isLastTry) {
return new IsCompileTimeConstantPass(isLastTry); return new IsCompileTimeConstantPass(isLastTry);
} }
//////////////////////////////////////////////////////////////////////////
// DebugPass
/** This pass is added in list of passes after optimizations which
we want to debug and print dump of LLVM IR in stderr. Also it
prints name and number of previous optimization.
*/
class DebugPass : public llvm::ModulePass {
public:
static char ID;
DebugPass(char * output) : ModulePass(ID) {
sprintf(str_output, "%s", output);
}
const char *getPassName() const { return "Dump LLVM IR"; }
bool runOnModule(llvm::Module &m);
private:
char str_output[100];
};
char DebugPass::ID = 0;
/** Prints the stored banner and then a dump of the module's IR.

    @param module  Module whose IR is dumped; it is only read.
    @return false: this pass inspects the module but never modifies it, so
            it must not report a modification to the pass manager.
 */
bool
DebugPass::runOnModule(llvm::Module &module) {
    fprintf(stderr, "%s", str_output);
    // Push the banner out before dumping -- presumably so that it cannot
    // end up after the IR text if the dump uses a different stream object.
    fflush(stderr);
    module.dump();
    return false;
}
/** Factory for the IR-dumping debug pass.

    @param output  Banner text the new pass prints ahead of the IR dump.
    @return A newly allocated DebugPass, returned through the generic
            llvm::Pass interface.
 */
static llvm::Pass *
CreateDebugPass(char * output) {
    llvm::Pass *dumper = new DebugPass(output);
    return dumper;
}
/////////////////////////////////////////////////////////////////////////// ///////////////////////////////////////////////////////////////////////////
// MakeInternalFuncsStaticPass // MakeInternalFuncsStaticPass