Compare commits
26 Commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
594485c38c | ||
|
|
d52e2d5a8d | ||
|
|
fc66066d4d | ||
|
|
6169338815 | ||
|
|
6bc8cb1ff1 | ||
|
|
0fc49b1c37 | ||
|
|
9fb981e9a0 | ||
|
|
cba1b3cedd | ||
|
|
12c4512932 | ||
|
|
0dd1dbb568 | ||
|
|
fdcec5a219 | ||
|
|
bebab7ab0d | ||
|
|
fb771b6aa3 | ||
|
|
8156559475 | ||
|
|
9f5e51cd01 | ||
|
|
27daab2f1b | ||
|
|
c4d404b15f | ||
|
|
95fcdc36ee | ||
|
|
2fdaba53c1 | ||
|
|
5c89080469 | ||
|
|
d92f9df17c | ||
|
|
f551390420 | ||
|
|
8642b4d89f | ||
|
|
6fb70c307d | ||
|
|
d08346fbcf | ||
|
|
141d240a91 |
@@ -1,4 +1,4 @@
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2013, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -77,7 +77,7 @@ covered by the following license:
|
||||
University of Illinois/NCSA
|
||||
Open Source License
|
||||
|
||||
Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
|
||||
Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
|
||||
All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
||||
16
Makefile
16
Makefile
@@ -51,7 +51,17 @@ else
|
||||
endif
|
||||
ARCH_TYPE = $(shell arch)
|
||||
|
||||
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs engine ipo bitreader bitwriter instrumentation linker)
|
||||
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
|
||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||
|
||||
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
|
||||
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
|
||||
# We check if it's available before adding it (to not break 3.2 and earlier).
|
||||
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
|
||||
LLVM_COMPONENTS+=option
|
||||
endif
|
||||
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
|
||||
|
||||
CLANG=clang
|
||||
CLANG_LIBS = -lclangFrontend -lclangDriver \
|
||||
@@ -70,10 +80,6 @@ ifeq ($(ARCH_OS2),Msys)
|
||||
ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
|
||||
endif
|
||||
|
||||
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
|
||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||
|
||||
# Define build time stamp and revision.
|
||||
# For revision we use GIT or SVN info.
|
||||
BUILD_DATE=$(shell date +%Y%m%d)
|
||||
|
||||
@@ -45,7 +45,7 @@ for i in range(0, len(data), 1):
|
||||
sys.stdout.write("\n")
|
||||
|
||||
sys.stdout.write("0x00 };\n\n")
|
||||
sys.stdout.write("int builtins_bitcode_" + name + "_length = " + str(i+1) + ";\n")
|
||||
sys.stdout.write("int builtins_bitcode_" + name + "_length = " + str(len(data)) + ";\n")
|
||||
|
||||
as_out.wait()
|
||||
|
||||
|
||||
@@ -2188,7 +2188,11 @@ bool CWriter::doInitialization(llvm::Module &M) {
|
||||
#endif
|
||||
TAsm = new CBEMCAsmInfo();
|
||||
MRI = new llvm::MCRegisterInfo();
|
||||
#if defined(LLVM_3_4)
|
||||
TCtx = new llvm::MCContext(TAsm, MRI, NULL);
|
||||
#else
|
||||
TCtx = new llvm::MCContext(*TAsm, *MRI, NULL);
|
||||
#endif
|
||||
//Mang = new llvm::Mangler(*TCtx, *TD);
|
||||
|
||||
// Keep track of which functions are static ctors/dtors so they can have
|
||||
|
||||
8
ctx.cpp
8
ctx.cpp
@@ -1414,7 +1414,7 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
|
||||
|
||||
llvm::Value *
|
||||
FunctionEmitContext::GetStringPtr(const std::string &str) {
|
||||
llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
|
||||
llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str, false);
|
||||
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
|
||||
llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
|
||||
true /*isConst*/,
|
||||
@@ -1464,11 +1464,13 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
|
||||
|
||||
static llvm::Value *
|
||||
lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
|
||||
llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s);
|
||||
llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, false);
|
||||
std::string var_name = "_";
|
||||
var_name = var_name + s;
|
||||
llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
|
||||
true /* const */,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
sConstant, s);
|
||||
sConstant, var_name.c_str());
|
||||
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
|
||||
llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
|
||||
return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);
|
||||
|
||||
@@ -1,3 +1,17 @@
|
||||
=== v1.4.3 === (25 June 2013)
|
||||
|
||||
A minor version update with several stability improvements:
|
||||
|
||||
* Two bugs were fixed (including a bug in LLVM) to improve stability on 32 bit
|
||||
platforms.
|
||||
|
||||
* A bug affecting several examples was fixed.
|
||||
|
||||
* The --instrument switch was fixed.
|
||||
|
||||
All tests and examples now properly compile and execute on native targets on
|
||||
Unix platforms (Linux and MacOS).
|
||||
|
||||
=== v1.4.2 === (11 June 2013)
|
||||
|
||||
A minor version update with a few important changes:
|
||||
|
||||
@@ -2,6 +2,14 @@
|
||||
ispc News
|
||||
=========
|
||||
|
||||
ispc 1.4.3 is Released
|
||||
----------------------
|
||||
|
||||
A minor update of ``ispc`` has been released with several stability improvements.
|
||||
All tests and examples now properly compile and execute on native targets on
|
||||
Unix platforms (Linux and MacOS).
|
||||
The released binaries are built with a patched version of LLVM 3.3.
|
||||
|
||||
ispc 1.4.2 is Released
|
||||
----------------------
|
||||
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.4.2
|
||||
PROJECT_NUMBER = 1.4.3
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -60,7 +60,7 @@ int countbits(int i) {
|
||||
// Callback function that ispc compiler emits calls to when --instrument
|
||||
// command-line flag is given while compiling.
|
||||
void
|
||||
ISPCInstrument(const char *fn, const char *note, int line, int mask) {
|
||||
ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask) {
|
||||
char sline[16];
|
||||
sprintf(sline, "%04d", line);
|
||||
std::string s = std::string(fn) + std::string("(") + std::string(sline) +
|
||||
|
||||
@@ -28,7 +28,7 @@
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef INSTRUMENT_H
|
||||
@@ -36,8 +36,8 @@
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
extern "C" {
|
||||
void ISPCInstrument(const char *fn, const char *note, int line, int mask);
|
||||
extern "C" {
|
||||
void ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask);
|
||||
}
|
||||
|
||||
void ISPCPrintInstrument();
|
||||
|
||||
18
expr.cpp
18
expr.cpp
@@ -6802,7 +6802,23 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
|
||||
if (!conv)
|
||||
return NULL;
|
||||
|
||||
llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx));
|
||||
llvm::Value *cast = NULL;
|
||||
llvm::Type* toTypeLLVM = toType->LLVMType(g->ctx);
|
||||
if (llvm::isa<llvm::VectorType>(toTypeLLVM)) {
|
||||
// Example uniform float => uniform float<3>
|
||||
cast = ctx->BroadcastValue(conv, toTypeLLVM);
|
||||
}
|
||||
else if (llvm::isa<llvm::ArrayType>(toTypeLLVM)) {
|
||||
// Example varying float => varying float<3>
|
||||
cast = llvm::UndefValue::get(toType->LLVMType(g->ctx));
|
||||
for (int i = 0; i < toVector->GetElementCount(); ++i) {
|
||||
// Here InsertInst produces an InsertValueInst.
|
||||
cast = ctx->InsertInst(cast, conv, i);
|
||||
}
|
||||
}
|
||||
else {
|
||||
FATAL("TypeCastExpr::GetValue: problem with cast");
|
||||
}
|
||||
|
||||
return cast;
|
||||
}
|
||||
|
||||
8
func.cpp
8
func.cpp
@@ -85,19 +85,19 @@ Function::Function(Symbol *s, Stmt *c) {
|
||||
code = TypeCheck(code);
|
||||
|
||||
if (code != NULL && g->debugPrint) {
|
||||
fprintf(stderr, "After typechecking function \"%s\":\n",
|
||||
printf("After typechecking function \"%s\":\n",
|
||||
sym->name.c_str());
|
||||
code->Print(0);
|
||||
fprintf(stderr, "---------------------\n");
|
||||
printf("---------------------\n");
|
||||
}
|
||||
|
||||
if (code != NULL) {
|
||||
code = Optimize(code);
|
||||
if (g->debugPrint) {
|
||||
fprintf(stderr, "After optimizing function \"%s\":\n",
|
||||
printf("After optimizing function \"%s\":\n",
|
||||
sym->name.c_str());
|
||||
code->Print(0);
|
||||
fprintf(stderr, "---------------------\n");
|
||||
printf("---------------------\n");
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
2
ispc.h
2
ispc.h
@@ -38,7 +38,7 @@
|
||||
#ifndef ISPC_H
|
||||
#define ISPC_H
|
||||
|
||||
#define ISPC_VERSION "1.4.2"
|
||||
#define ISPC_VERSION "1.4.3"
|
||||
|
||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4)
|
||||
#error "Only LLVM 3.1, 3.2, 3.3 and the 3.4 development branch are supported"
|
||||
|
||||
@@ -464,6 +464,7 @@
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangEdit.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies Condition="'$(LLVM_VERSION)'!='LLVM_3_1'AND'$(LLVM_VERSION)'!='LLVM_3_2'AND'$(LLVM_VERSION)'!='LLVM_3_3'">LLVMOption.lib;LLVMSupport.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
@@ -484,6 +485,7 @@
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangEdit.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
<AdditionalDependencies Condition="'$(LLVM_VERSION)'!='LLVM_3_1'AND'$(LLVM_VERSION)'!='LLVM_3_2'AND'$(LLVM_VERSION)'!='LLVM_3_3'">LLVMOption.lib;LLVMSupport.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
|
||||
52
llvm_patches/r184575-x86-shift.patch
Normal file
52
llvm_patches/r184575-x86-shift.patch
Normal file
@@ -0,0 +1,52 @@
|
||||
This patch needs to be applied to LLVM 3.2/3.3 (but was verified with 3.3 only) to
|
||||
fix a problem with shift instructions on x86 (see PR16360 in LLVM bugzilla).
|
||||
This is a general LLVM problem, which triggers on one of the x86 tests in our test suite.
|
||||
LLVM 3.4 contains this fix (r184575).
|
||||
|
||||
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
|
||||
===================================================================
|
||||
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 183970)
|
||||
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp (working copy)
|
||||
@@ -3901,8 +3901,7 @@
|
||||
DAG.getConstant(~0ULL >> ShAmt, VT));
|
||||
}
|
||||
|
||||
-
|
||||
- // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
|
||||
+ // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
|
||||
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
|
||||
// Shifting in all undef bits?
|
||||
EVT SmallVT = N0.getOperand(0).getValueType();
|
||||
@@ -3915,7 +3914,10 @@
|
||||
N0.getOperand(0),
|
||||
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
|
||||
AddToWorkList(SmallShift.getNode());
|
||||
- return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
|
||||
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
|
||||
+ return DAG.getNode(ISD::AND, SDLoc(N), VT,
|
||||
+ DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
|
||||
+ DAG.getConstant(Mask, VT));
|
||||
}
|
||||
}
|
||||
|
||||
Index: test/CodeGen/X86/pr16360.ll
|
||||
===================================================================
|
||||
--- test/CodeGen/X86/pr16360.ll (revision 0)
|
||||
+++ test/CodeGen/X86/pr16360.ll (revision 0)
|
||||
@@ -0,0 +1,16 @@
|
||||
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
|
||||
+
|
||||
+define i64 @foo(i32 %sum) {
|
||||
+entry:
|
||||
+ %conv = sext i32 %sum to i64
|
||||
+ %shr = lshr i64 %conv, 2
|
||||
+ %or = or i64 4611686018360279040, %shr
|
||||
+ ret i64 %or
|
||||
+}
|
||||
+
|
||||
+; CHECK: foo
|
||||
+; CHECK: shrl $2
|
||||
+; CHECK: orl $-67108864
|
||||
+; CHECK-NOT: movl $-1
|
||||
+; CHECK: movl $1073741823
|
||||
+; CHECK: ret
|
||||
144
llvmutil.cpp
144
llvmutil.cpp
@@ -600,9 +600,10 @@ lGetIntValue(llvm::Value *offset) {
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
llvm::Value *
|
||||
LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
|
||||
llvm::Value **elements) {
|
||||
bool compare, bool undef) {
|
||||
llvm::Value ** elements = new llvm::Value*[vectorWidth];
|
||||
for (int i = 0; i < vectorWidth; ++i) {
|
||||
elements[i] = NULL;
|
||||
}
|
||||
@@ -610,43 +611,93 @@ LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
|
||||
// Catch a pattern of InsertElement chain.
|
||||
if (llvm::InsertElementInst *ie =
|
||||
llvm::dyn_cast<llvm::InsertElementInst>(inst)) {
|
||||
//Gather elements of vector
|
||||
while (ie != NULL) {
|
||||
int64_t iOffset = lGetIntValue(ie->getOperand(2));
|
||||
Assert(iOffset >= 0 && iOffset < vectorWidth);
|
||||
Assert(elements[iOffset] == NULL);
|
||||
|
||||
// Get the scalar value from this insert
|
||||
elements[iOffset] = ie->getOperand(1);
|
||||
if (elements[iOffset] == NULL) {
|
||||
elements[iOffset] = ie->getOperand(1);
|
||||
}
|
||||
|
||||
// Do we have another insert?
|
||||
llvm::Value *insertBase = ie->getOperand(0);
|
||||
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
|
||||
if (ie == NULL) {
|
||||
if (llvm::isa<llvm::UndefValue>(insertBase)) {
|
||||
return;
|
||||
if (ie != NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
if (llvm::isa<llvm::UndefValue>(insertBase)) {
|
||||
break;
|
||||
}
|
||||
|
||||
if (llvm::isa<llvm::ConstantVector>(insertBase) ||
|
||||
llvm::isa<llvm::ConstantAggregateZero>(insertBase)) {
|
||||
llvm::Constant *cv = llvm::dyn_cast<llvm::Constant>(insertBase);
|
||||
Assert(vectorWidth == (int)(cv->getNumOperands()));
|
||||
for (int i=0; i<vectorWidth; i++) {
|
||||
if (elements[i] == NULL) {
|
||||
elements[i] = cv->getOperand(i);
|
||||
}
|
||||
}
|
||||
break;
|
||||
}
|
||||
else {
|
||||
// Here chain ends in llvm::LoadInst or some other.
|
||||
// They are not equal to each other so we should return NULL if compare
|
||||
// and first element if we have it.
|
||||
Assert(compare == true || elements[0] != NULL);
|
||||
if (compare) {
|
||||
return NULL;
|
||||
}
|
||||
else {
|
||||
return elements[0];
|
||||
}
|
||||
}
|
||||
// TODO: Also, should we handle some other values like
|
||||
// ConstantDataVectors.
|
||||
}
|
||||
if (compare == false) {
|
||||
//We simply want first element
|
||||
return elements[0];
|
||||
}
|
||||
|
||||
// Get the value out of a constant vector if that's what we
|
||||
// have
|
||||
llvm::ConstantVector *cv =
|
||||
llvm::dyn_cast<llvm::ConstantVector>(insertBase);
|
||||
|
||||
// FIXME: this assert is a little questionable; we probably
|
||||
// shouldn't fail in this case but should just return an
|
||||
// incomplete result. But there aren't currently any known
|
||||
// cases where we have anything other than an undef value or a
|
||||
// constant vector at the base, so if that ever does happen,
|
||||
// it'd be nice to know what happend so that perhaps we can
|
||||
// handle it.
|
||||
// FIXME: Also, should we handle ConstantDataVectors with
|
||||
// LLVM3.1? What about ConstantAggregateZero values??
|
||||
Assert(cv != NULL);
|
||||
|
||||
Assert(iOffset < (int)cv->getNumOperands());
|
||||
elements[iOffset] = cv->getOperand((int32_t)iOffset);
|
||||
int null_number = 0;
|
||||
int NonNull = 0;
|
||||
for(int i = 0; i < vectorWidth; i++) {
|
||||
if (elements[i] == NULL) {
|
||||
null_number++;
|
||||
}
|
||||
else {
|
||||
NonNull = i;
|
||||
}
|
||||
}
|
||||
if (null_number == vectorWidth) {
|
||||
//All of elements are NULLs
|
||||
return NULL;
|
||||
}
|
||||
if ((undef == false) && (null_number != 0)) {
|
||||
//We don't want NULLs in chain, but we have them
|
||||
return NULL;
|
||||
}
|
||||
|
||||
// Compare elements of vector
|
||||
for (int i = 0; i < vectorWidth; i++) {
|
||||
if (elements[i] == NULL) {
|
||||
continue;
|
||||
}
|
||||
|
||||
std::vector<llvm::PHINode *> seenPhi0;
|
||||
std::vector<llvm::PHINode *> seenPhi1;
|
||||
if (lValuesAreEqual(elements[NonNull], elements[i],
|
||||
seenPhi0, seenPhi1) == false) {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
return elements[NonNull];
|
||||
}
|
||||
|
||||
// Catch a pattern of broadcast implemented as InsertElement + Shuffle:
|
||||
// %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
|
||||
// %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
|
||||
@@ -663,14 +714,12 @@ LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
|
||||
llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2));
|
||||
|
||||
if (ci->isZero()) {
|
||||
for (int i = 0; i < vectorWidth; ++i) {
|
||||
elements[i] = ie->getOperand(1);
|
||||
}
|
||||
return;
|
||||
return ie->getOperand(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
@@ -726,12 +775,10 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
|
||||
|
||||
if (llvm::isa<llvm::InsertElementInst>(val) ||
|
||||
llvm::isa<llvm::ShuffleVectorInst>(val)) {
|
||||
llvm::Value *elts[ISPC_MAX_NVEC];
|
||||
LLVMFlattenInsertChain(val, g->target->getVectorWidth(), elts);
|
||||
llvm::Value *element = LLVMFlattenInsertChain(val, g->target->getVectorWidth());
|
||||
// We just need to check the scalar first value, since we know that
|
||||
// all elements are equal
|
||||
return lIsExactMultiple(elts[0], baseValue, vectorLength,
|
||||
seenPhis);
|
||||
return lIsExactMultiple(element, baseValue, vectorLength, seenPhis);
|
||||
}
|
||||
|
||||
llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(val);
|
||||
@@ -995,32 +1042,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
|
||||
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
|
||||
if (ie != NULL) {
|
||||
llvm::Value *elements[ISPC_MAX_NVEC];
|
||||
LLVMFlattenInsertChain(ie, vectorLength, elements);
|
||||
|
||||
// We will ignore any values of elements[] that are NULL; as they
|
||||
// correspond to undefined values--we just want to see if all of
|
||||
// the defined values have the same value.
|
||||
int lastNonNull = 0;
|
||||
while (lastNonNull < vectorLength && elements[lastNonNull] == NULL)
|
||||
++lastNonNull;
|
||||
|
||||
if (lastNonNull == vectorLength)
|
||||
// all of them are undef!
|
||||
return true;
|
||||
|
||||
for (int i = lastNonNull; i < vectorLength; ++i) {
|
||||
if (elements[i] == NULL)
|
||||
continue;
|
||||
|
||||
std::vector<llvm::PHINode *> seenPhi0;
|
||||
std::vector<llvm::PHINode *> seenPhi1;
|
||||
if (lValuesAreEqual(elements[lastNonNull], elements[i], seenPhi0,
|
||||
seenPhi1) == false)
|
||||
return false;
|
||||
lastNonNull = i;
|
||||
}
|
||||
return true;
|
||||
return (LLVMFlattenInsertChain(ie, vectorLength) != NULL);
|
||||
}
|
||||
|
||||
llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(v);
|
||||
@@ -1472,9 +1494,7 @@ lExtractFirstVectorElement(llvm::Value *v,
|
||||
// flatten them out and grab the value for the first one.
|
||||
if (llvm::isa<llvm::InsertElementInst>(v) ||
|
||||
llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
||||
llvm::Value *elements[ISPC_MAX_NVEC];
|
||||
LLVMFlattenInsertChain(v, vt->getNumElements(), elements);
|
||||
return elements[0];
|
||||
return LLVMFlattenInsertChain(v, vt->getNumElements(), false);
|
||||
}
|
||||
|
||||
// Worst case, for everything else, just do a regular extract element
|
||||
|
||||
@@ -269,9 +269,13 @@ extern bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts);
|
||||
%broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
|
||||
%broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
|
||||
<4 x i32> zeroinitializer
|
||||
Function returns:
|
||||
Compare all elements and return one of them if all are equal, otherwise NULL.
|
||||
If compare argument is false, don't do compare and return first element instead.
|
||||
If undef argument is true, ignore undef elements (but all undef yields NULL anyway).
|
||||
*/
|
||||
extern void LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
|
||||
llvm::Value **elements);
|
||||
extern llvm::Value * LLVMFlattenInsertChain (llvm::Value *inst, int vectorWidth,
|
||||
bool compare = true, bool undef = true);
|
||||
|
||||
/** This is a utility routine for debugging that dumps out the given LLVM
|
||||
value as well as (recursively) all of the other values that it depends
|
||||
|
||||
27
opt.cpp
27
opt.cpp
@@ -505,7 +505,11 @@ Optimize(llvm::Module *module, int optLevel) {
|
||||
optPM.add(llvm::createCFGSimplificationPass());
|
||||
|
||||
optPM.add(llvm::createArgumentPromotionPass());
|
||||
#if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3)
|
||||
// Starting from 3.4 this functionality was moved to
|
||||
// InstructionCombiningPass. See r184459 for details.
|
||||
optPM.add(llvm::createSimplifyLibCallsPass());
|
||||
#endif
|
||||
optPM.add(llvm::createInstructionCombiningPass());
|
||||
optPM.add(llvm::createJumpThreadingPass());
|
||||
optPM.add(llvm::createCFGSimplificationPass());
|
||||
@@ -1060,23 +1064,18 @@ static llvm::Value *
|
||||
lGetBasePointer(llvm::Value *v) {
|
||||
if (llvm::isa<llvm::InsertElementInst>(v) ||
|
||||
llvm::isa<llvm::ShuffleVectorInst>(v)) {
|
||||
llvm::Value *elements[ISPC_MAX_NVEC];
|
||||
LLVMFlattenInsertChain(v, g->target->getVectorWidth(), elements);
|
||||
|
||||
// Make sure none of the elements is undefined.
|
||||
llvm::Value *element = LLVMFlattenInsertChain
|
||||
(v, g->target->getVectorWidth(), true, false);
|
||||
// TODO: it's probably ok to allow undefined elements and return
|
||||
// the base pointer if all of the other elements have the same
|
||||
// value.
|
||||
for (int i = 0; i < g->target->getVectorWidth(); ++i)
|
||||
if (elements[i] == NULL)
|
||||
return NULL;
|
||||
|
||||
// Do all of the elements have the same value?
|
||||
for (int i = 0; i < g->target->getVectorWidth()-1; ++i)
|
||||
if (elements[i] != elements[i+1])
|
||||
return NULL;
|
||||
|
||||
return lCheckForActualPointer(elements[0]);
|
||||
if (element != NULL) {
|
||||
//all elements are the same and not NULLs
|
||||
return lCheckForActualPointer(element);
|
||||
}
|
||||
else {
|
||||
return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
// This case comes up with global/static arrays
|
||||
|
||||
92
run_tests.py
92
run_tests.py
@@ -17,6 +17,7 @@ import shlex
|
||||
import platform
|
||||
import tempfile
|
||||
import os.path
|
||||
import time
|
||||
|
||||
# disable fancy error/warning printing with ANSI colors, so grepping for error
|
||||
# messages doesn't get confused
|
||||
@@ -52,6 +53,8 @@ parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output
|
||||
parser.add_option('--wrap-exe', dest='wrapexe',
|
||||
help='Executable to wrap test runs with (e.g. "valgrind")',
|
||||
default="")
|
||||
parser.add_option('--time', dest='time', help='Enable time output',
|
||||
default=False, action="store_true")
|
||||
|
||||
(options, args) = parser.parse_args()
|
||||
|
||||
@@ -217,19 +220,60 @@ def run_cmds(compile_cmds, run_cmd, filename, expect_failure):
|
||||
return (0, 0)
|
||||
|
||||
|
||||
def run_test(testname):
|
||||
def add_prefix(path):
|
||||
global is_windows
|
||||
if is_windows:
|
||||
# On Windows we run tests in tmp dir, so the root is one level up.
|
||||
# On Windows we run tests in tmp dir, so the root is one level up.
|
||||
input_prefix = "..\\"
|
||||
else:
|
||||
input_prefix = ""
|
||||
path = input_prefix + path
|
||||
path = os.path.normpath(path)
|
||||
return path
|
||||
|
||||
|
||||
def check_test(filename):
|
||||
prev_arch = False
|
||||
prev_os = False
|
||||
done_arch = True
|
||||
done_os = True
|
||||
done = True
|
||||
global is_windows
|
||||
if is_windows:
|
||||
oss = "windows"
|
||||
else:
|
||||
oss = "linux"
|
||||
b = buffer(file(add_prefix(filename)).read());
|
||||
for run in re.finditer('// *rule: run on .*', b):
|
||||
arch = re.match('.* arch=.*', run.group())
|
||||
if arch != None:
|
||||
if re.search(' arch='+options.arch+'$', arch.group()) != None:
|
||||
prev_arch = True
|
||||
if re.search(' arch='+options.arch+' ', arch.group()) != None:
|
||||
prev_arch = True
|
||||
done_arch = prev_arch
|
||||
OS = re.match('.* OS=.*', run.group())
|
||||
if OS != None:
|
||||
if re.search(' OS='+oss, OS.group()) != None:
|
||||
prev_os = True
|
||||
done_os = prev_os
|
||||
done = done_arch and done_os
|
||||
for skip in re.finditer('// *rule: skip on .*', b):
|
||||
if re.search(' arch=' + options.arch + '$', skip.group())!=None:
|
||||
done = False
|
||||
if re.search(' arch=' + options.arch + ' ', skip.group())!=None:
|
||||
done = False
|
||||
if re.search(' OS=' + oss, skip.group())!=None:
|
||||
done = False
|
||||
return done
|
||||
|
||||
|
||||
def run_test(testname):
|
||||
# testname is a path to the test from the root of ispc dir
|
||||
# filename is a path to the test from the current dir
|
||||
# ispc_exe_rel is a relative path to ispc
|
||||
filename = os.path.normpath(input_prefix + testname)
|
||||
ispc_exe_rel = os.path.normpath(input_prefix + ispc_exe)
|
||||
filename = add_prefix(testname)
|
||||
ispc_exe_rel = add_prefix(ispc_exe)
|
||||
|
||||
# is this a test to make sure an error is issued?
|
||||
want_error = (filename.find("tests_errors") != -1)
|
||||
@@ -290,8 +334,8 @@ def run_test(testname):
|
||||
obj_name = "%s.obj" % os.path.basename(filename)
|
||||
exe_name = "%s.exe" % os.path.basename(filename)
|
||||
|
||||
cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %stest_static.cpp %s /Fe%s" % \
|
||||
(options.compiler_exe, match, input_prefix, obj_name, exe_name)
|
||||
cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \
|
||||
(options.compiler_exe, match, add_prefix("test_static.cpp"), obj_name, exe_name)
|
||||
if should_fail:
|
||||
cc_cmd += " /DEXPECT_FAILURE"
|
||||
else:
|
||||
@@ -327,7 +371,7 @@ def run_test(testname):
|
||||
if options.no_opt:
|
||||
ispc_cmd += " -O0"
|
||||
if is_generic_target:
|
||||
ispc_cmd += " --emit-c++ --c++-include-file=%s" % os.path.normpath(input_prefix + options.include_file)
|
||||
ispc_cmd += " --emit-c++ --c++-include-file=%s" % add_prefix(options.include_file)
|
||||
|
||||
# compile the ispc code, make the executable, and run it...
|
||||
(compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd],
|
||||
@@ -351,7 +395,7 @@ def run_test(testname):
|
||||
# pull tests to run from the given queue and run them. Multiple copies of
|
||||
# this function will be running in parallel across all of the CPU cores of
|
||||
# the system.
|
||||
def run_tasks_from_queue(queue, queue_ret, total_tests_arg, max_test_length_arg, counter, mutex):
|
||||
def run_tasks_from_queue(queue, queue_ret, queue_skip, total_tests_arg, max_test_length_arg, counter, mutex):
|
||||
if is_windows:
|
||||
tmpdir = "tmp%d" % os.getpid()
|
||||
os.mkdir(tmpdir)
|
||||
@@ -380,14 +424,18 @@ def run_tasks_from_queue(queue, queue_ret, total_tests_arg, max_test_length_arg,
|
||||
|
||||
sys.exit(0)
|
||||
|
||||
(compile_error, run_error) = run_test(filename)
|
||||
if compile_error != 0:
|
||||
compile_error_files += [ filename ]
|
||||
if run_error != 0:
|
||||
run_error_files += [ filename ]
|
||||
if check_test(filename):
|
||||
(compile_error, run_error) = run_test(filename)
|
||||
if compile_error != 0:
|
||||
compile_error_files += [ filename ]
|
||||
if run_error != 0:
|
||||
run_error_files += [ filename ]
|
||||
|
||||
with mutex:
|
||||
update_progress(filename, total_tests_arg, counter, max_test_length_arg)
|
||||
else:
|
||||
queue_skip.put(filename)
|
||||
|
||||
with mutex:
|
||||
update_progress(filename, total_tests_arg, counter, max_test_length_arg)
|
||||
|
||||
task_threads = []
|
||||
|
||||
@@ -413,6 +461,7 @@ if __name__ == '__main__':
|
||||
for x in range(nthreads):
|
||||
q.put('STOP')
|
||||
qret = multiprocessing.Queue()
|
||||
qskip = multiprocessing.Queue()
|
||||
|
||||
# need to catch sigint so that we can terminate all of the tasks if
|
||||
# we're interrupted
|
||||
@@ -421,9 +470,10 @@ if __name__ == '__main__':
|
||||
finished_tests_counter = multiprocessing.Value(c_int)
|
||||
finished_tests_counter_lock = multiprocessing.Lock()
|
||||
|
||||
start_time = time.time()
|
||||
# launch jobs to run tests
|
||||
for x in range(nthreads):
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, total_tests, max_test_length, finished_tests_counter, finished_tests_counter_lock))
|
||||
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, qskip, total_tests, max_test_length, finished_tests_counter, finished_tests_counter_lock))
|
||||
task_threads.append(t)
|
||||
t.start()
|
||||
|
||||
@@ -433,11 +483,21 @@ if __name__ == '__main__':
|
||||
t.join()
|
||||
sys.stdout.write("\n")
|
||||
|
||||
elapsed_time = time.time() - start_time
|
||||
if options.time:
|
||||
sys.stdout.write("Elapsed time: %d s\n" % elapsed_time)
|
||||
|
||||
while not qret.empty():
|
||||
(c, r) = qret.get()
|
||||
compile_error_files += c
|
||||
run_error_files += r
|
||||
|
||||
skip = 0
|
||||
if qskip.qsize() > 0:
|
||||
sys.stdout.write("%d / %d tests SKIPPED:\n" % (qskip.qsize(), total_tests))
|
||||
while not qskip.empty():
|
||||
sys.stdout.write("\t%s\n" % qskip.get())
|
||||
|
||||
if len(compile_error_files) > 0:
|
||||
compile_error_files.sort()
|
||||
sys.stdout.write("%d / %d tests FAILED compilation:\n" % (len(compile_error_files), total_tests))
|
||||
|
||||
@@ -1,4 +1,5 @@
|
||||
// Pointer type cast of type "uniform int32 * uniform" to integer type "uniform int32" may lose information.
|
||||
// rule: run on arch=x86-64
|
||||
|
||||
int32 foo(int * uniform x) {
|
||||
return (int32) x;
|
||||
|
||||
Reference in New Issue
Block a user