26 Commits

Author SHA1 Message Date
Dmitry Babokin
594485c38c Release 1.4.3 2013-06-25 18:38:21 +04:00
Dmitry Babokin
d52e2d5a8d License update (just dates) 2013-06-25 17:02:42 +04:00
Dmitry Babokin
fc66066d4d Merge pull request #530 from dbabokin/llvm_fix
Adding LLVM patch to fix #519 with LLVM 3.3
2013-06-25 05:22:09 -07:00
Dmitry Babokin
6169338815 Adding LLVM patch to fix #519 with LLVM 3.3 2013-06-25 16:21:14 +04:00
Dmitry Babokin
6bc8cb1ff1 Merge pull request #529 from ifilippov/instrument_fix
correction of --instrument option support
2013-06-25 03:08:02 -07:00
Dmitry Babokin
0fc49b1c37 Merge pull request #528 from ifilippov/test3
Reapplying lost commits
2013-06-25 02:14:24 -07:00
Ilia Filippov
9fb981e9a0 correction of --instrument option support 2013-06-25 12:33:23 +04:00
Ilia Filippov
cba1b3cedd additional libraries for LLVM_3_4 build 2013-06-25 12:22:53 +04:00
Ilia Filippov
12c4512932 adding two additional libraries for LLVM_3_4 build 2013-06-25 12:22:53 +04:00
Dmitry Babokin
0dd1dbb568 Merge pull request #526 from dbabokin/master
Tracking LLVM trunk: removing llvm::createSimplifyLibCallsPass() call
2013-06-23 23:10:19 -07:00
Dmitry Babokin
fdcec5a219 Tracking LLVM trunk: removing llvm::createSimplifyLibCallsPass() call 2013-06-24 10:08:06 +04:00
Dmitry Babokin
bebab7ab0d Merge pull request #525 from dbabokin/debug
--debug output: stdout instead of stderr
2013-06-21 03:56:17 -07:00
Dmitry Babokin
fb771b6aa3 --debug output: stdout instead of stderr 2013-06-20 22:47:29 +04:00
jbrodman
8156559475 Merge pull request #522 from dbabokin/broadcast
Fix for #520
2013-06-18 11:47:24 -07:00
jbrodman
9f5e51cd01 Merge pull request #523 from dbabokin/tot
Tracking ToT changes
2013-06-18 11:47:16 -07:00
Dmitry Babokin
27daab2f1b Fix for #520 2013-06-18 22:15:49 +04:00
Dmitry Babokin
c4d404b15f Tracking ToT changes: changes in MCContext interface 2013-06-18 22:13:14 +04:00
Dmitry Babokin
95fcdc36ee Tracking ToT changes, which now require to link option library. This is Unix only. Windows will be fixed separately 2013-06-18 22:12:33 +04:00
Dmitry Babokin
2fdaba53c1 Merge pull request #517 from ifilippov/bug_34
Fix for tests/soa-22 on x86/sse4 - cleanup in function LLVMFlattenInsertChain().
2013-06-14 08:40:01 -07:00
Ilia Filippov
5c89080469 changes in function LLVMFlattenInsertChain 2013-06-14 16:38:54 +04:00
Ilia Filippov
d92f9df17c changes in function LLVMFlattenInsertChain 2013-06-14 15:21:45 +04:00
Dmitry Babokin
f551390420 Merge pull request #516 from ifilippov/master
Changes to support skipping tests.
2013-06-13 08:48:29 -07:00
Ilia Filippov
8642b4d89f changing run_tests to support skipping tests and time 2013-06-13 19:25:34 +04:00
Ilia Filippov
6fb70c307d changing run_tests to support skipping tests and time 2013-06-13 19:00:02 +04:00
Ilia Filippov
d08346fbcf changes to support skipping tests 2013-06-13 16:47:10 +04:00
jbrodman
141d240a91 Merge pull request #513 from dbabokin/release_142
Release 1.4.2, 11 June 2013
2013-06-11 07:47:37 -07:00
20 changed files with 304 additions and 116 deletions

View File

@@ -1,4 +1,4 @@
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2013, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -77,7 +77,7 @@ covered by the following license:
University of Illinois/NCSA
Open Source License
Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
Copyright (c) 2003-2013 University of Illinois at Urbana-Champaign.
All rights reserved.
Developed by:

View File

@@ -51,7 +51,17 @@ else
endif
ARCH_TYPE = $(shell arch)
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs engine ipo bitreader bitwriter instrumentation linker)
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
LLVM_COMPONENTS = engine ipo bitreader bitwriter instrumentation linker
# Component "option" was introduced in 3.3 and starting with 3.4 it is required for the link step.
# We check if it's available before adding it (to not break 3.2 and earlier).
ifeq ($(shell $(LLVM_CONFIG) --components |grep -c option), 1)
LLVM_COMPONENTS+=option
endif
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs $(LLVM_COMPONENTS))
CLANG=clang
CLANG_LIBS = -lclangFrontend -lclangDriver \
@@ -70,10 +80,6 @@ ifeq ($(ARCH_OS2),Msys)
ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
endif
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
# Define build time stamp and revision.
# For revision we use GIT or SVN info.
BUILD_DATE=$(shell date +%Y%m%d)

View File

@@ -45,7 +45,7 @@ for i in range(0, len(data), 1):
sys.stdout.write("\n")
sys.stdout.write("0x00 };\n\n")
sys.stdout.write("int builtins_bitcode_" + name + "_length = " + str(i+1) + ";\n")
sys.stdout.write("int builtins_bitcode_" + name + "_length = " + str(len(data)) + ";\n")
as_out.wait()

View File

@@ -2188,7 +2188,11 @@ bool CWriter::doInitialization(llvm::Module &M) {
#endif
TAsm = new CBEMCAsmInfo();
MRI = new llvm::MCRegisterInfo();
#if defined(LLVM_3_4)
TCtx = new llvm::MCContext(TAsm, MRI, NULL);
#else
TCtx = new llvm::MCContext(*TAsm, *MRI, NULL);
#endif
//Mang = new llvm::Mangler(*TCtx, *TD);
// Keep track of which functions are static ctors/dtors so they can have

View File

@@ -1414,7 +1414,7 @@ FunctionEmitContext::ProgramIndexVector(bool is32bits) {
llvm::Value *
FunctionEmitContext::GetStringPtr(const std::string &str) {
llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str);
llvm::Constant *lstr = llvm::ConstantDataArray::getString(*g->ctx, str, false);
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::InternalLinkage;
llvm::Value *lstrPtr = new llvm::GlobalVariable(*m->module, lstr->getType(),
true /*isConst*/,
@@ -1464,11 +1464,13 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
static llvm::Value *
lGetStringAsValue(llvm::BasicBlock *bblock, const char *s) {
llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s);
llvm::Constant *sConstant = llvm::ConstantDataArray::getString(*g->ctx, s, false);
std::string var_name = "_";
var_name = var_name + s;
llvm::Value *sPtr = new llvm::GlobalVariable(*m->module, sConstant->getType(),
true /* const */,
llvm::GlobalValue::InternalLinkage,
sConstant, s);
sConstant, var_name.c_str());
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(0) };
llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
return llvm::GetElementPtrInst::Create(sPtr, arrayRef, "sptr", bblock);

View File

@@ -1,3 +1,17 @@
=== v1.4.3 === (25 June 2013)
A minor version update with several stability improvements:
* Two bugs were fixed (including a bug in LLVM) to improve stability on 32-bit
platforms.
* A bug affecting several examples was fixed.
* --instrument switch is fixed.
All tests and examples now properly compile and execute on native targets on
Unix platforms (Linux and MacOS).
=== v1.4.2 === (11 June 2013)
A minor version update with a few important changes:

View File

@@ -2,6 +2,14 @@
ispc News
=========
ispc 1.4.3 is Released
----------------------
A minor update of ``ispc`` has been released with several stability improvements.
All tests and examples now properly compile and execute on native targets on
Unix platforms (Linux and MacOS).
The released binaries are built with a patched version of LLVM 3.3.
ispc 1.4.2 is Released
----------------------

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 1.4.2
PROJECT_NUMBER = 1.4.3
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

View File

@@ -60,7 +60,7 @@ int countbits(int i) {
// Callback function that ispc compiler emits calls to when --instrument
// command-line flag is given while compiling.
void
ISPCInstrument(const char *fn, const char *note, int line, int mask) {
ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask) {
char sline[16];
sprintf(sline, "%04d", line);
std::string s = std::string(fn) + std::string("(") + std::string(sline) +

View File

@@ -28,7 +28,7 @@
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/
#ifndef INSTRUMENT_H
@@ -36,8 +36,8 @@
#include <stdint.h>
extern "C" {
void ISPCInstrument(const char *fn, const char *note, int line, int mask);
extern "C" {
void ISPCInstrument(const char *fn, const char *note, int line, uint64_t mask);
}
void ISPCPrintInstrument();

View File

@@ -6802,7 +6802,23 @@ TypeCastExpr::GetValue(FunctionEmitContext *ctx) const {
if (!conv)
return NULL;
llvm::Value *cast = ctx->BroadcastValue(conv, toType->LLVMType(g->ctx));
llvm::Value *cast = NULL;
llvm::Type* toTypeLLVM = toType->LLVMType(g->ctx);
if (llvm::isa<llvm::VectorType>(toTypeLLVM)) {
// Example uniform float => uniform float<3>
cast = ctx->BroadcastValue(conv, toTypeLLVM);
}
else if (llvm::isa<llvm::ArrayType>(toTypeLLVM)) {
// Example varying float => varying float<3>
cast = llvm::UndefValue::get(toType->LLVMType(g->ctx));
for (int i = 0; i < toVector->GetElementCount(); ++i) {
// Here InsertInst produces an InsertValueInst.
cast = ctx->InsertInst(cast, conv, i);
}
}
else {
FATAL("TypeCastExpr::GetValue: problem with cast");
}
return cast;
}

View File

@@ -85,19 +85,19 @@ Function::Function(Symbol *s, Stmt *c) {
code = TypeCheck(code);
if (code != NULL && g->debugPrint) {
fprintf(stderr, "After typechecking function \"%s\":\n",
printf("After typechecking function \"%s\":\n",
sym->name.c_str());
code->Print(0);
fprintf(stderr, "---------------------\n");
printf("---------------------\n");
}
if (code != NULL) {
code = Optimize(code);
if (g->debugPrint) {
fprintf(stderr, "After optimizing function \"%s\":\n",
printf("After optimizing function \"%s\":\n",
sym->name.c_str());
code->Print(0);
fprintf(stderr, "---------------------\n");
printf("---------------------\n");
}
}
}

2
ispc.h
View File

@@ -38,7 +38,7 @@
#ifndef ISPC_H
#define ISPC_H
#define ISPC_VERSION "1.4.2"
#define ISPC_VERSION "1.4.3"
#if !defined(LLVM_3_1) && !defined(LLVM_3_2) && !defined(LLVM_3_3) && !defined(LLVM_3_4)
#error "Only LLVM 3.1, 3.2, 3.3 and the 3.4 development branch are supported"

View File

@@ -464,6 +464,7 @@
<GenerateDebugInformation>true</GenerateDebugInformation>
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangEdit.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies Condition="'$(LLVM_VERSION)'!='LLVM_3_1'AND'$(LLVM_VERSION)'!='LLVM_3_2'AND'$(LLVM_VERSION)'!='LLVM_3_3'">LLVMOption.lib;LLVMSupport.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
@@ -484,6 +485,7 @@
<OptimizeReferences>true</OptimizeReferences>
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
<AdditionalDependencies>clangFrontend.lib;clangDriver.lib;clangSerialization.lib;clangParse.lib;clangSema.lib;clangAnalysis.lib;clangEdit.lib;clangAST.lib;clangLex.lib;clangBasic.lib;LLVMAnalysis.lib;LLVMAsmParser.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Desc.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;LLVMipa.lib;LLVMipo.lib;shlwapi.lib;%(AdditionalDependencies)</AdditionalDependencies>
<AdditionalDependencies Condition="'$(LLVM_VERSION)'!='LLVM_3_1'AND'$(LLVM_VERSION)'!='LLVM_3_2'AND'$(LLVM_VERSION)'!='LLVM_3_3'">LLVMOption.lib;LLVMSupport.lib;%(AdditionalDependencies)</AdditionalDependencies>
</Link>
</ItemDefinitionGroup>
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />

View File

@@ -0,0 +1,52 @@
This patch needs to be applied to LLVM 3.2/3.3 (but was verified with 3.3 only) to
fix a problem with shift instructions on x86 (see PR16360 in LLVM bugzilla).
This is a general LLVM problem, which is triggered by one of the x86 tests in our test suite.
LLVM 3.4 contains this fix (r184575).
Index: lib/CodeGen/SelectionDAG/DAGCombiner.cpp
===================================================================
--- lib/CodeGen/SelectionDAG/DAGCombiner.cpp (revision 183970)
+++ lib/CodeGen/SelectionDAG/DAGCombiner.cpp (working copy)
@@ -3901,8 +3901,7 @@
DAG.getConstant(~0ULL >> ShAmt, VT));
}
-
- // fold (srl (anyextend x), c) -> (anyextend (srl x, c))
+ // fold (srl (anyextend x), c) -> (and (anyextend (srl x, c)), mask)
if (N1C && N0.getOpcode() == ISD::ANY_EXTEND) {
// Shifting in all undef bits?
EVT SmallVT = N0.getOperand(0).getValueType();
@@ -3915,7 +3914,10 @@
N0.getOperand(0),
DAG.getConstant(ShiftAmt, getShiftAmountTy(SmallVT)));
AddToWorkList(SmallShift.getNode());
- return DAG.getNode(ISD::ANY_EXTEND, N->getDebugLoc(), VT, SmallShift);
+ APInt Mask = APInt::getAllOnesValue(VT.getSizeInBits()).lshr(ShiftAmt);
+ return DAG.getNode(ISD::AND, SDLoc(N), VT,
+ DAG.getNode(ISD::ANY_EXTEND, SDLoc(N), VT, SmallShift),
+ DAG.getConstant(Mask, VT));
}
}
Index: test/CodeGen/X86/pr16360.ll
===================================================================
--- test/CodeGen/X86/pr16360.ll (revision 0)
+++ test/CodeGen/X86/pr16360.ll (revision 0)
@@ -0,0 +1,16 @@
+; RUN: llc < %s -mtriple=i686-pc-linux | FileCheck %s
+
+define i64 @foo(i32 %sum) {
+entry:
+ %conv = sext i32 %sum to i64
+ %shr = lshr i64 %conv, 2
+ %or = or i64 4611686018360279040, %shr
+ ret i64 %or
+}
+
+; CHECK: foo
+; CHECK: shrl $2
+; CHECK: orl $-67108864
+; CHECK-NOT: movl $-1
+; CHECK: movl $1073741823
+; CHECK: ret

View File

@@ -600,9 +600,10 @@ lGetIntValue(llvm::Value *offset) {
}
void
llvm::Value *
LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
llvm::Value **elements) {
bool compare, bool undef) {
llvm::Value ** elements = new llvm::Value*[vectorWidth];
for (int i = 0; i < vectorWidth; ++i) {
elements[i] = NULL;
}
@@ -610,43 +611,93 @@ LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
// Catch a pattern of InsertElement chain.
if (llvm::InsertElementInst *ie =
llvm::dyn_cast<llvm::InsertElementInst>(inst)) {
//Gather elements of vector
while (ie != NULL) {
int64_t iOffset = lGetIntValue(ie->getOperand(2));
Assert(iOffset >= 0 && iOffset < vectorWidth);
Assert(elements[iOffset] == NULL);
// Get the scalar value from this insert
elements[iOffset] = ie->getOperand(1);
if (elements[iOffset] == NULL) {
elements[iOffset] = ie->getOperand(1);
}
// Do we have another insert?
llvm::Value *insertBase = ie->getOperand(0);
ie = llvm::dyn_cast<llvm::InsertElementInst>(insertBase);
if (ie == NULL) {
if (llvm::isa<llvm::UndefValue>(insertBase)) {
return;
if (ie != NULL) {
continue;
}
if (llvm::isa<llvm::UndefValue>(insertBase)) {
break;
}
if (llvm::isa<llvm::ConstantVector>(insertBase) ||
llvm::isa<llvm::ConstantAggregateZero>(insertBase)) {
llvm::Constant *cv = llvm::dyn_cast<llvm::Constant>(insertBase);
Assert(vectorWidth == (int)(cv->getNumOperands()));
for (int i=0; i<vectorWidth; i++) {
if (elements[i] == NULL) {
elements[i] = cv->getOperand(i);
}
}
break;
}
else {
// Here the chain ends in an llvm::LoadInst or some other value.
// The elements are then not known to be equal to each other, so we return
// NULL if compare is set, and the first element (which must be known) otherwise.
Assert(compare == true || elements[0] != NULL);
if (compare) {
return NULL;
}
else {
return elements[0];
}
}
// TODO: Also, should we handle some other values like
// ConstantDataVectors.
}
if (compare == false) {
//We simply want first element
return elements[0];
}
// Get the value out of a constant vector if that's what we
// have
llvm::ConstantVector *cv =
llvm::dyn_cast<llvm::ConstantVector>(insertBase);
// FIXME: this assert is a little questionable; we probably
// shouldn't fail in this case but should just return an
// incomplete result. But there aren't currently any known
// cases where we have anything other than an undef value or a
// constant vector at the base, so if that ever does happen,
// it'd be nice to know what happened so that perhaps we can
// handle it.
// FIXME: Also, should we handle ConstantDataVectors with
// LLVM3.1? What about ConstantAggregateZero values??
Assert(cv != NULL);
Assert(iOffset < (int)cv->getNumOperands());
elements[iOffset] = cv->getOperand((int32_t)iOffset);
int null_number = 0;
int NonNull = 0;
for(int i = 0; i < vectorWidth; i++) {
if (elements[i] == NULL) {
null_number++;
}
else {
NonNull = i;
}
}
if (null_number == vectorWidth) {
//All of elements are NULLs
return NULL;
}
if ((undef == false) && (null_number != 0)) {
//We don't want NULLs in chain, but we have them
return NULL;
}
// Compare elements of vector
for (int i = 0; i < vectorWidth; i++) {
if (elements[i] == NULL) {
continue;
}
std::vector<llvm::PHINode *> seenPhi0;
std::vector<llvm::PHINode *> seenPhi1;
if (lValuesAreEqual(elements[NonNull], elements[i],
seenPhi0, seenPhi1) == false) {
return NULL;
}
}
return elements[NonNull];
}
// Catch a pattern of broadcast implemented as InsertElement + Shuffle:
// %broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
// %broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
@@ -663,14 +714,12 @@ LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
llvm::dyn_cast<llvm::ConstantInt>(ie->getOperand(2));
if (ci->isZero()) {
for (int i = 0; i < vectorWidth; ++i) {
elements[i] = ie->getOperand(1);
}
return;
return ie->getOperand(1);
}
}
}
}
return NULL;
}
@@ -726,12 +775,10 @@ lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
if (llvm::isa<llvm::InsertElementInst>(val) ||
llvm::isa<llvm::ShuffleVectorInst>(val)) {
llvm::Value *elts[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(val, g->target->getVectorWidth(), elts);
llvm::Value *element = LLVMFlattenInsertChain(val, g->target->getVectorWidth());
// We just need to check the scalar first value, since we know that
// all elements are equal
return lIsExactMultiple(elts[0], baseValue, vectorLength,
seenPhis);
return lIsExactMultiple(element, baseValue, vectorLength, seenPhis);
}
llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(val);
@@ -995,32 +1042,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
llvm::InsertElementInst *ie = llvm::dyn_cast<llvm::InsertElementInst>(v);
if (ie != NULL) {
llvm::Value *elements[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(ie, vectorLength, elements);
// We will ignore any values of elements[] that are NULL; as they
// correspond to undefined values--we just want to see if all of
// the defined values have the same value.
int lastNonNull = 0;
while (lastNonNull < vectorLength && elements[lastNonNull] == NULL)
++lastNonNull;
if (lastNonNull == vectorLength)
// all of them are undef!
return true;
for (int i = lastNonNull; i < vectorLength; ++i) {
if (elements[i] == NULL)
continue;
std::vector<llvm::PHINode *> seenPhi0;
std::vector<llvm::PHINode *> seenPhi1;
if (lValuesAreEqual(elements[lastNonNull], elements[i], seenPhi0,
seenPhi1) == false)
return false;
lastNonNull = i;
}
return true;
return (LLVMFlattenInsertChain(ie, vectorLength) != NULL);
}
llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(v);
@@ -1472,9 +1494,7 @@ lExtractFirstVectorElement(llvm::Value *v,
// flatten them out and grab the value for the first one.
if (llvm::isa<llvm::InsertElementInst>(v) ||
llvm::isa<llvm::ShuffleVectorInst>(v)) {
llvm::Value *elements[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(v, vt->getNumElements(), elements);
return elements[0];
return LLVMFlattenInsertChain(v, vt->getNumElements(), false);
}
// Worst case, for everything else, just do a regular extract element

View File

@@ -269,9 +269,13 @@ extern bool LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts);
%broadcast_init.0 = insertelement <4 x i32> undef, i32 %val, i32 0
%broadcast.1 = shufflevector <4 x i32> %smear.0, <4 x i32> undef,
<4 x i32> zeroinitializer
Return value:
Compares all elements and returns one of them if all are equal, otherwise NULL.
If the compare argument is false, no comparison is done and the first element is returned instead.
If the undef argument is true, undef elements are ignored (but an all-undef vector yields NULL anyway).
*/
extern void LLVMFlattenInsertChain(llvm::Value *inst, int vectorWidth,
llvm::Value **elements);
extern llvm::Value * LLVMFlattenInsertChain (llvm::Value *inst, int vectorWidth,
bool compare = true, bool undef = true);
/** This is a utility routine for debugging that dumps out the given LLVM
value as well as (recursively) all of the other values that it depends

27
opt.cpp
View File

@@ -505,7 +505,11 @@ Optimize(llvm::Module *module, int optLevel) {
optPM.add(llvm::createCFGSimplificationPass());
optPM.add(llvm::createArgumentPromotionPass());
#if defined(LLVM_3_1) || defined(LLVM_3_2) || defined(LLVM_3_3)
// Starting from 3.4 this functionality was moved to
// InstructionCombiningPass. See r184459 for details.
optPM.add(llvm::createSimplifyLibCallsPass());
#endif
optPM.add(llvm::createInstructionCombiningPass());
optPM.add(llvm::createJumpThreadingPass());
optPM.add(llvm::createCFGSimplificationPass());
@@ -1060,23 +1064,18 @@ static llvm::Value *
lGetBasePointer(llvm::Value *v) {
if (llvm::isa<llvm::InsertElementInst>(v) ||
llvm::isa<llvm::ShuffleVectorInst>(v)) {
llvm::Value *elements[ISPC_MAX_NVEC];
LLVMFlattenInsertChain(v, g->target->getVectorWidth(), elements);
// Make sure none of the elements is undefined.
llvm::Value *element = LLVMFlattenInsertChain
(v, g->target->getVectorWidth(), true, false);
// TODO: it's probably ok to allow undefined elements and return
// the base pointer if all of the other elements have the same
// value.
for (int i = 0; i < g->target->getVectorWidth(); ++i)
if (elements[i] == NULL)
return NULL;
// Do all of the elements have the same value?
for (int i = 0; i < g->target->getVectorWidth()-1; ++i)
if (elements[i] != elements[i+1])
return NULL;
return lCheckForActualPointer(elements[0]);
if (element != NULL) {
//all elements are the same and not NULLs
return lCheckForActualPointer(element);
}
else {
return NULL;
}
}
// This case comes up with global/static arrays

View File

@@ -17,6 +17,7 @@ import shlex
import platform
import tempfile
import os.path
import time
# disable fancy error/warning printing with ANSI colors, so grepping for error
# messages doesn't get confused
@@ -52,6 +53,8 @@ parser.add_option('-v', '--verbose', dest='verbose', help='Enable verbose output
parser.add_option('--wrap-exe', dest='wrapexe',
help='Executable to wrap test runs with (e.g. "valgrind")',
default="")
parser.add_option('--time', dest='time', help='Enable time output',
default=False, action="store_true")
(options, args) = parser.parse_args()
@@ -217,19 +220,60 @@ def run_cmds(compile_cmds, run_cmd, filename, expect_failure):
return (0, 0)
def run_test(testname):
# Convert a path given relative to the root of the ispc directory into a path
# usable from the directory the tests are run in, and normalize it.
# Returns the adjusted path as a string.
def add_prefix(path):
global is_windows
if is_windows:
# On Windows we run tests in tmp dir, so the root is one level up.
# On Windows we run tests in tmp dir, so the root is one level up.
input_prefix = "..\\"
else:
# On other platforms the path is used without a prefix.
input_prefix = ""
path = input_prefix + path
# normpath collapses redundant separators and up-level references.
path = os.path.normpath(path)
return path
# Decide whether a test should be run in the current configuration by scanning
# the test file for "// rule: run on ..." and "// rule: skip on ..." comments.
# Returns a true value when the test should run; the caller puts the test on
# the skip queue otherwise.
# NOTE(review): this listing has lost its indentation, so the nesting implied
# by the comments below is inferred from statement order -- confirm against
# the real file.
def check_test(filename):
prev_arch = False
prev_os = False
done_arch = True
done_os = True
done = True
global is_windows
# Only two OS names are distinguished; any non-Windows host counts as "linux".
if is_windows:
oss = "windows"
else:
oss = "linux"
# Read the whole test file (path adjusted by add_prefix) so rules can be searched.
b = buffer(file(add_prefix(filename)).read());
# "run on" rules: compare any arch= / OS= entry with options.arch / the host OS.
for run in re.finditer('// *rule: run on .*', b):
arch = re.match('.* arch=.*', run.group())
if arch != None:
# The arch name is matched either at the end of the rule or followed by a
# space, so that it is not matched as a prefix of a longer name.
# NOTE(review): options.arch is concatenated into the pattern unescaped.
if re.search(' arch='+options.arch+'$', arch.group()) != None:
prev_arch = True
if re.search(' arch='+options.arch+' ', arch.group()) != None:
prev_arch = True
# presumably only reached when the rule names an arch -- confirm nesting
done_arch = prev_arch
OS = re.match('.* OS=.*', run.group())
if OS != None:
if re.search(' OS='+oss, OS.group()) != None:
prev_os = True
# presumably only reached when the rule names an OS -- confirm nesting
done_os = prev_os
# The test is a candidate to run only if both the arch and the OS checks passed.
done = done_arch and done_os
# "skip on" rules: a matching arch= or OS= entry disables the test.
for skip in re.finditer('// *rule: skip on .*', b):
if re.search(' arch=' + options.arch + '$', skip.group())!=None:
done = False
if re.search(' arch=' + options.arch + ' ', skip.group())!=None:
done = False
if re.search(' OS=' + oss, skip.group())!=None:
done = False
return done
def run_test(testname):
# testname is a path to the test from the root of ispc dir
# filename is a path to the test from the current dir
# ispc_exe_rel is a relative path to ispc
filename = os.path.normpath(input_prefix + testname)
ispc_exe_rel = os.path.normpath(input_prefix + ispc_exe)
filename = add_prefix(testname)
ispc_exe_rel = add_prefix(ispc_exe)
# is this a test to make sure an error is issued?
want_error = (filename.find("tests_errors") != -1)
@@ -290,8 +334,8 @@ def run_test(testname):
obj_name = "%s.obj" % os.path.basename(filename)
exe_name = "%s.exe" % os.path.basename(filename)
cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %stest_static.cpp %s /Fe%s" % \
(options.compiler_exe, match, input_prefix, obj_name, exe_name)
cc_cmd = "%s /I. /I../winstuff /Zi /nologo /DTEST_SIG=%d %s %s /Fe%s" % \
(options.compiler_exe, match, add_prefix("test_static.cpp"), obj_name, exe_name)
if should_fail:
cc_cmd += " /DEXPECT_FAILURE"
else:
@@ -327,7 +371,7 @@ def run_test(testname):
if options.no_opt:
ispc_cmd += " -O0"
if is_generic_target:
ispc_cmd += " --emit-c++ --c++-include-file=%s" % os.path.normpath(input_prefix + options.include_file)
ispc_cmd += " --emit-c++ --c++-include-file=%s" % add_prefix(options.include_file)
# compile the ispc code, make the executable, and run it...
(compile_error, run_error) = run_cmds([ispc_cmd, cc_cmd],
@@ -351,7 +395,7 @@ def run_test(testname):
# pull tests to run from the given queue and run them. Multiple copies of
# this function will be running in parallel across all of the CPU cores of
# the system.
def run_tasks_from_queue(queue, queue_ret, total_tests_arg, max_test_length_arg, counter, mutex):
def run_tasks_from_queue(queue, queue_ret, queue_skip, total_tests_arg, max_test_length_arg, counter, mutex):
if is_windows:
tmpdir = "tmp%d" % os.getpid()
os.mkdir(tmpdir)
@@ -380,14 +424,18 @@ def run_tasks_from_queue(queue, queue_ret, total_tests_arg, max_test_length_arg,
sys.exit(0)
(compile_error, run_error) = run_test(filename)
if compile_error != 0:
compile_error_files += [ filename ]
if run_error != 0:
run_error_files += [ filename ]
if check_test(filename):
(compile_error, run_error) = run_test(filename)
if compile_error != 0:
compile_error_files += [ filename ]
if run_error != 0:
run_error_files += [ filename ]
with mutex:
update_progress(filename, total_tests_arg, counter, max_test_length_arg)
else:
queue_skip.put(filename)
with mutex:
update_progress(filename, total_tests_arg, counter, max_test_length_arg)
task_threads = []
@@ -413,6 +461,7 @@ if __name__ == '__main__':
for x in range(nthreads):
q.put('STOP')
qret = multiprocessing.Queue()
qskip = multiprocessing.Queue()
# need to catch sigint so that we can terminate all of the tasks if
# we're interrupted
@@ -421,9 +470,10 @@ if __name__ == '__main__':
finished_tests_counter = multiprocessing.Value(c_int)
finished_tests_counter_lock = multiprocessing.Lock()
start_time = time.time()
# launch jobs to run tests
for x in range(nthreads):
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, total_tests, max_test_length, finished_tests_counter, finished_tests_counter_lock))
t = multiprocessing.Process(target=run_tasks_from_queue, args=(q, qret, qskip, total_tests, max_test_length, finished_tests_counter, finished_tests_counter_lock))
task_threads.append(t)
t.start()
@@ -433,11 +483,21 @@ if __name__ == '__main__':
t.join()
sys.stdout.write("\n")
elapsed_time = time.time() - start_time
if options.time:
sys.stdout.write("Elapsed time: %d s\n" % elapsed_time)
while not qret.empty():
(c, r) = qret.get()
compile_error_files += c
run_error_files += r
skip = 0
if qskip.qsize() > 0:
sys.stdout.write("%d / %d tests SKIPPED:\n" % (qskip.qsize(), total_tests))
while not qskip.empty():
sys.stdout.write("\t%s\n" % qskip.get())
if len(compile_error_files) > 0:
compile_error_files.sort()
sys.stdout.write("%d / %d tests FAILED compilation:\n" % (len(compile_error_files), total_tests))

View File

@@ -1,4 +1,5 @@
// Pointer type cast of type "uniform int32 * uniform" to integer type "uniform int32" may lose information.
// rule: run on arch=x86-64
int32 foo(int * uniform x) {
return (int32) x;