Merged Upstream

This commit is contained in:
Jean-Luc Duprat
2012-06-01 11:13:16 -07:00
253 changed files with 10866 additions and 4233 deletions

View File

@@ -2,6 +2,15 @@
# ispc Makefile
#
# If you have your own special version of llvm and/or clang, change
# these variables to match.
LLVM_CONFIG=$(shell which llvm-config)
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
# Add llvm bin to the path so any scripts run will go to the right llvm-config
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
export PATH:=$(LLVM_BIN):$(PATH)
ARCH_OS = $(shell uname)
ifeq ($(ARCH_OS), Darwin)
ARCH_OS2 = "OSX"
@@ -10,10 +19,12 @@ else
endif
ARCH_TYPE = $(shell arch)
ifeq ($(shell llvm-config --version), 3.1svn)
ifeq ($(shell $(LLVM_CONFIG) --version), 3.0)
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs)
else
LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \
-lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \
-lLLVMBitWriter -lLLVMTableGen -lLLVMCBackendInfo \
-lLLVMBitWriter -lLLVMTableGen \
-lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG \
-lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info \
-lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler -lLLVMMCParser \
@@ -21,19 +32,17 @@ ifeq ($(shell llvm-config --version), 3.1svn)
-lLLVMipa -lLLVMAnalysis -lLLVMMCJIT -lLLVMRuntimeDyld \
-lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore \
-lLLVMSupport
else
LLVM_LIBS=$(shell llvm-config --libs)
endif
CLANG=clang
CLANG_LIBS = -lclangFrontend -lclangDriver \
-lclangSerialization -lclangParse -lclangSema \
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
ifeq ($(shell llvm-config --version), 3.1svn)
ifneq ($(shell $(LLVM_CONFIG) --version), 3.0)
CLANG_LIBS += -lclangEdit
endif
ISPC_LIBS=$(shell llvm-config --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
ISPC_LIBS=$(shell $(LLVM_CONFIG) --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
-lpthread
ifeq ($(ARCH_OS),Linux)
@@ -44,8 +53,8 @@ ifeq ($(ARCH_OS2),Msys)
ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
endif
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
LLVM_VERSION=LLVM_$(shell llvm-config --version | sed s/\\./_/)
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
BUILD_DATE=$(shell date +%Y%m%d)
@@ -53,8 +62,9 @@ BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
CXX=g++
CPP=cpp
OPT=-g3
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -Wall $(LLVM_VERSION_DEF) \
OPT=-O2
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \
-Wall $(LLVM_VERSION_DEF) \
-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""
LDFLAGS=
@@ -75,7 +85,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
generic-16 generic-1
generic-16 generic-32 generic-64 generic-1
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
builtins/dispatch.ll
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
@@ -114,7 +124,7 @@ doxygen:
ispc: print_llvm_src dirs $(OBJS)
@echo Creating ispc executable
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
@$(CXX) $(OPT) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
objs/%.o: %.cpp
@echo Compiling $<

71
ast.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2011, Intel Corporation
Copyright (c) 2011-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -32,8 +32,10 @@
*/
/** @file ast.cpp
@brief
*/
@brief General functionality related to abstract syntax trees and
traversal of them.
*/
#include "ast.h"
#include "expr.h"
@@ -53,10 +55,10 @@ ASTNode::~ASTNode() {
// AST
void
AST::AddFunction(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code) {
AST::AddFunction(Symbol *sym, Stmt *code) {
if (sym == NULL)
return;
functions.push_back(new Function(sym, args, code));
functions.push_back(new Function(sym, code));
}
@@ -151,7 +153,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
else if ((ls = dynamic_cast<LabeledStmt *>(node)) != NULL)
ls->stmt = (Stmt *)WalkAST(ls->stmt, preFunc, postFunc, data);
else if ((rs = dynamic_cast<ReturnStmt *>(node)) != NULL)
rs->val = (Expr *)WalkAST(rs->val, preFunc, postFunc, data);
rs->expr = (Expr *)WalkAST(rs->expr, preFunc, postFunc, data);
else if ((sl = dynamic_cast<StmtList *>(node)) != NULL) {
std::vector<Stmt *> &sls = sl->stmts;
for (unsigned int i = 0; i < sls.size(); ++i)
@@ -305,19 +307,39 @@ TypeCheck(Stmt *stmt) {
}
/** State carried through the AST walk performed by EstimateCost(): the
    running cost total and the current foreach nesting depth.  Both
    counters start out at zero. */
struct CostData {
    int cost;
    int foreachDepth;

    CostData() : cost(0), foreachDepth(0) { }
};
static bool
lCostCallback(ASTNode *node, void *c) {
int *cost = (int *)c;
*cost += node->EstimateCost();
lCostCallbackPre(ASTNode *node, void *d) {
CostData *data = (CostData *)d;
if (dynamic_cast<ForeachStmt *>(node) != NULL)
++data->foreachDepth;
if (data->foreachDepth == 0)
data->cost += node->EstimateCost();
return true;
}
/** Post-visit callback for the cost-estimation walk.  When the node being
    left is a foreach statement, the foreach nesting depth stored in the
    CostData passed through @p d is decremented.  The node itself is
    always returned unmodified. */
static ASTNode *
lCostCallbackPost(ASTNode *node, void *d) {
    CostData *costData = static_cast<CostData *>(d);
    const bool leavingForeach = (dynamic_cast<ForeachStmt *>(node) != NULL);
    if (leavingForeach)
        costData->foreachDepth -= 1;
    return node;
}
int
EstimateCost(ASTNode *root) {
int cost = 0;
WalkAST(root, lCostCallback, NULL, &cost);
return cost;
CostData data;
WalkAST(root, lCostCallbackPre, lCostCallbackPost, &data);
return data.cost;
}
@@ -334,10 +356,10 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
return false;
const Type *type = fce->func->GetType();
const PointerType *pt = dynamic_cast<const PointerType *>(type);
const PointerType *pt = CastType<PointerType>(type);
if (pt != NULL)
type = pt->GetBaseType();
const FunctionType *ftype = dynamic_cast<const FunctionType *>(type);
const FunctionType *ftype = CastType<FunctionType>(type);
Assert(ftype != NULL);
if (ftype->isSafe == false) {
@@ -363,17 +385,22 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
return false;
}
if (g->target.allOffMaskIsSafe == true)
// Don't worry about memory accesses if we have a target that can
// safely run them with the mask all off
return true;
if (dynamic_cast<ForeachStmt *>(node) != NULL) {
// foreach() statements also shouldn't be run with an all-off mask.
// Since they re-establish an 'all on' mask, this would be pretty
// unintuitive. (More generally, it's possibly a little strange to
// allow foreach() in the presence of any non-uniform control
// flow...)
*okPtr = false;
return false;
}
IndexExpr *ie;
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
const Type *type = ie->baseExpr->GetType();
if (type == NULL)
return true;
if (dynamic_cast<const ReferenceType *>(type) != NULL)
if (CastType<ReferenceType>(type) != NULL)
type = type->GetReferenceTarget();
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
@@ -383,16 +410,14 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
return false;
}
const PointerType *pointerType =
dynamic_cast<const PointerType *>(type);
const PointerType *pointerType = CastType<PointerType>(type);
if (pointerType != NULL) {
// pointer[index] -> can't be sure -> not safe
*okPtr = false;
return false;
}
const SequentialType *seqType =
dynamic_cast<const SequentialType *>(type);
const SequentialType *seqType = CastType<SequentialType>(type);
Assert(seqType != NULL);
int nElements = seqType->GetElementCount();
if (nElements == 0) {

5
ast.h
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2011, Intel Corporation
Copyright (c) 2011-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -84,8 +84,7 @@ class AST {
public:
/** Add the AST for a function described by the given declaration
information and source code. */
void AddFunction(Symbol *sym, const std::vector<Symbol *> &args,
Stmt *code);
void AddFunction(Symbol *sym, Stmt *code);
/** Generate LLVM IR for all of the functions into the current
module. */

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -157,7 +157,7 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
static void
lCreateSymbol(const std::string &name, const Type *returnType,
const std::vector<const Type *> &argTypes,
llvm::SmallVector<const Type *, 8> &argTypes,
const llvm::FunctionType *ftype, llvm::Function *func,
SymbolTable *symbolTable) {
SourcePos noPos;
@@ -199,7 +199,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// bool, so just have a one-off override for that one...
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
const Type *returnType = AtomicType::VaryingInt32;
std::vector<const Type *> argTypes;
llvm::SmallVector<const Type *, 8> argTypes;
argTypes.push_back(AtomicType::VaryingBool);
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
@@ -229,7 +229,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// Iterate over the arguments and try to find their equivalent ispc
// types. Track if any of the arguments has an integer type.
bool anyIntArgs = false;
std::vector<const Type *> argTypes;
llvm::SmallVector<const Type *, 8> argTypes;
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
const llvm::Type *llvmArgType = ftype->getParamType(j);
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
@@ -291,7 +291,7 @@ lCheckModuleIntrinsics(llvm::Module *module) {
if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
Assert(id != 0);
LLVM_TYPE_CONST llvm::Type *intrinsicType =
llvm::Type *intrinsicType =
llvm::Intrinsic::getType(*g->ctx, id);
intrinsicType = llvm::PointerType::get(intrinsicType, 0);
Assert(func->getType() == intrinsicType);
@@ -411,12 +411,16 @@ lSetInternalFunctions(llvm::Module *module) {
"__extract_int64",
"__extract_int8",
"__fastmath",
"__float_to_half_uniform",
"__float_to_half_varying",
"__floatbits_uniform_int32",
"__floatbits_varying_int32",
"__floor_uniform_double",
"__floor_uniform_float",
"__floor_varying_double",
"__floor_varying_float",
"__half_to_float_uniform",
"__half_to_float_varying",
"__insert_int16",
"__insert_int32",
"__insert_int64",
@@ -616,9 +620,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
std::string(linkError);
if (llvm::Linker::LinkModules(module, bcModule,
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
llvm::Linker::DestroySource,
#endif // LLVM_3_0
&linkError))
Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
lSetInternalFunctions(module);
@@ -635,16 +637,36 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
static void
lDefineConstantInt(const char *name, int val, llvm::Module *module,
SymbolTable *symbolTable) {
Symbol *pw =
Symbol *sym =
new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(),
SC_STATIC);
pw->constValue = new ConstExpr(pw->type, val, SourcePos());
LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32Type;
sym->constValue = new ConstExpr(sym->type, val, SourcePos());
llvm::Type *ltype = LLVMTypes::Int32Type;
llvm::Constant *linit = LLVMInt32(val);
pw->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
llvm::GlobalValue::InternalLinkage,
linit, pw->name.c_str());
symbolTable->AddVariable(pw);
// Use WeakODRLinkage rather than InternalLinkage so that a definition
// survives even if it's not used in the module, so that the symbol is
// there in the debugger.
sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
llvm::GlobalValue::WeakODRLinkage,
linit, name);
symbolTable->AddVariable(sym);
if (m->diBuilder != NULL) {
llvm::DIFile file;
llvm::DIType diType = sym->type->GetDIType(file);
Assert(diType.Verify());
// FIXME? DWARF says that this (and programIndex below) should
// have the DW_AT_artificial attribute. It's not clear if this
// matters for anything though.
llvm::DIGlobalVariable var =
m->diBuilder->createGlobalVariable(name,
file,
0 /* line */,
diType,
true /* static */,
sym->storagePtr);
Assert(var.Verify());
}
}
@@ -652,7 +674,7 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
static void
lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
SymbolTable *symbolTable) {
std::vector<const Type *> args;
llvm::SmallVector<const Type *, 8> args;
FunctionType *ft = new FunctionType(AtomicType::UniformInt32, args, SourcePos());
Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
@@ -670,21 +692,37 @@ lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
static void
lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
Symbol *pidx =
Symbol *sym =
new Symbol("programIndex", SourcePos(),
AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
int pi[ISPC_MAX_NVEC];
for (int i = 0; i < g->target.vectorWidth; ++i)
pi[i] = i;
pidx->constValue = new ConstExpr(pidx->type, pi, SourcePos());
sym->constValue = new ConstExpr(sym->type, pi, SourcePos());
LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32VectorType;
llvm::Type *ltype = LLVMTypes::Int32VectorType;
llvm::Constant *linit = LLVMInt32Vector(pi);
pidx->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
llvm::GlobalValue::InternalLinkage, linit,
pidx->name.c_str());
symbolTable->AddVariable(pidx);
// See comment in lDefineConstantInt() for why WeakODRLinkage is used here
sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
llvm::GlobalValue::WeakODRLinkage,
linit,
sym->name.c_str());
symbolTable->AddVariable(sym);
if (m->diBuilder != NULL) {
llvm::DIFile file;
llvm::DIType diType = sym->type->GetDIType(file);
Assert(diType.Verify());
llvm::DIGlobalVariable var =
m->diBuilder->createGlobalVariable(sym->name.c_str(),
file,
0 /* line */,
diType,
false /* static */,
sym->storagePtr);
Assert(var.Verify());
}
}
@@ -809,6 +847,20 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
builtins_bitcode_generic_16_length,
module, symbolTable);
break;
case 32:
extern unsigned char builtins_bitcode_generic_32[];
extern int builtins_bitcode_generic_32_length;
AddBitcodeToModule(builtins_bitcode_generic_32,
builtins_bitcode_generic_32_length,
module, symbolTable);
break;
case 64:
extern unsigned char builtins_bitcode_generic_64[];
extern int builtins_bitcode_generic_64_length;
AddBitcodeToModule(builtins_bitcode_generic_64,
builtins_bitcode_generic_64_length,
module, symbolTable);
break;
case 1:
extern unsigned char builtins_bitcode_generic_1[];
extern int builtins_bitcode_generic_1_length;
@@ -841,10 +893,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
symbolTable);
lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
symbolTable);
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
symbolTable);
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
module, symbolTable);
lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2),
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
symbolTable);
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
module, symbolTable);
if (includeStdlibISPC) {

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -70,7 +70,7 @@ typedef int Bool;
putchar('['); \
for (int i = 0; i < width; ++i) { \
/* only print the value if the current lane is executing */ \
if (mask & (1<<i)) \
if (mask & (1ull<<i)) \
printf(fmt, ((type *)ptr)[i]); \
else \
printf("((" fmt "))", ((type *)ptr)[i]); \
@@ -89,7 +89,7 @@ typedef int Bool;
@param mask Current lane mask when the print statement is called
@param args Array of pointers to the values to be printed
*/
void __do_print(const char *format, const char *types, int width, int mask,
void __do_print(const char *format, const char *types, int width, uint64_t mask,
void **args) {
if (mask == 0)
return;
@@ -113,7 +113,7 @@ void __do_print(const char *format, const char *types, int width, int mask,
case 'B': {
putchar('[');
for (int i = 0; i < width; ++i) {
if (mask & (1<<i))
if (mask & (1ull << i))
printf("%s", ((Bool *)ptr)[i] ? "true" : "false");
else
printf("_________");

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -175,7 +175,7 @@ define <16 x float> @__min_varying_float(<16 x float>,
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
define i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
define i64 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
%floatmask = bitcast <16 x i32> %0 to <16 x float>
%mask0 = shufflevector <16 x float> %floatmask, <16 x float> undef,
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
@@ -186,7 +186,8 @@ define i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
%v1shift = shl i32 %v1, 8
%v = or i32 %v1shift, %v0
ret i32 %v
%v64 = zext i32 %v to i64
ret i64 %v64
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -175,10 +175,11 @@ define <8 x float> @__min_varying_float(<8 x float>,
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
%floatmask = bitcast <8 x i32> %0 to <8 x float>
%v = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask) nounwind readnone
ret i32 %v
%v64 = zext i32 %v to i64
ret i64 %v64
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -186,14 +186,14 @@ define void @__masked_store_blend_64(<1 x i64>* nocapture, <1 x i64>,
ret void
}
define i32 @__movmsk(<1 x i32>) nounwind readnone alwaysinline {
define i64 @__movmsk(<1 x i32>) nounwind readnone alwaysinline {
%item = extractelement <1 x i32> %0, i32 0
%v = lshr i32 %item, 31
ret i32 %v
%v64 = zext i32 %v to i64
ret i64 %v64
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding
;;

View File

@@ -0,0 +1,33 @@
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;; Generic target instantiated at a vector width of 32: set the width
;; macro, then pull in target-generic-common.ll (presumably written in
;; terms of that width -- confirm against the included file).
define(`WIDTH',`32')
include(`target-generic-common.ll')

View File

@@ -0,0 +1,33 @@
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
;; Generic target instantiated at a vector width of 64: set the width
;; macro, then pull in target-generic-common.ll (presumably written in
;; terms of that width -- confirm against the included file).
define(`WIDTH',`64')
include(`target-generic-common.ll')

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -39,12 +39,12 @@ reduce_equal(WIDTH)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; broadcast/rotate/shuffle
declare <WIDTH x float> @__smear_float(float) nounwind readnone
declare <WIDTH x double> @__smear_double(double) nounwind readnone
declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone
declare <WIDTH x float> @__smear_float(<WIDTH x float>, float) nounwind readnone
declare <WIDTH x double> @__smear_double(<WIDTH x double>, double) nounwind readnone
declare <WIDTH x i8> @__smear_i8(<WIDTH x i8>, i8) nounwind readnone
declare <WIDTH x i16> @__smear_i16(<WIDTH x i16>, i16) nounwind readnone
declare <WIDTH x i32> @__smear_i32(<WIDTH x i32>, i32) nounwind readnone
declare <WIDTH x i64> @__smear_i64(<WIDTH x i64>, i64) nounwind readnone
declare <WIDTH x float> @__broadcast_float(<WIDTH x float>, i32) nounwind readnone
declare <WIDTH x double> @__broadcast_double(<WIDTH x double>, i32) nounwind readnone
@@ -201,7 +201,7 @@ declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reductions
declare i32 @__movmsk(<WIDTH x i1>) nounwind readnone
declare i64 @__movmsk(<WIDTH x i1>) nounwind readnone
declare float @__reduce_add_float(<WIDTH x float>) nounwind readnone
declare float @__reduce_min_float(<WIDTH x float>) nounwind readnone
@@ -249,7 +249,16 @@ declare void @__masked_store_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
declare void @__masked_store_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
<WIDTH x i1> %mask) nounwind
ifelse(LLVM_VERSION, `LLVM_3_1svn',`
ifelse(LLVM_VERSION, `LLVM_3_0', `
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
<WIDTH x i1> %mask) nounwind
', `
define void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
<WIDTH x i1>) nounwind alwaysinline {
%v = load <WIDTH x i8> * %0
@@ -281,15 +290,6 @@ define void @__masked_store_blend_64(<WIDTH x i64>* nocapture,
store <WIDTH x i64> %v1, <WIDTH x i64> * %0
ret void
}
',`
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
<WIDTH x i1> %mask) nounwind
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -295,7 +295,7 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
; first do two 4-wide movmsk calls
%floatmask = bitcast <8 x i32> %0 to <8 x float>
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
@@ -309,7 +309,8 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
; of the second one
%v1s = shl i32 %v1, 4
%v = or i32 %v0, %v1s
ret i32 %v
%v64 = zext i32 %v to i64
ret i64 %v64
}
define <4 x float> @__vec4_add_float(<4 x float> %v0,

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -239,10 +239,11 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
define i64 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
%floatmask = bitcast <4 x i32> %0 to <4 x float>
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
ret i32 %v
%v64 = zext i32 %v to i64
ret i64 %v64
}
define float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -237,7 +237,7 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>,
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
; first do two 4-wide movmsk calls
%floatmask = bitcast <8 x i32> %0 to <8 x float>
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
@@ -251,7 +251,8 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
; of the second one
%v1s = shl i32 %v1, 4
%v = or i32 %v0, %v1s
ret i32 %v
%v64 = zext i32 %v to i64
ret i64 %v64
}
define float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -271,10 +271,11 @@ define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alway
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
define i64 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
%floatmask = bitcast <4 x i32> %0 to <4 x float>
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
ret i32 %v
%v64 = zext i32 %v to i64
ret i64 %v64
}
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone

View File

@@ -1,4 +1,4 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; Copyright (c) 2010-2012, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
@@ -38,6 +38,18 @@ declare i1 @__is_compile_time_constant_uniform_int32(i32)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; It is a bit of a pain to compute this in m4 for 32 and 64-wide targets...
;; Integer constant with the low bits set, one per lane of the target
;; width.  The 64- and 32-wide values are written out as literals; every
;; other width is computed with m4 arithmetic as (1 << width) - 1.
define(`ALL_ON_MASK',
`ifelse(WIDTH, `64', `-1',
WIDTH, `32', `4294967295',
`eval((1<<WIDTH)-1)')')
;; Integer constant with only the highest lane bit set.  As above, the
;; 64- and 32-wide values are literals and the remaining widths are
;; computed as 1 << (width - 1).
define(`MASK_HIGH_BIT_ON',
`ifelse(WIDTH, `64', `-9223372036854775808',
WIDTH, `32', `2147483648',
`eval(1<<(WIDTH-1))')')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Helper macro for calling various SSE instructions for scalar values
;; but where the instruction takes a vector parameter.
@@ -1529,7 +1541,7 @@ declare i32 @__fast_masked_vload()
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind
declare void @ISPCSync(i8*) nounwind
declare void @ISPCInstrument(i8*, i8*, i32, i32) nounwind
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
declare i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %mask)
declare i1 @__is_compile_time_constant_varying_int32(<WIDTH x i32>)
@@ -1654,6 +1666,265 @@ declare void @__pseudo_scatter_base_offsets64_32(i8 * nocapture, <WIDTH x i64>,
declare void @__pseudo_scatter_base_offsets64_64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
<WIDTH x i64>, <WIDTH x MASK>) nounwind
;; Float log, exp and pow entry points.  Each comes in a scalar form (the
;; _uniform_ names, taking and returning float) and a vector form (the
;; _varying_ names, taking and returning a full-width float vector).
;; They are only declared at this point; all are marked nounwind readnone.
declare float @__log_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
declare float @__exp_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
declare float @__pow_uniform_float(float, float) nounwind readnone
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Sink functions, one per element size (8, 16, 32 and 64 bits).  Each takes a
;; full-width vector and returns nothing.  They are only declared here;
;; __keep_funcs_live hands the results of its value-returning calls to them.
declare void @__use8(<WIDTH x i8>)
declare void @__use16(<WIDTH x i16>)
declare void @__use32(<WIDTH x i32>)
declare void @__use64(<WIDTH x i64>)
;; This is a temporary function that will be removed at the end of
;; compilation--the idea is that it calls out to all of the various
;; functions / pseudo-function declarations that we need to keep around
;; so that they are available to the various optimization passes. This
;; then prevents those functions from being removed as dead code when
;; we do early DCE...
;; Artificial caller that references each load, store, gather and scatter
;; entry point once, using placeholder arguments.
;; Parameters: a raw byte pointer, one vector per element size (8, 16, 32 and
;; 64 bits) and a lane mask.  Every value-returning call has its result passed
;; to the matching __use sink so that the result has a consumer.
define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
<WIDTH x i32> %v32, <WIDTH x i64> %v64,
<WIDTH x MASK> %mask) {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; loads
;; masked loads first, then load-and-broadcast, each for all four element sizes
%ml8 = call <WIDTH x i8> @__masked_load_8(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %ml8)
%ml16 = call <WIDTH x i16> @__masked_load_16(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %ml16)
%ml32 = call <WIDTH x i32> @__masked_load_32(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %ml32)
%ml64 = call <WIDTH x i64> @__masked_load_64(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %ml64)
%lb8 = call <WIDTH x i8> @__load_and_broadcast_8(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %lb8)
%lb16 = call <WIDTH x i16> @__load_and_broadcast_16(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %lb16)
%lb32 = call <WIDTH x i32> @__load_and_broadcast_32(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %lb32)
%lb64 = call <WIDTH x i64> @__load_and_broadcast_64(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %lb64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; stores
;; each bitcast reinterprets the byte pointer as a pointer to the vector type
;; of one element size; the pseudo masked stores are called right after it
%pv8 = bitcast i8 * %ptr to <WIDTH x i8> *
call void @__pseudo_masked_store_8(<WIDTH x i8> * %pv8, <WIDTH x i8> %v8,
<WIDTH x MASK> %mask)
%pv16 = bitcast i8 * %ptr to <WIDTH x i16> *
call void @__pseudo_masked_store_16(<WIDTH x i16> * %pv16, <WIDTH x i16> %v16,
<WIDTH x MASK> %mask)
%pv32 = bitcast i8 * %ptr to <WIDTH x i32> *
call void @__pseudo_masked_store_32(<WIDTH x i32> * %pv32, <WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
%pv64 = bitcast i8 * %ptr to <WIDTH x i64> *
call void @__pseudo_masked_store_64(<WIDTH x i64> * %pv64, <WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
;; the non-pseudo masked stores reuse the typed pointers from above
call void @__masked_store_8(<WIDTH x i8> * %pv8, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__masked_store_16(<WIDTH x i16> * %pv16, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__masked_store_32(<WIDTH x i32> * %pv32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__masked_store_64(<WIDTH x i64> * %pv64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
;; blend variants of the masked stores
call void @__masked_store_blend_8(<WIDTH x i8> * %pv8, <WIDTH x i8> %v8,
<WIDTH x MASK> %mask)
call void @__masked_store_blend_16(<WIDTH x i16> * %pv16, <WIDTH x i16> %v16,
<WIDTH x MASK> %mask)
call void @__masked_store_blend_32(<WIDTH x i32> * %pv32, <WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__masked_store_blend_64(<WIDTH x i64> * %pv64, <WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gathers
;; pseudo gathers taking a 32-bit or 64-bit vector plus the mask
;; NOTE(review): that vector operand is presumably the per-lane pointer
;; values -- confirm against the declarations
%pg32_8 = call <WIDTH x i8> @__pseudo_gather32_8(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %pg32_8)
%pg32_16 = call <WIDTH x i16> @__pseudo_gather32_16(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %pg32_16)
%pg32_32 = call <WIDTH x i32> @__pseudo_gather32_32(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %pg32_32)
%pg32_64 = call <WIDTH x i64> @__pseudo_gather32_64(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %pg32_64)
%pg64_8 = call <WIDTH x i8> @__pseudo_gather64_8(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %pg64_8)
%pg64_16 = call <WIDTH x i16> @__pseudo_gather64_16(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %pg64_16)
%pg64_32 = call <WIDTH x i32> @__pseudo_gather64_32(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %pg64_32)
%pg64_64 = call <WIDTH x i64> @__pseudo_gather64_64(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %pg64_64)
;; non-pseudo gathers with the same operand pattern
%g32_8 = call <WIDTH x i8> @__gather32_i8(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %g32_8)
%g32_16 = call <WIDTH x i16> @__gather32_i16(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %g32_16)
%g32_32 = call <WIDTH x i32> @__gather32_i32(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %g32_32)
%g32_64 = call <WIDTH x i64> @__gather32_i64(<WIDTH x i32> %v32,
<WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %g32_64)
%g64_8 = call <WIDTH x i8> @__gather64_i8(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %g64_8)
%g64_16 = call <WIDTH x i16> @__gather64_i16(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %g64_16)
%g64_32 = call <WIDTH x i32> @__gather64_i32(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %g64_32)
%g64_64 = call <WIDTH x i64> @__gather64_i64(<WIDTH x i64> %v64,
<WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %g64_64)
;; base+offsets gathers: operands are the byte pointer, a vector, the
;; constant i32 0, a second vector and the mask
;; NOTE(review): the i32 0 and the second vector look like the offset scale
;; and offset delta operands -- confirm against the definitions
%pgbo32_8 = call <WIDTH x i8>
@__pseudo_gather_base_offsets32_8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %pgbo32_8)
%pgbo32_16 = call <WIDTH x i16>
@__pseudo_gather_base_offsets32_16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %pgbo32_16)
%pgbo32_32 = call <WIDTH x i32>
@__pseudo_gather_base_offsets32_32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %pgbo32_32)
%pgbo32_64 = call <WIDTH x i64>
@__pseudo_gather_base_offsets32_64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %pgbo32_64)
%gbo32_8 = call <WIDTH x i8>
@__gather_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %gbo32_8)
%gbo32_16 = call <WIDTH x i16>
@__gather_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %gbo32_16)
%gbo32_32 = call <WIDTH x i32>
@__gather_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %gbo32_32)
%gbo32_64 = call <WIDTH x i64>
@__gather_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %gbo32_64)
%pgbo64_8 = call <WIDTH x i8>
@__pseudo_gather_base_offsets64_8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %pgbo64_8)
%pgbo64_16 = call <WIDTH x i16>
@__pseudo_gather_base_offsets64_16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %pgbo64_16)
%pgbo64_32 = call <WIDTH x i32>
@__pseudo_gather_base_offsets64_32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %pgbo64_32)
%pgbo64_64 = call <WIDTH x i64>
@__pseudo_gather_base_offsets64_64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %pgbo64_64)
%gbo64_8 = call <WIDTH x i8>
@__gather_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %gbo64_8)
%gbo64_16 = call <WIDTH x i16>
@__gather_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %gbo64_16)
%gbo64_32 = call <WIDTH x i32>
@__gather_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %gbo64_32)
%gbo64_64 = call <WIDTH x i64>
@__gather_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %gbo64_64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; scatters
;; same layout as the gathers, with the value vector to write added before
;; the mask; scatters return nothing so no __use call follows them
call void @__pseudo_scatter32_8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__pseudo_scatter32_16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__pseudo_scatter32_32(<WIDTH x i32> %v32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__pseudo_scatter32_64(<WIDTH x i32> %v32, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__pseudo_scatter64_8(<WIDTH x i64> %v64, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__pseudo_scatter64_16(<WIDTH x i64> %v64, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__pseudo_scatter64_32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__pseudo_scatter64_64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter32_i8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__scatter32_i16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__scatter32_i32(<WIDTH x i32> %v32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__scatter32_i64(<WIDTH x i32> %v32, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter64_i8(<WIDTH x i64> %v64, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__scatter64_i16(<WIDTH x i64> %v64, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__scatter64_i32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
;; base+offsets scatters, pseudo forms first and then the non-pseudo forms
call void @__pseudo_scatter_base_offsets32_8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets32_64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__pseudo_scatter_base_offsets64_64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i8> %v8, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i16> %v16, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i32> %v32, <WIDTH x MASK> %mask)
call void @__scatter_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
<WIDTH x i64> %v64, <WIDTH x MASK> %mask)
ret void
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector ops
@@ -1837,12 +2108,12 @@ ok:
define void @__do_assert_varying(i8 *%str, <WIDTH x MASK> %test,
<WIDTH x MASK> %mask) {
<WIDTH x MASK> %mask) {
%nottest = xor <WIDTH x MASK> %test,
< forloop(i, 1, eval(WIDTH-1), `MASK -1, ') MASK -1 >
%nottest_and_mask = and <WIDTH x MASK> %nottest, %mask
%mm = call i32 @__movmsk(<WIDTH x MASK> %nottest_and_mask)
%all_ok = icmp eq i32 %mm, 0
%mm = call i64 @__movmsk(<WIDTH x MASK> %nottest_and_mask)
%all_ok = icmp eq i64 %mm, 0
br i1 %all_ok, label %ok, label %fail
fail:
@@ -2244,14 +2515,18 @@ define <$1 x $2> @__load_and_broadcast_$3(i8 *, <$1 x MASK> %mask) nounwind alwa
;; $4: alignment for elements of type $2 (4, 8, ...)
define(`masked_load', `
define <$1 x $2> @__masked_load_$3(i8 *, <$1 x i32> %mask) nounwind alwaysinline {
define <$1 x $2> @__masked_load_$3(i8 *, <$1 x MASK> %mask) nounwind alwaysinline {
entry:
%mm = call i32 @__movmsk(<$1 x i32> %mask)
%mm = call i64 @__movmsk(<$1 x MASK> %mask)
; if the first lane and the last lane are on, then it is safe to do a vector load
; of the whole thing--what the lanes in the middle want turns out to not matter...
%mm_and = and i32 %mm, eval(1 | (1<<($1-1)))
%can_vload = icmp eq i32 %mm_and, eval(1 | (1<<($1-1)))
%mm_and_low = and i64 %mm, 1
%mm_and_high = and i64 %mm, MASK_HIGH_BIT_ON
%mm_and_high_shift = lshr i64 %mm_and_high, eval(WIDTH-1)
%mm_and_low_i1 = trunc i64 %mm_and_low to i1
%mm_and_high_shift_i1 = trunc i64 %mm_and_high_shift to i1
%can_vload = and i1 %mm_and_low_i1, %mm_and_high_shift_i1
%fast32 = call i32 @__fast_masked_vload()
%fast_i1 = trunc i32 %fast32 to i1
@@ -2270,9 +2545,10 @@ load:
loop:
; loop over the lanes and see if each one is on...
%lane = phi i32 [ 0, %entry ], [ %next_lane, %lane_done ]
%lanemask = shl i32 1, %lane
%mask_and = and i32 %mm, %lanemask
%do_lane = icmp ne i32 %mask_and, 0
%lane64 = zext i32 %lane to i64
%lanemask = shl i64 1, %lane64
%mask_and = and i64 %mm, %lanemask
%do_lane = icmp ne i64 %mask_and, 0
br i1 %do_lane, label %load_lane, label %lane_done
load_lane:
@@ -2484,12 +2760,12 @@ define(`packed_load_and_store', `
define i32 @__packed_load_active(i32 * %startptr, <WIDTH x i32> * %val_ptr,
<WIDTH x i32> %full_mask) nounwind alwaysinline {
entry:
%mask = call i32 @__movmsk(<WIDTH x i32> %full_mask)
%mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
br i1 %mask_known, label %known_mask, label %unknown_mask
known_mask:
%allon = icmp eq i32 %mask, eval((1 << WIDTH) -1)
%allon = icmp eq i64 %mask, ALL_ON_MASK
br i1 %allon, label %all_on, label %unknown_mask
all_on:
@@ -2505,12 +2781,12 @@ unknown_mask:
loop:
%lane = phi i32 [ 0, %unknown_mask ], [ %nextlane, %loopend ]
%lanemask = phi i32 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
%lanemask = phi i64 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
%offset = phi i32 [ 0, %unknown_mask ], [ %nextoffset, %loopend ]
; is the current lane on?
%and = and i32 %mask, %lanemask
%do_load = icmp eq i32 %and, %lanemask
%and = and i64 %mask, %lanemask
%do_load = icmp eq i64 %and, %lanemask
br i1 %do_load, label %load, label %loopend
load:
@@ -2525,7 +2801,7 @@ load:
loopend:
%nextoffset = phi i32 [ %offset1, %load ], [ %offset, %loop ]
%nextlane = add i32 %lane, 1
%nextlanemask = mul i32 %lanemask, 2
%nextlanemask = mul i64 %lanemask, 2
; are we done yet?
%test = icmp ne i32 %nextlane, WIDTH
@@ -2536,14 +2812,14 @@ done:
}
define i32 @__packed_store_active(i32 * %startptr, <WIDTH x i32> %vals,
<WIDTH x i32> %full_mask) nounwind alwaysinline {
<WIDTH x i32> %full_mask) nounwind alwaysinline {
entry:
%mask = call i32 @__movmsk(<WIDTH x i32> %full_mask)
%mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
br i1 %mask_known, label %known_mask, label %unknown_mask
known_mask:
%allon = icmp eq i32 %mask, eval((1 << WIDTH) -1)
%allon = icmp eq i64 %mask, ALL_ON_MASK
br i1 %allon, label %all_on, label %unknown_mask
all_on:
@@ -2556,12 +2832,12 @@ unknown_mask:
loop:
%lane = phi i32 [ 0, %unknown_mask ], [ %nextlane, %loopend ]
%lanemask = phi i32 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
%lanemask = phi i64 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
%offset = phi i32 [ 0, %unknown_mask ], [ %nextoffset, %loopend ]
; is the current lane on?
%and = and i32 %mask, %lanemask
%do_store = icmp eq i32 %and, %lanemask
%and = and i64 %mask, %lanemask
%do_store = icmp eq i64 %and, %lanemask
br i1 %do_store, label %store, label %loopend
store:
@@ -2574,7 +2850,7 @@ store:
loopend:
%nextoffset = phi i32 [ %offset1, %store ], [ %offset, %loop ]
%nextlane = add i32 %lane, 1
%nextlanemask = mul i32 %lanemask, 2
%nextlanemask = mul i64 %lanemask, 2
; are we done yet?
%test = icmp ne i32 %nextlane, WIDTH
@@ -2598,14 +2874,15 @@ define(`reduce_equal_aux', `
define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
<$1 x MASK> %mask) nounwind alwaysinline {
entry:
%mm = call i32 @__movmsk(<$1 x MASK> %mask)
%allon = icmp eq i32 %mm, eval((1<<$1)-1)
%mm = call i64 @__movmsk(<$1 x MASK> %mask)
%allon = icmp eq i64 %mm, ALL_ON_MASK
br i1 %allon, label %check_neighbors, label %domixed
domixed:
; First, figure out which lane is the first active one
%first = call i32 @llvm.cttz.i32(i32 %mm)
%baseval = extractelement <$1 x $2> %v, i32 %first
%first = call i64 @llvm.cttz.i64(i64 %mm)
%first32 = trunc i64 %first to i32
%baseval = extractelement <$1 x $2> %v, i32 %first32
%basev1 = bitcast $2 %baseval to <1 x $2>
; get a vector that is that value smeared across all elements
%basesmear = shufflevector <1 x $2> %basev1, <1 x $2> undef,
@@ -2636,9 +2913,9 @@ check_neighbors:
%eq = $5 eq <$1 x $2> %vec, %vr
ifelse(MASK,i32, `
%eq32 = sext <$1 x i1> %eq to <$1 x i32>
%eqmm = call i32 @__movmsk(<$1 x i32> %eq32)', `
%eqmm = call i32 @__movmsk(<$1 x MASK> %eq)')
%alleq = icmp eq i32 %eqmm, eval((1<<$1)-1)
%eqmm = call i64 @__movmsk(<$1 x i32> %eq32)', `
%eqmm = call i64 @__movmsk(<$1 x MASK> %eq)')
%alleq = icmp eq i64 %eqmm, ALL_ON_MASK
br i1 %alleq, label %all_equal, label %not_all_equal
', `
; But for 64-bit elements, it turns out to be more efficient to just
@@ -2751,14 +3028,14 @@ define(`per_lane', `
br label %pl_entry
pl_entry:
%pl_mask = call i32 @__movmsk($2)
%pl_mask = call i64 @__movmsk($2)
%pl_mask_known = call i1 @__is_compile_time_constant_mask($2)
br i1 %pl_mask_known, label %pl_known_mask, label %pl_unknown_mask
pl_known_mask:
;; the mask is known at compile time; see if it is something we can
;; handle more efficiently
%pl_is_allon = icmp eq i32 %pl_mask, eval((1<<$1)-1)
%pl_is_allon = icmp eq i64 %pl_mask, ALL_ON_MASK
br i1 %pl_is_allon, label %pl_all_on, label %pl_unknown_mask
pl_all_on:
@@ -2780,11 +3057,11 @@ pl_unknown_mask:
pl_loop:
;; Loop over each lane and see if we want to do the work for this lane
%pl_lane = phi i32 [ 0, %pl_unknown_mask ], [ %pl_nextlane, %pl_loopend ]
%pl_lanemask = phi i32 [ 1, %pl_unknown_mask ], [ %pl_nextlanemask, %pl_loopend ]
%pl_lanemask = phi i64 [ 1, %pl_unknown_mask ], [ %pl_nextlanemask, %pl_loopend ]
; is the current lane on? if so, goto do work, otherwise to end of loop
%pl_and = and i32 %pl_mask, %pl_lanemask
%pl_doit = icmp eq i32 %pl_and, %pl_lanemask
%pl_and = and i64 %pl_mask, %pl_lanemask
%pl_doit = icmp eq i64 %pl_and, %pl_lanemask
br i1 %pl_doit, label %pl_dolane, label %pl_loopend
pl_dolane:
@@ -2795,7 +3072,7 @@ pl_dolane:
pl_loopend:
%pl_nextlane = add i32 %pl_lane, 1
%pl_nextlanemask = mul i32 %pl_lanemask, 2
%pl_nextlanemask = mul i64 %pl_lanemask, 2
; are we done yet?
%pl_test = icmp ne i32 %pl_nextlane, $1
@@ -2880,11 +3157,11 @@ define <$1 x $2> @__gather_base_offsets32_$2(i8 * %ptr, <$1 x i32> %offsets, i32
%newDelta = load <$1 x i32> * %deltaPtr
%ret0 = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %newOffsets,
i32 %offset_scale, <$1 x i32> %offset_delta,
i32 %offset_scale, <$1 x i32> %newDelta,
<$1 x $2> undef, i32 0)
forloop(lane, 1, eval($1-1),
`patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr,
<$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %offset_delta,
<$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %newDelta,
<$1 x $2> %retPREV, i32 LANE)
', `LANE', lane), `PREV', eval(lane-1))')
ret <$1 x $2> %ret`'eval($1-1)

View File

@@ -12,9 +12,7 @@
//
//===----------------------------------------------------------------------===//
#ifdef LLVM_2_9
#warning "The C++ backend isn't supported when building with LLVM 2.9"
#else
#include <stdio.h>
#ifndef _MSC_VER
#include <inttypes.h>
@@ -339,8 +337,6 @@ namespace {
bool IsVolatile, unsigned Alignment);
private :
std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
void lowerIntrinsics(Function &F);
/// Prints the definition of the intrinsic function F. Supports the
/// intrinsics which need to be explicitly defined in the CBackend.
@@ -363,7 +359,7 @@ namespace {
bool printConstExprCast(const ConstantExpr *CE, bool Static);
void printConstantArray(ConstantArray *CPA, bool Static);
void printConstantVector(ConstantVector *CV, bool Static);
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
void printConstantDataSequential(ConstantDataSequential *CDS, bool Static);
#endif
@@ -440,11 +436,11 @@ namespace {
void visitInvokeInst(InvokeInst &I) {
llvm_unreachable("Lowerinvoke pass didn't work!");
}
#if !defined(LLVM_3_1) && !defined(LLVM_3_1svn)
#ifdef LLVM_3_0
void visitUnwindInst(UnwindInst &I) {
llvm_unreachable("Lowerinvoke pass didn't work!");
}
#endif // !LLVM_3_1svn
#endif // LLVM_3_0
void visitResumeInst(ResumeInst &I) {
llvm_unreachable("DwarfEHPrepare pass didn't work!");
}
@@ -804,7 +800,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty,
}
void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
#ifndef LLVM_3_1svn
#ifdef LLVM_3_0
Type *ETy = CPA->getType()->getElementType();
// MMP: this looks like a bug: both sides of the || are the same
bool isString = ETy == Type::getInt8Ty(CPA->getContext());
@@ -857,7 +853,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
Out << "\"";
return;
}
#endif // !LLVM_3_1
#endif // LLVM_3_0
printConstant(cast<Constant>(CPA->getOperand(0)), Static);
for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
@@ -874,7 +870,7 @@ void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
}
}
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
void CWriter::printConstantDataSequential(ConstantDataSequential *CDS,
bool Static) {
// As a special case, print the array as a string if it is an array of
@@ -931,7 +927,21 @@ void CWriter::printConstantDataSequential(ConstantDataSequential *CDS,
}
}
}
#endif // LLVM_3_1svn
#endif // !LLVM_3_0
#ifndef LLVM_3_0
static inline std::string ftostr(const APFloat& V) {
std::string Buf;
if (&V.getSemantics() == &APFloat::IEEEdouble) {
raw_string_ostream(Buf) << V.convertToDouble();
return Buf;
} else if (&V.getSemantics() == &APFloat::IEEEsingle) {
raw_string_ostream(Buf) << (double)V.convertToFloat();
return Buf;
}
return "<unknown format in ftostr>"; // error
}
#endif // !LLVM_3_0
// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
// textually as a double (rather than as a reference to a stack-allocated
@@ -1084,6 +1094,26 @@ bool CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) {
return false;
}
// FIXME: generalize this/make it not so hard-coded?
// Returns the name of the __smear_* helper that matches the given element
// type: float, double, or an integer of width 1, 8, 16, 32 or 64 bits.
// Returns NULL for every other type, including integers of any other width.
static const char *lGetSmearFunc(Type *matchType) {
  if (matchType->getTypeID() == Type::FloatTyID)
    return "__smear_float";
  if (matchType->getTypeID() == Type::DoubleTyID)
    return "__smear_double";
  if (matchType->getTypeID() != Type::IntegerTyID)
    return NULL;

  // Integer element: pick the helper by bit width.
  const unsigned bits = cast<IntegerType>(matchType)->getBitWidth();
  switch (bits) {
  case 1:  return "__smear_i1";
  case 8:  return "__smear_i8";
  case 16: return "__smear_i16";
  case 32: return "__smear_i32";
  case 64: return "__smear_i64";
  default: return NULL;  // unsupported width
  }
}
// printConstant - The LLVM Constant to C Constant converter.
void CWriter::printConstant(Constant *CPV, bool Static) {
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
@@ -1400,11 +1430,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
}
if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
printConstantArray(CA, Static);
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
} else if (ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(CPV)) {
printConstantDataSequential(CDS, Static);
#endif // LLVM_3_1svn
#endif // !LLVM_3_0
} else {
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
if (AT->getNumElements()) {
@@ -1423,30 +1453,68 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
Out << ")";
break;
}
case Type::VectorTyID:
printType(Out, CPV->getType());
Out << "(";
case Type::VectorTyID: {
VectorType *VT = dyn_cast<VectorType>(CPV->getType());
const char *smearFunc = lGetSmearFunc(VT->getElementType());
if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
printConstantVector(CV, Static);
#ifdef LLVM_3_1svn
} else if (ConstantDataSequential *CDS =
dyn_cast<ConstantDataSequential>(CPV)) {
printConstantDataSequential(CDS, Static);
#endif
} else {
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
VectorType *VT = cast<VectorType>(CPV->getType());
if (isa<ConstantAggregateZero>(CPV)) {
assert(smearFunc != NULL);
Constant *CZ = Constant::getNullValue(VT->getElementType());
Out << smearFunc << "(";
printType(Out, VT);
Out << "(), ";
printConstant(CZ, Static);
Out << ")";
}
else if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
llvm::Constant *splatValue = CV->getSplatValue();
if (splatValue != NULL && smearFunc != NULL) {
Out << smearFunc << "(";
printType(Out, VT);
Out << "(), ";
printConstant(splatValue, Static);
Out << ")";
}
else {
printType(Out, CPV->getType());
Out << "(";
printConstantVector(CV, Static);
Out << ")";
}
}
#ifndef LLVM_3_0
else if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(CPV)) {
llvm::Constant *splatValue = CDV->getSplatValue();
if (splatValue != NULL && smearFunc != NULL) {
Out << smearFunc << "(";
printType(Out, VT);
Out << "(), ";
printConstant(splatValue, Static);
Out << ")";
}
else {
printType(Out, CPV->getType());
Out << "(";
printConstantDataSequential(CDV, Static);
Out << ")";
}
}
#endif // !LLVM_3_0
else {
assert(isa<UndefValue>(CPV));
Constant *CZ = Constant::getNullValue(VT->getElementType());
printType(Out, CPV->getType());
Out << "(";
printConstant(CZ, Static);
for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
Out << ", ";
printConstant(CZ, Static);
}
Out << ")";
}
Out << ")";
break;
}
case Type::StructTyID:
if (!Static) {
// call init func...
@@ -1639,7 +1707,12 @@ std::string CWriter::GetValueName(const Value *Operand) {
VarName += ch;
}
return VarName + "_llvm_cbe";
if (isa<BasicBlock>(Operand))
VarName += "_label";
else
VarName += "_";
return VarName;
}
/// writeInstComputationInline - Emit the computation for the specified
@@ -2071,69 +2144,18 @@ bool CWriter::doInitialization(Module &M) {
Out << "#include \"" << includeName << "\"\n";
generateCompilerSpecificCode(Out, TD);
// Function declarations
Out << "\n/* Function Declarations */\n";
Out << "\n/* Basic Library Function Declarations */\n";
Out << "extern \"C\" {\n";
Out << "int puts(unsigned char *);\n";
Out << "unsigned int putchar(unsigned int);\n";
Out << "int fflush(void *);\n";
Out << "int printf(const unsigned char *, ...);\n";
Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n";
Out << "uint8_t *memset(uint8_t *, uint8_t, uint64_t );\n";
Out << "void memset_pattern16(void *, const void *, uint64_t );\n";
Out << "}\n\n";
// Store the intrinsics which will be declared/defined below.
SmallVector<const Function*, 8> intrinsicsToDefine;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
// Don't print declarations for intrinsic functions.
// Store the used intrinsics, which need to be explicitly defined.
if (I->isIntrinsic()) {
switch (I->getIntrinsicID()) {
default:
break;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
intrinsicsToDefine.push_back(I);
break;
}
continue;
}
if (I->getName() == "setjmp" || I->getName() == "abort" ||
I->getName() == "longjmp" || I->getName() == "_setjmp" ||
I->getName() == "memset" || I->getName() == "memset_pattern16" ||
I->getName() == "puts" ||
I->getName() == "printf" || I->getName() == "putchar" ||
I->getName() == "fflush" || I->getName() == "malloc" ||
I->getName() == "free")
continue;
// Don't redeclare ispc's own intrinsics
std::string name = I->getName();
if (name.size() > 2 && name[0] == '_' && name[1] == '_')
continue;
if (I->hasExternalWeakLinkage())
Out << "extern ";
printFunctionSignature(I, true);
if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
Out << " __ATTRIBUTE_WEAK__";
if (I->hasExternalWeakLinkage())
Out << " __EXTERNAL_WEAK__";
if (StaticCtors.count(I))
Out << " __ATTRIBUTE_CTOR__";
if (StaticDtors.count(I))
Out << " __ATTRIBUTE_DTOR__";
if (I->hasHiddenVisibility())
Out << " __HIDDEN__";
if (I->hasName() && I->getName()[0] == 1)
Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
Out << ";\n";
}
Out << "}\n";
generateCompilerSpecificCode(Out, TD);
// Provide a definition for `bool' if not compiling with a C++ compiler.
Out << "\n"
@@ -2240,6 +2262,106 @@ bool CWriter::doInitialization(Module &M) {
}
}
// Function declarations
Out << "\n/* Function Declarations */\n";
Out << "extern \"C\" {\n";
// Store the intrinsics which will be declared/defined below.
SmallVector<const Function*, 8> intrinsicsToDefine;
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
// Don't print declarations for intrinsic functions.
// Store the used intrinsics, which need to be explicitly defined.
if (I->isIntrinsic()) {
switch (I->getIntrinsicID()) {
default:
break;
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
intrinsicsToDefine.push_back(I);
break;
}
continue;
}
if (I->getName() == "setjmp" || I->getName() == "abort" ||
I->getName() == "longjmp" || I->getName() == "_setjmp" ||
I->getName() == "memset" || I->getName() == "memset_pattern16" ||
I->getName() == "puts" ||
I->getName() == "printf" || I->getName() == "putchar" ||
I->getName() == "fflush" || I->getName() == "malloc" ||
I->getName() == "free")
continue;
// Don't redeclare ispc's own intrinsics
std::string name = I->getName();
if (name.size() > 2 && name[0] == '_' && name[1] == '_')
continue;
if (I->hasExternalWeakLinkage())
Out << "extern ";
printFunctionSignature(I, true);
if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
Out << " __ATTRIBUTE_WEAK__";
if (I->hasExternalWeakLinkage())
Out << " __EXTERNAL_WEAK__";
if (StaticCtors.count(I))
Out << " __ATTRIBUTE_CTOR__";
if (StaticDtors.count(I))
Out << " __ATTRIBUTE_DTOR__";
if (I->hasHiddenVisibility())
Out << " __HIDDEN__";
if (I->hasName() && I->getName()[0] == 1)
Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
Out << ";\n";
}
Out << "}\n\n";
if (!M.empty())
Out << "\n\n/* Function Bodies */\n";
// Emit some helper functions for dealing with FCMP instruction's
// predicates
Out << "template <typename A, typename B> static inline int llvm_fcmp_ord(A X, B Y) { ";
Out << "return X == X && Y == Y; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_uno(A X, B Y) { ";
Out << "return X != X || Y != Y; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ueq(A X, B Y) { ";
Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_une(A X, B Y) { ";
Out << "return X != Y; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ult(A X, B Y) { ";
Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ugt(A X, B Y) { ";
Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ule(A X, B Y) { ";
Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_uge(A X, B Y) { ";
Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_oeq(A X, B Y) { ";
Out << "return X == Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_one(A X, B Y) { ";
Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_olt(A X, B Y) { ";
Out << "return X < Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ogt(A X, B Y) { ";
Out << "return X > Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ole(A X, B Y) { ";
Out << "return X <= Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_oge(A X, B Y) { ";
Out << "return X >= Y ; }\n";
Out << "template <typename A> A *Memset(A *ptr, int count, size_t len) { ";
Out << "return (A *)memset(ptr, count, len); }\n";
// Emit definitions of the intrinsics.
for (SmallVector<const Function*, 8>::const_iterator
I = intrinsicsToDefine.begin(),
E = intrinsicsToDefine.end(); I != E; ++I) {
printIntrinsicDefinition(**I, Out);
}
// Output the global variable definitions and contents...
if (!M.global_empty()) {
Out << "\n\n/* Global Variable Definitions and Initialization */\n";
@@ -2303,49 +2425,6 @@ bool CWriter::doInitialization(Module &M) {
}
}
if (!M.empty())
Out << "\n\n/* Function Bodies */\n";
// Emit some helper functions for dealing with FCMP instruction's
// predicates
Out << "template <typename A, typename B> static inline int llvm_fcmp_ord(A X, B Y) { ";
Out << "return X == X && Y == Y; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_uno(A X, B Y) { ";
Out << "return X != X || Y != Y; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ueq(A X, B Y) { ";
Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_une(A X, B Y) { ";
Out << "return X != Y; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ult(A X, B Y) { ";
Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ugt(A X, B Y) { ";
Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ule(A X, B Y) { ";
Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_uge(A X, B Y) { ";
Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_oeq(A X, B Y) { ";
Out << "return X == Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_one(A X, B Y) { ";
Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_olt(A X, B Y) { ";
Out << "return X < Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ogt(A X, B Y) { ";
Out << "return X > Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_ole(A X, B Y) { ";
Out << "return X <= Y ; }\n";
Out << "template <typename A, typename B> static inline int llvm_fcmp_oge(A X, B Y) { ";
Out << "return X >= Y ; }\n";
Out << "template <typename A> A *Memset(A *ptr, int count, size_t len) { ";
Out << "return (A *)memset(ptr, count, len); }\n";
// Emit definitions of the intrinsics.
for (SmallVector<const Function*, 8>::const_iterator
I = intrinsicsToDefine.begin(),
E = intrinsicsToDefine.end(); I != E; ++I) {
printIntrinsicDefinition(**I, Out);
}
return false;
}
@@ -2823,17 +2902,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
Out << ";\n";
#ifdef LLVM_3_1svn
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
ConstantInt* CaseVal = i.getCaseValue();
BasicBlock* Succ = i.getCaseSuccessor();
#else
#ifdef LLVM_3_0
// Skip the first item since that's the default case.
unsigned NumCases = SI.getNumCases();
for (unsigned i = 1; i < NumCases; ++i) {
ConstantInt* CaseVal = SI.getCaseValue(i);
BasicBlock* Succ = SI.getSuccessor(i);
#endif // LLVM_3_1svn
#else
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
ConstantInt* CaseVal = i.getCaseValue();
BasicBlock* Succ = i.getCaseSuccessor();
#endif // !LLVM_3_0
Out << " case ";
writeOperand(CaseVal);
Out << ":\n";
@@ -3401,6 +3480,7 @@ void CWriter::lowerIntrinsics(Function &F) {
case Intrinsic::ppc_altivec_lvsl:
case Intrinsic::uadd_with_overflow:
case Intrinsic::sadd_with_overflow:
case Intrinsic::trap:
// We directly implement these intrinsics
break;
default:
@@ -3568,7 +3648,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
// If this is an intrinsic that directly corresponds to a GCC
// builtin, we emit it here.
const char *BuiltinName = "";
#ifdef LLVM_3_0
Function *F = I.getCalledFunction();
#endif // LLVM_3_0
#define GET_GCC_BUILTIN_NAME
#include "llvm/Intrinsics.gen"
#undef GET_GCC_BUILTIN_NAME
@@ -3711,184 +3793,17 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
writeOperand(I.getArgOperand(1));
Out << ")";
return true;
case Intrinsic::trap:
Out << "abort()";
return true;
}
}
// Converts an LLVM inline-asm constraint code into the spelling gcc
// expects.  The translation comes from the table returned by the target's
// MCAsmInfo::getAsmCBE(); when no target matches the triple, or the table
// has no entry for the code, the code is returned unchanged.
// TODO: work out platform independent constraints and factor those out
//       of the per target tables
// TODO: handle multiple constraint codes
std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
  assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");

  // Use the module's triple, falling back to the default/host triple when
  // the module does not specify one.
  std::string Triple = TheModule->getTargetTriple();
  if (Triple.empty()) {
#if defined(LLVM_3_1) || defined(LLVM_3_1svn)
    Triple = llvm::sys::getDefaultTargetTriple();
#else
    Triple = llvm::sys::getHostTriple();
#endif
  }

  std::string LookupError;
  const llvm::Target *Match = TargetRegistry::lookupTarget(Triple, LookupError);
  if (Match == NULL)
    return c.Codes[0];

  // Grab the translation table from MCAsmInfo if it exists.
  const MCAsmInfo *TargetAsm = Match->createMCAsmInfo(Triple);
  const char *const *table = TargetAsm->getAsmCBE();

  // The table is a NULL-terminated list of (llvm code, gcc code) pairs;
  // remember the index of the matching pair, if any.
  int hit = -1;
  if (table != NULL) {
    for (int i = 0; table[i] != NULL; i += 2) {
      if (c.Codes[0] == table[i]) {
        hit = i;
        break;
      }
    }
  }

  delete TargetAsm;

  if (hit >= 0)
    return table[hit + 1];

  // Default is identity.
  return c.Codes[0];
}
//TODO: import logic from AsmPrinter.cpp
/** Rewrites an LLVM inline-asm template string into the form gcc's
    extended asm expects and returns the rewritten copy.

    - newline and tab characters become the two-character escapes \n and \t
      so the result can be emitted inside a C string literal;
    - "$N" becomes "%N";
    - "${N:mod}" becomes "%modN", and "${N}" (no modifier) becomes "%N";
    - a literal '%' is doubled to "%%".

    A "${" with no closing '}' is not treated as an operand reference; its
    '$' is simply rewritten to '%'.  A ':' that appears after the closing
    '}' is not part of the operand and is left alone.
 */
static std::string gccifyAsm(std::string asmstr) {
  for (std::string::size_type i = 0; i != asmstr.size(); ++i) {
    const char ch = asmstr[i];
    if (ch == '\n') {
      asmstr.replace(i, 1, "\\n");
    } else if (ch == '\t') {
      asmstr.replace(i, 1, "\\t");
    } else if (ch == '$') {
      // Locate the end of a braced operand reference, if this is one.
      std::string::size_type close = std::string::npos;
      if (i + 1 < asmstr.size() && asmstr[i + 1] == '{')
        close = asmstr.find('}', i + 2);

      if (close == std::string::npos) {
        // Plain "$N" (or a malformed "${"): only the sigil changes.
        asmstr.replace(i, 1, "%");
        continue;
      }

      // The modifier separator only counts if it lies inside the braces;
      // otherwise the operand has no modifier.
      std::string::size_type colon = asmstr.find(':', i + 2);
      if (colon > close)
        colon = close;

      std::string n = "%";
      if (colon < close)
        n += asmstr.substr(colon + 1, close - colon - 1);  // modifier
      n += asmstr.substr(i + 2, colon - i - 2);            // operand number

      asmstr.replace(i, close - i + 1, n);
      // Skip past the replacement so its '%' is not doubled below.
      i += n.size() - 1;
    } else if (ch == '%') { //grr
      asmstr.replace(i, 1, "%%");
      ++i;
    }
  }

  return asmstr;
}
//TODO: assumptions about what consume arguments from the call are likely wrong
//      handle commutativity
/** Emits a gcc-style "__asm__ volatile (...)" statement for an inline-asm
    call: the rewritten template string, then the output, input and
    clobber operand lists built from the parsed constraints.

    Operands within a list are separated by ", ", and the clobber list
    (when non-empty) is introduced by its own ':' so that it is not run
    together with the input operands.

    Inline assembly is still flagged as unsupported by the assertion at
    the end of the function.
 */
void CWriter::visitInlineAsm(CallInst &CI) {
  InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
  InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();

  // One entry per value produced by the asm: (call, field index), with
  // index -1 for a single non-struct result.
  std::vector<std::pair<Value*, int> > ResultVals;
  if (CI.getType() == Type::getVoidTy(CI.getContext()))
    ;
  else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
    for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
      ResultVals.push_back(std::make_pair(&CI, (int)i));
  } else {
    ResultVals.push_back(std::make_pair(&CI, -1));
  }

  // Fix up the asm string for gcc and emit it.
  Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
  Out << " :";

  unsigned ValueCount = 0;
  bool IsFirst = true;

  // Convert over all the output constraints.
  for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
       E = Constraints.end(); I != E; ++I) {

    if (I->Type != InlineAsm::isOutput) {
      ++ValueCount;
      continue;  // Ignore non-output constraints.
    }

    assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
    std::string C = InterpretASMConstraint(*I);
    if (C.empty()) continue;

    // Separate this operand from the previous one.  The flag is cleared
    // unconditionally so that every operand after the first gets a comma.
    if (!IsFirst)
      Out << ", ";
    IsFirst = false;

    // Unpack the dest.
    Value *DestVal;
    int DestValNo = -1;

    if (ValueCount < ResultVals.size()) {
      DestVal = ResultVals[ValueCount].first;
      DestValNo = ResultVals[ValueCount].second;
    } else
      DestVal = CI.getArgOperand(ValueCount-ResultVals.size());

    if (I->isEarlyClobber)
      C = "&"+C;

    Out << "\"=" << C << "\"(" << GetValueName(DestVal);
    if (DestValNo != -1)
      Out << ".field" << DestValNo; // Multiple retvals.
    Out << ")";
    ++ValueCount;
  }

  // Convert over all the input constraints.
  Out << "\n :";
  IsFirst = true;
  ValueCount = 0;
  for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
       E = Constraints.end(); I != E; ++I) {
    if (I->Type != InlineAsm::isInput) {
      ++ValueCount;
      continue;  // Ignore non-input constraints.
    }

    assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
    std::string C = InterpretASMConstraint(*I);
    if (C.empty()) continue;

    if (!IsFirst)
      Out << ", ";
    IsFirst = false;

    assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
    Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());

    Out << "\"" << C << "\"(";
    if (!I->isIndirect)
      writeOperand(SrcVal);
    else
      writeOperandDeref(SrcVal);
    Out << ")";
  }

  // Convert over the clobber constraints.
  IsFirst = true;
  for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
       E = Constraints.end(); I != E; ++I) {
    if (I->Type != InlineAsm::isClobber)
      continue;  // Ignore non-clobber constraints.

    assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
    std::string C = InterpretASMConstraint(*I);
    if (C.empty()) continue;

    // The clobber list is a section of its own in gcc's syntax: open it
    // with ':' before the first clobber, and comma-separate the rest.
    if (IsFirst)
      Out << "\n :";
    else
      Out << ", ";
    IsFirst = false;

    Out << '\"' << C << '"';
  }

  Out << ")";
  assert(!"Inline assembly not supported");
}
void CWriter::visitAllocaInst(AllocaInst &I) {
@@ -4240,14 +4155,14 @@ void CWriter::visitAtomicCmpXchgInst(AtomicCmpXchgInst &ACXI) {
class SmearCleanupPass : public llvm::BasicBlockPass {
public:
SmearCleanupPass(llvm::Module *m, int width)
SmearCleanupPass(Module *m, int width)
: BasicBlockPass(ID) { module = m; vectorWidth = width; }
const char *getPassName() const { return "Smear Cleanup Pass"; }
bool runOnBasicBlock(llvm::BasicBlock &BB);
static char ID;
llvm::Module *module;
Module *module;
int vectorWidth;
};
@@ -4303,41 +4218,28 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
assert(toMatch != NULL);
{
// FIXME: generalize this/make it not so hard-coded?
Type *matchType = toMatch->getType();
const char *smearFuncName = NULL;
switch (matchType->getTypeID()) {
case Type::FloatTyID: smearFuncName = "__smear_float"; break;
case Type::DoubleTyID: smearFuncName = "__smear_double"; break;
case Type::IntegerTyID: {
switch (cast<IntegerType>(matchType)->getBitWidth()) {
case 8: smearFuncName = "__smear_i8"; break;
case 16: smearFuncName = "__smear_i16"; break;
case 32: smearFuncName = "__smear_i32"; break;
case 64: smearFuncName = "__smear_i64"; break;
}
}
default: break;
}
const char *smearFuncName = lGetSmearFunc(matchType);
if (smearFuncName != NULL) {
Function *smearFunc = module->getFunction(smearFuncName);
if (smearFunc == NULL) {
Constant *sf =
module->getOrInsertFunction(smearFuncName, iter->getType(),
matchType, NULL);
iter->getType(), matchType, NULL);
smearFunc = dyn_cast<Function>(sf);
assert(smearFunc != NULL);
smearFunc->setDoesNotThrow(true);
smearFunc->setDoesNotAccessMemory(true);
}
llvm::Value *undefResult = llvm::UndefValue::get(vt);
assert(smearFunc != NULL);
Value *args[1] = { toMatch };
ArrayRef<llvm::Value *> argArray(&args[0], &args[1]);
Value *args[2] = { undefResult, toMatch };
ArrayRef<llvm::Value *> argArray(&args[0], &args[2]);
Instruction *smearCall =
CallInst::Create(smearFunc, argArray, "smear", (Instruction *)NULL);
CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"),
(Instruction *)NULL);
ReplaceInstWithInst(iter, smearCall);
@@ -4401,6 +4303,155 @@ BitcastCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
return modifiedAny;
}
///////////////////////////////////////////////////////////////////////////
// MaskOpsCleanupPass
/** This pass does various peephole improvements to mask modification
operations. In particular, it converts mask XORs with "all true" to
calls to __not() and replaces operations like and(not(a), b) to
__and_not1(a, b) (and similarly if the second operand has not applied
to it...)
*/
class MaskOpsCleanupPass : public llvm::BasicBlockPass {
public:
    /** Declares (or looks up) in module m the mask helper functions that
        this pass emits calls to: __not(mask), __and_not1(mask, mask) and
        __and_not2(mask, mask).  Each one is marked NoUnwind and ReadNone.
        The target is expected to end up providing their definitions. */
    MaskOpsCleanupPass(Module *m)
        : BasicBlockPass(ID) {
        Type *maskTy = LLVMTypes::MaskType;

        // Unary mask negation.
        Constant *notDecl =
            m->getOrInsertFunction("__not", maskTy, maskTy, NULL);
        notFunc = dyn_cast<Function>(notDecl);
        assert(notFunc != NULL);
        notFunc->addFnAttr(Attribute::NoUnwind);
        notFunc->addFnAttr(Attribute::ReadNone);

        // The two binary and-not variants share a signature, so declare
        // them in a loop; slot 0 is __and_not1 and slot 1 is __and_not2.
        const char *andNotNames[2] = { "__and_not1", "__and_not2" };
        for (int slot = 0; slot < 2; ++slot) {
            Constant *andNotDecl =
                m->getOrInsertFunction(andNotNames[slot], maskTy, maskTy,
                                       maskTy, NULL);
            andNotFuncs[slot] = dyn_cast<Function>(andNotDecl);
            assert(andNotFuncs[slot] != NULL);
            andNotFuncs[slot]->addFnAttr(Attribute::NoUnwind);
            andNotFuncs[slot]->addFnAttr(Attribute::ReadNone);
        }
    }

    const char *getPassName() const { return "MaskOps Cleanup Pass"; }
    bool runOnBasicBlock(llvm::BasicBlock &BB);

private:
    /** Returns the operand of v if v is a NOT of some value, else NULL. */
    Value *lGetNotOperand(Value *v) const;

    /** Declarations of __not and of __and_not1 / __and_not2 (indices 0
        and 1, respectively). */
    Function *notFunc, *andNotFuncs[2];
    static char ID;
};
// Out-of-class definition of the pass's static ID member, which the
// constructor hands to the BasicBlockPass base class.
char MaskOpsCleanupPass::ID = 0;
/** Tests whether the given value is a compile-time constant vector whose
    elements are all the same ConstantInt with value one (i.e. an "all
    true" mask).  Both ConstantVector and, for LLVM versions after 3.0,
    ConstantDataVector representations are recognized.
*/
static bool
lIsAllTrue(Value *v) {
    // Pull out the splat element from whichever constant-vector
    // representation v uses; it stays NULL if v is neither kind or is
    // not a splat.
    Constant *splat = NULL;
    if (ConstantVector *cv = dyn_cast<ConstantVector>(v))
        splat = cv->getSplatValue();
#ifndef LLVM_3_0
    else if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(v))
        splat = cdv->getSplatValue();
#endif

    if (splat == NULL)
        return false;

    ConstantInt *splatInt = dyn_cast<ConstantInt>(splat);
    return splatInt != NULL && splatInt->isOne();
}
/** If v computes the NOT of some other value, returns that other value;
    otherwise returns NULL.  Two forms are recognized: a direct call to
    __not(), and an XOR whose second operand is an all-true constant
    vector.
*/
Value *
MaskOpsCleanupPass::lGetNotOperand(Value *v) const {
    // Direct call to __not()
    CallInst *call = dyn_cast<CallInst>(v);
    if (call != NULL && call->getCalledFunction() == notFunc)
        return call->getArgOperand(0);

    // XOR of all-true vector.
    BinaryOperator *binOp = dyn_cast<BinaryOperator>(v);
    if (binOp == NULL || binOp->getOpcode() != Instruction::Xor)
        return NULL;
    if (!lIsAllTrue(binOp->getOperand(1)))
        return NULL;
    return binOp->getOperand(0);
}
/** Rewrites mask-typed binary operators in the basic block:
    - an XOR whose second operand is an all-true constant becomes a call
      to __not();
    - an AND where one operand is a NOT (per lGetNotOperand()) becomes a
      call to __and_not1() or __and_not2(), depending on which operand
      carried the NOT.

    After each replacement the scan restarts from the top of the block,
    since the instruction under the iterator has been replaced.

    @return true if any instruction was replaced.
*/
bool
MaskOpsCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
    bool modifiedAny = false;

 restart:
    for (BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
        BinaryOperator *bop = dyn_cast<BinaryOperator>(&*iter);
        if (bop == NULL)
            continue;

        // Only operations on masks are of interest.
        if (bop->getType() != LLVMTypes::MaskType)
            continue;

        if (bop->getOpcode() == Instruction::Xor) {
            // Check for XOR with all-true values
            if (lIsAllTrue(bop->getOperand(1))) {
                // ArrayRef does not own its data: its one-element
                // constructor keeps a pointer to the object it is given.
                // The operand must therefore live in a named local that
                // outlives the ArrayRef, rather than in the temporary
                // returned by getOperand().
                Value *notArg = bop->getOperand(0);
                ArrayRef<Value *> arg(notArg);
                CallInst *notCall = CallInst::Create(notFunc, arg,
                                                     bop->getName());
                ReplaceInstWithInst(iter, notCall);
                modifiedAny = true;
                goto restart;
            }
        }
        else if (bop->getOpcode() == Instruction::And) {
            // Check each of the operands to see if they have NOT applied
            // to them.
            for (int i = 0; i < 2; ++i) {
                if (Value *notOp = lGetNotOperand(bop->getOperand(i))) {
                    // In notOp we have the target of the NOT operation;
                    // put it in its appropriate spot in the operand array.
                    // Copy in the other operand directly.
                    Value *args[2];
                    args[i] = notOp;
                    args[i ^ 1] = bop->getOperand(i ^ 1);
                    ArrayRef<Value *> argsRef(&args[0], 2);

                    // Call the appropriate __and_not* function.
                    CallInst *andNotCall =
                        CallInst::Create(andNotFuncs[i], argsRef, bop->getName());

                    ReplaceInstWithInst(iter, andNotCall);
                    modifiedAny = true;
                    goto restart;
                }
            }
        }
    }

    return modifiedAny;
}
//===----------------------------------------------------------------------===//
// External Interface declaration
@@ -4432,6 +4483,7 @@ WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth,
pm.add(createCFGSimplificationPass()); // clean up after lower invoke.
pm.add(new SmearCleanupPass(module, vectorWidth));
pm.add(new BitcastCleanupPass);
pm.add(new MaskOpsCleanupPass(module));
pm.add(createDeadCodeEliminationPass()); // clean up after smear pass
//CO pm.add(createPrintModulePass(&fos));
pm.add(new CWriter(fos, includeName, vectorWidth));
@@ -4442,5 +4494,3 @@ WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth,
return true;
}
#endif // LLVM_2_9

View File

@@ -17,7 +17,7 @@ syn keyword ispcStatement cbreak ccontinue creturn launch print reference soa sy
syn keyword ispcConditional cif
syn keyword ispcRepeat cdo cfor cwhile
syn keyword ispcBuiltin programCount programIndex
syn keyword ispcType export int8 int16 int32 int64
syn keyword ispcType export uniform varying int8 int16 int32 int64
" Default highlighting
command -nargs=+ HiLink hi def link <args>

8
contrib/ispc.vim.README Normal file
View File

@@ -0,0 +1,8 @@
To install vim syntax highlighting for ispc files:
1) Copy ispc.vim into ~/.vim/syntax/ispc.vim (create if necessary)
2) Create a filetype for ispc files to correspond to that syntax file
To do this, create ~/.vim/ftdetect/ispc.vim if it does not already exist, and append the following line to it:
au BufRead,BufNewFile *.ispc set filetype=ispc

834
ctx.cpp

File diff suppressed because it is too large Load Diff

32
ctx.h
View File

@@ -248,6 +248,10 @@ public:
new basic block that it starts. */
llvm::BasicBlock *GetLabeledBasicBlock(const std::string &label);
/** Returns a vector of all labels in the context. This is
simply the key set of the labelMap */
std::vector<std::string> GetLabels();
/** Called to generate code for 'return' statement; value is the
expression in the return statement (if non-NULL), and
doCoherenceCheck indicates whether instructions should be generated
@@ -272,7 +276,7 @@ public:
llvm::Value *None(llvm::Value *mask);
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
i32 value wherein the i'th bit is on if and only if the i'th lane
i64 value wherein the i'th bit is on if and only if the i'th lane
of the mask is on. */
llvm::Value *LaneMask(llvm::Value *mask);
@@ -338,7 +342,7 @@ public:
/** Emits debugging information for the function parameter represented
by sym. */
void EmitFunctionParameterDebugInfo(Symbol *sym);
void EmitFunctionParameterDebugInfo(Symbol *sym, int parameterNum);
/** @} */
/** @name IR instruction emission
@@ -380,23 +384,23 @@ public:
array, for pointer types). */
llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL);
llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
llvm::Value *BitCastInst(llvm::Value *value, llvm::Type *type,
const char *name = NULL);
llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL);
llvm::Value *PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
llvm::Value *PtrToIntInst(llvm::Value *value, llvm::Type *type,
const char *name = NULL);
llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
llvm::Value *IntToPtrInst(llvm::Value *value, llvm::Type *type,
const char *name = NULL);
llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
llvm::Instruction *TruncInst(llvm::Value *value, llvm::Type *type,
const char *name = NULL);
llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
LLVM_TYPE_CONST llvm::Type *type, const char *name = NULL);
llvm::Instruction *FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
llvm::Type *type, const char *name = NULL);
llvm::Instruction *FPCastInst(llvm::Value *value, llvm::Type *type,
const char *name = NULL);
llvm::Instruction *SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
llvm::Instruction *SExtInst(llvm::Value *value, llvm::Type *type,
const char *name = NULL);
llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
llvm::Instruction *ZExtInst(llvm::Value *value, llvm::Type *type,
const char *name = NULL);
/** Given two integer-typed values (but possibly one vector and the
@@ -448,7 +452,7 @@ public:
instruction is added at the start of the function in the entry
basic block; if it should be added to the current basic block, then
the atEntryBlock parameter should be false. */
llvm::Value *AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType,
llvm::Value *AllocaInst(llvm::Type *llvmType,
const char *name = NULL, int align = 0,
bool atEntryBlock = true);
@@ -485,7 +489,7 @@ public:
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
const char *name = NULL);
llvm::PHINode *PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count,
llvm::PHINode *PhiNode(llvm::Type *type, int count,
const char *name = NULL);
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
llvm::Value *val1, const char *name = NULL);
@@ -632,12 +636,12 @@ private:
std::vector<CFInfo *> controlFlowInfo;
/** DIFile object corresponding to the source file where the current
function was defined (used for debugging info0. */
function was defined (used for debugging info). */
llvm::DIFile diFile;
/** DISubprogram corresponding to this function (used for debugging
info). */
llvm::DISubprogram diFunction;
llvm::DISubprogram diSubprogram;
/** These correspond to the current set of nested scopes in the
function. */

484
decl.cpp
View File

@@ -33,7 +33,7 @@
/** @file decl.cpp
@brief Implementations of classes related to turning declarations into
symbols and types.
symbol names and types.
*/
#include "decl.h"
@@ -44,6 +44,7 @@
#include "stmt.h"
#include "expr.h"
#include <stdio.h>
#include <string.h>
#include <set>
static void
@@ -55,6 +56,7 @@ lPrintTypeQualifiers(int typeQualifiers) {
if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
if (typeQualifiers & TYPEQUAL_SIGNED) printf("signed ");
if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
if (typeQualifiers & TYPEQUAL_EXPORT) printf("export ");
}
@@ -134,7 +136,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
}
if (vectorSize > 0) {
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(retType);
const AtomicType *atomicType = CastType<AtomicType>(retType);
if (atomicType == NULL) {
Error(pos, "Only atomic types (int, float, ...) are legal for vector "
"types.");
@@ -146,7 +148,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);
if (soaWidth > 0) {
const StructType *st = dynamic_cast<const StructType *>(retType);
const StructType *st = CastType<StructType>(retType);
if (st == NULL) {
Error(pos, "Illegal to provide soa<%d> qualifier with non-struct "
@@ -188,7 +190,6 @@ lGetStorageClassName(StorageClass storageClass) {
case SC_NONE: return "";
case SC_EXTERN: return "extern";
case SC_EXTERN_C: return "extern \"C\"";
case SC_EXPORT: return "export";
case SC_STATIC: return "static";
case SC_TYPEDEF: return "typedef";
default: FATAL("Unhandled storage class in lGetStorageClassName");
@@ -217,50 +218,44 @@ Declarator::Declarator(DeclaratorKind dk, SourcePos p)
: pos(p), kind(dk) {
child = NULL;
typeQualifiers = 0;
storageClass = SC_NONE;
arraySize = -1;
sym = NULL;
type = NULL;
initExpr = NULL;
}
void
Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
const Type *t = GetType(ds);
if (t == NULL) {
Assert(m->errorCount > 0);
const Type *baseType = ds->GetBaseType(pos);
InitFromType(baseType, ds);
if (type == NULL) {
AssertPos(pos, m->errorCount > 0);
return;
}
Symbol *sym = GetSymbol();
if (sym != NULL) {
sym->type = t;
sym->storageClass = ds->storageClass;
storageClass = ds->storageClass;
if (ds->declSpecList.size() > 0 &&
CastType<FunctionType>(type) == NULL) {
Error(pos, "__declspec specifiers for non-function type \"%s\" are "
"not used.", type->GetString().c_str());
}
}
/** Returns the symbol stored on the innermost declarator, i.e. the one
    at the end of the chain of child pointers starting at this declarator.
 */
Symbol *
Declarator::GetSymbol() const {
    const Declarator *tail = this;
    // Follow the child links until the last declarator in the chain.
    for (; tail->child != NULL; tail = tail->child)
        ;
    return tail->sym;
}
void
Declarator::Print(int indent) const {
printf("%*cdeclarator: [", indent, ' ');
pos.Print();
lPrintTypeQualifiers(typeQualifiers);
Symbol *sym = GetSymbol();
if (sym != NULL)
printf("%s", sym->name.c_str());
printf("%s ", lGetStorageClassName(storageClass));
if (name.size() > 0)
printf("%s", name.c_str());
else
printf("(null symbol)");
printf("(unnamed)");
printf(", array size = %d", arraySize);
@@ -294,66 +289,26 @@ Declarator::Print(int indent) const {
}
/** For a declarator that declares a function, looks up the function's
    symbol in the symbol table and appends the symbols of its parameters
    to funArgs.  Unbound variability in the parameter types and in the
    function's own type is resolved to varying.

    @param ds       Declaration specifiers used to compute the type.
    @param funArgs  Receives one symbol per parameter with a valid type.
    @return The function's symbol; NULL if the declarator's type is not a
            function type or if the symbol table lookup failed.
 */
Symbol *
Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
    const FunctionType *funcType =
        dynamic_cast<const FunctionType *>(GetType(ds));
    if (funcType == NULL)
        return NULL;

    Symbol *declaredSym = GetSymbol();
    Assert(declaredSym != NULL);

    // Get the symbol for the function from the symbol table.  (It should
    // already have been added to the symbol table by AddGlobal() by the
    // time we get here.)
    Symbol *funSym =
        m->symbolTable->LookupFunction(declaredSym->name.c_str(), funcType);
    if (funSym != NULL)
        funSym->pos = pos;
    else
        // May be NULL due to error earlier in compilation
        Assert(m->errorCount > 0);

    // Skip over the declarators that describe the return type until the
    // one for the function itself is reached.
    Declarator *fnDecl = this;
    while (fnDecl != NULL && fnDecl->kind != DK_FUNCTION)
        fnDecl = fnDecl->child;
    Assert(fnDecl != NULL);

    for (unsigned int i = 0; i < fnDecl->functionParams.size(); ++i) {
        Symbol *param = fnDecl->GetSymbolForFunctionParameter(i);
        if (param->type == NULL) {
            // A parameter without a type is only expected after an error.
            Assert(m->errorCount > 0);
            continue;
        }

        param->type =
            param->type->ResolveUnboundVariability(Variability::Varying);
        funArgs->push_back(param);
    }

    if (funSym != NULL)
        funSym->type =
            funSym->type->ResolveUnboundVariability(Variability::Varying);

    return funSym;
}
const Type *
Declarator::GetType(const Type *base, DeclSpecs *ds) const {
void
Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0);
bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0);
bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0);
bool isExported = ((typeQualifiers & TYPEQUAL_EXPORT) != 0);
bool isConst = ((typeQualifiers & TYPEQUAL_CONST) != 0);
if (hasUniformQual && hasVaryingQual) {
Error(pos, "Can't provide both \"uniform\" and \"varying\" qualifiers.");
return NULL;
return;
}
if (kind != DK_FUNCTION && isTask)
if (kind != DK_FUNCTION && isTask) {
Error(pos, "\"task\" qualifier illegal in variable declaration.");
return;
}
if (kind != DK_FUNCTION && isExported) {
Error(pos, "\"export\" qualifier illegal in variable declaration.");
return;
}
Variability variability(Variability::Unbound);
if (hasUniformQual)
@@ -361,91 +316,125 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
else if (hasVaryingQual)
variability = Variability::Varying;
const Type *type = base;
switch (kind) {
case DK_BASE:
if (kind == DK_BASE) {
// All of the type qualifiers should be in the DeclSpecs for the
// base declarator
Assert(typeQualifiers == 0);
Assert(child == NULL);
return type;
case DK_POINTER:
AssertPos(pos, typeQualifiers == 0);
AssertPos(pos, child == NULL);
type = baseType;
}
else if (kind == DK_POINTER) {
/* For now, any pointer to an SOA type gets the slice property; if
we add the capability to declare pointers as slices or not,
we'll want to set this based on a type qualifier here. */
type = new PointerType(type, variability, isConst, type->IsSOAType());
if (child != NULL)
return child->GetType(type, ds);
const Type *ptrType = new PointerType(baseType, variability, isConst,
baseType->IsSOAType());
if (child != NULL) {
child->InitFromType(ptrType, ds);
type = child->type;
name = child->name;
}
else
return type;
break;
case DK_REFERENCE:
if (hasUniformQual)
type = ptrType;
}
else if (kind == DK_REFERENCE) {
if (hasUniformQual) {
Error(pos, "\"uniform\" qualifier is illegal to apply to references.");
if (hasVaryingQual)
return;
}
if (hasVaryingQual) {
Error(pos, "\"varying\" qualifier is illegal to apply to references.");
if (isConst)
return;
}
if (isConst) {
Error(pos, "\"const\" qualifier is to illegal apply to references.");
return;
}
// The parser should disallow this already, but double check.
if (dynamic_cast<const ReferenceType *>(type) != NULL) {
if (CastType<ReferenceType>(baseType) != NULL) {
Error(pos, "References to references are illegal.");
return NULL;
return;
}
type = new ReferenceType(type);
if (child != NULL)
return child->GetType(type, ds);
const Type *refType = new ReferenceType(baseType);
if (child != NULL) {
child->InitFromType(refType, ds);
type = child->type;
name = child->name;
}
else
return type;
break;
case DK_ARRAY:
if (Type::Equal(type, AtomicType::Void)) {
type = refType;
}
else if (kind == DK_ARRAY) {
if (Type::Equal(baseType, AtomicType::Void)) {
Error(pos, "Arrays of \"void\" type are illegal.");
return NULL;
return;
}
if (dynamic_cast<const ReferenceType *>(type)) {
if (CastType<ReferenceType>(baseType)) {
Error(pos, "Arrays of references (type \"%s\") are illegal.",
type->GetString().c_str());
return NULL;
baseType->GetString().c_str());
return;
}
type = new ArrayType(type, arraySize);
if (child)
return child->GetType(type, ds);
const Type *arrayType = new ArrayType(baseType, arraySize);
if (child != NULL) {
child->InitFromType(arrayType, ds);
type = child->type;
name = child->name;
}
else
return type;
break;
case DK_FUNCTION: {
std::vector<const Type *> args;
std::vector<std::string> argNames;
std::vector<ConstExpr *> argDefaults;
std::vector<SourcePos> argPos;
type = arrayType;
}
else if (kind == DK_FUNCTION) {
llvm::SmallVector<const Type *, 8> args;
llvm::SmallVector<std::string, 8> argNames;
llvm::SmallVector<Expr *, 8> argDefaults;
llvm::SmallVector<SourcePos, 8> argPos;
// Loop over the function arguments and store the names, types,
// default values (if any), and source file positions each one in
// the corresponding vector.
for (unsigned int i = 0; i < functionParams.size(); ++i) {
Declaration *d = functionParams[i];
Symbol *sym = GetSymbolForFunctionParameter(i);
if (d->declSpecs->storageClass != SC_NONE)
Error(sym->pos, "Storage class \"%s\" is illegal in "
"function parameter declaration for parameter \"%s\".",
lGetStorageClassName(d->declSpecs->storageClass),
sym->name.c_str());
if (Type::Equal(sym->type, AtomicType::Void)) {
Error(sym->pos, "Parameter with type \"void\" illegal in function "
"parameter list.");
sym->type = NULL;
if (d == NULL) {
AssertPos(pos, m->errorCount > 0);
continue;
}
if (d->declarators.size() == 0) {
// function declaration like foo(float), w/o a name for the
// parameter; wire up a placeholder Declarator for it
d->declarators.push_back(new Declarator(DK_BASE, pos));
d->declarators[0]->InitFromDeclSpecs(d->declSpecs);
}
const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
AssertPos(pos, d->declarators.size() == 1);
Declarator *decl = d->declarators[0];
if (decl == NULL || decl->type == NULL) {
AssertPos(pos, m->errorCount > 0);
continue;
}
if (decl->name == "") {
// Give a name to any anonymous parameter declarations
char buf[32];
sprintf(buf, "__anon_parameter_%d", i);
decl->name = buf;
}
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
if (d->declSpecs->storageClass != SC_NONE)
Error(decl->pos, "Storage class \"%s\" is illegal in "
"function parameter declaration for parameter \"%s\".",
lGetStorageClassName(d->declSpecs->storageClass),
decl->name.c_str());
if (Type::Equal(decl->type, AtomicType::Void)) {
Error(decl->pos, "Parameter with type \"void\" illegal in function "
"parameter list.");
decl->type = NULL;
}
const ArrayType *at = CastType<ArrayType>(decl->type);
if (at != NULL) {
// As in C, arrays are passed to functions as pointers to
// their element type. We'll just immediately make this
@@ -455,93 +444,94 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
// report this differently than it was originally declared
// in the function, but it's not clear that this is a
// significant problem.)
if (at->GetElementType() == NULL) {
Assert(m->errorCount > 0);
return NULL;
const Type *targetType = at->GetElementType();
if (targetType == NULL) {
AssertPos(pos, m->errorCount > 0);
return;
}
const Type *targetType = at->GetElementType();
targetType =
targetType->ResolveUnboundVariability(Variability::Varying);
sym->type = PointerType::GetUniform(targetType);
decl->type = PointerType::GetUniform(targetType);
// Make sure there are no unsized arrays (other than the
// first dimension) in function parameter lists.
at = dynamic_cast<const ArrayType *>(at->GetElementType());
at = CastType<ArrayType>(targetType);
while (at != NULL) {
if (at->GetElementCount() == 0)
Error(sym->pos, "Arrays with unsized dimensions in "
Error(decl->pos, "Arrays with unsized dimensions in "
"dimensions after the first one are illegal in "
"function parameter lists.");
at = dynamic_cast<const ArrayType *>(at->GetElementType());
at = CastType<ArrayType>(at->GetElementType());
}
}
args.push_back(sym->type);
argNames.push_back(sym->name);
argPos.push_back(sym->pos);
args.push_back(decl->type);
argNames.push_back(decl->name);
argPos.push_back(decl->pos);
ConstExpr *init = NULL;
if (d->declarators.size()) {
// Try to find an initializer expression; if there is one,
// it lives down to the base declarator.
Declarator *decl = d->declarators[0];
while (decl->child != NULL) {
Assert(decl->initExpr == NULL);
Expr *init = NULL;
// Try to find an initializer expression.
while (decl != NULL) {
if (decl->initExpr != NULL) {
decl->initExpr = TypeCheck(decl->initExpr);
decl->initExpr = Optimize(decl->initExpr);
if (decl->initExpr != NULL) {
init = dynamic_cast<ConstExpr *>(decl->initExpr);
if (init == NULL)
init = dynamic_cast<NullPointerExpr *>(decl->initExpr);
if (init == NULL)
Error(decl->initExpr->pos, "Default value for parameter "
"\"%s\" must be a compile-time constant.",
decl->name.c_str());
}
break;
}
else
decl = decl->child;
}
if (decl->initExpr != NULL &&
(decl->initExpr = TypeCheck(decl->initExpr)) != NULL &&
(decl->initExpr = Optimize(decl->initExpr)) != NULL &&
(init = dynamic_cast<ConstExpr *>(decl->initExpr)) == NULL) {
Error(decl->initExpr->pos, "Default value for parameter "
"\"%s\" must be a compile-time constant.",
sym->name.c_str());
}
}
argDefaults.push_back(init);
}
const Type *returnType = type;
const Type *returnType = baseType;
if (returnType == NULL) {
Error(pos, "No return type provided in function declaration.");
return NULL;
return;
}
if (dynamic_cast<const FunctionType *>(returnType) != NULL) {
if (CastType<FunctionType>(returnType) != NULL) {
Error(pos, "Illegal to return function type from function.");
return NULL;
return;
}
bool isExported = ds && (ds->storageClass == SC_EXPORT);
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
if (isExported && isTask) {
Error(pos, "Function can't have both \"task\" and \"export\" "
"qualifiers");
return NULL;
return;
}
if (isExternC && isTask) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
"qualifiers");
return NULL;
return;
}
if (isExternC && isExported) {
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
"qualifiers");
return NULL;
return;
}
if (child == NULL) {
Assert(m->errorCount > 0);
return NULL;
AssertPos(pos, m->errorCount > 0);
return;
}
const FunctionType *functionType =
new FunctionType(returnType, args, argNames, argDefaults,
argPos, isTask, isExported, isExternC);
functionType = functionType->ResolveUnboundVariability(Variability::Varying);
// handle any explicit __declspecs on the function
if (ds != NULL) {
@@ -563,60 +553,12 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
}
}
return child->GetType(functionType, ds);
}
default:
FATAL("Unexpected decl kind");
return NULL;
child->InitFromType(functionType, ds);
type = child->type;
name = child->name;
}
}
const Type *
Declarator::GetType(DeclSpecs *ds) const {
    // Apply this declarator to the base type named by the DeclSpecs.
    const Type *resolved = GetType(ds->GetBaseType(pos), ds);

    // Nothing further to check if resolution failed or if no __declspec
    // specifiers were given.
    if (resolved == NULL || ds->declSpecList.size() == 0)
        return resolved;

    // __declspec specifiers are only consumed by function types; report
    // when they were attached to anything else.
    const bool isFunction =
        (dynamic_cast<const FunctionType *>(resolved) != NULL);
    if (!isFunction)
        Error(pos, "__declspec specifiers for non-function type \"%s\" are "
              "not used.", resolved->GetString().c_str());

    return resolved;
}
Symbol *
Declarator::GetSymbolForFunctionParameter(int paramNum) const {
    Assert(paramNum < (int)functionParams.size());
    Declaration *paramDecl = functionParams[paramNum];

    if (paramDecl->declarators.size() != 0) {
        Assert(paramDecl->declarators.size() == 1);
        Declarator *paramDeclarator = paramDecl->declarators[0];

        // Named parameter: the declarator already carries its symbol.
        Symbol *declared = paramDeclarator->GetSymbol();
        if (declared != NULL)
            return declared;

        // More complex anonymous declarations like float (float **):
        // synthesize a name and take the type from the declarator.
        char anonName[32];
        sprintf(anonName, "__anon_parameter_%d", paramNum);
        Symbol *anonSym = new Symbol(anonName, paramDeclarator->pos);
        anonSym->type = paramDeclarator->GetType(paramDecl->declSpecs);
        return anonSym;
    }

    // Function declaration like foo(float), with no declarator at all for
    // the parameter: synthesize a name and use the DeclSpecs' base type.
    char anonName[32];
    sprintf(anonName, "__anon_parameter_%d", paramNum);
    Symbol *anonSym = new Symbol(anonName, pos);
    anonSym->type = paramDecl->declSpecs->GetBaseType(pos);
    return anonSym;
}
///////////////////////////////////////////////////////////////////////////
// Declaration
@@ -646,27 +588,23 @@ Declaration::GetVariableDeclarations() const {
for (unsigned int i = 0; i < declarators.size(); ++i) {
Declarator *decl = declarators[i];
if (decl == NULL) {
if (decl == NULL || decl->type == NULL) {
// Ignore earlier errors
Assert(m->errorCount > 0);
continue;
}
Symbol *sym = decl->GetSymbol();
if (sym == NULL || sym->type == NULL) {
// Ignore errors
Assert(m->errorCount > 0);
continue;
}
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
if (Type::Equal(sym->type, AtomicType::Void))
Error(sym->pos, "\"void\" type variable illegal in declaration.");
else if (dynamic_cast<const FunctionType *>(sym->type) == NULL) {
if (Type::Equal(decl->type, AtomicType::Void))
Error(decl->pos, "\"void\" type variable illegal in declaration.");
else if (CastType<FunctionType>(decl->type) == NULL) {
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
Symbol *sym = new Symbol(decl->name, decl->pos, decl->type,
decl->storageClass);
m->symbolTable->AddVariable(sym);
vars.push_back(VariableDeclaration(sym, decl->initExpr));
}
}
return vars;
}
@@ -677,25 +615,19 @@ Declaration::DeclareFunctions() {
for (unsigned int i = 0; i < declarators.size(); ++i) {
Declarator *decl = declarators[i];
if (decl == NULL) {
if (decl == NULL || decl->type == NULL) {
// Ignore earlier errors
Assert(m->errorCount > 0);
continue;
}
Symbol *sym = decl->GetSymbol();
if (sym == NULL || sym->type == NULL) {
// Ignore errors
Assert(m->errorCount > 0);
continue;
}
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
if (dynamic_cast<const FunctionType *>(sym->type) == NULL)
const FunctionType *ftype = CastType<FunctionType>(decl->type);
if (ftype == NULL)
continue;
bool isInline = (declSpecs->typeQualifiers & TYPEQUAL_INLINE);
m->AddFunctionDeclaration(sym, isInline);
m->AddFunctionDeclaration(decl->name, ftype, decl->storageClass,
isInline, decl->pos);
}
}
@@ -709,13 +641,14 @@ Declaration::Print(int indent) const {
declarators[i]->Print(indent+4);
}
///////////////////////////////////////////////////////////////////////////
void
GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
std::vector<const Type *> *elementTypes,
std::vector<std::string> *elementNames,
std::vector<SourcePos> *elementPositions) {
llvm::SmallVector<const Type *, 8> *elementTypes,
llvm::SmallVector<std::string, 8> *elementNames,
llvm::SmallVector<SourcePos, 8> *elementPositions) {
std::set<std::string> seenNames;
for (unsigned int i = 0; i < sd.size(); ++i) {
const Type *type = sd[i]->type;
@@ -725,38 +658,41 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
// FIXME: making this fake little DeclSpecs here is really
// disgusting
DeclSpecs ds(type);
if (type->IsUniformType())
ds.typeQualifiers |= TYPEQUAL_UNIFORM;
else if (type->IsVaryingType())
ds.typeQualifiers |= TYPEQUAL_VARYING;
if (Type::Equal(type, AtomicType::Void) == false) {
if (type->IsUniformType())
ds.typeQualifiers |= TYPEQUAL_UNIFORM;
else if (type->IsVaryingType())
ds.typeQualifiers |= TYPEQUAL_VARYING;
else if (type->GetSOAWidth() != 0)
ds.soaWidth = type->GetSOAWidth();
// FIXME: ds.vectorSize?
}
for (unsigned int j = 0; j < sd[i]->declarators->size(); ++j) {
Declarator *d = (*sd[i]->declarators)[j];
d->InitFromDeclSpecs(&ds);
Symbol *sym = d->GetSymbol();
if (Type::Equal(sym->type, AtomicType::Void))
if (Type::Equal(d->type, AtomicType::Void))
Error(d->pos, "\"void\" type illegal for struct member.");
const ArrayType *arrayType =
dynamic_cast<const ArrayType *>(sym->type);
if (arrayType != NULL && arrayType->GetElementCount() == 0) {
Error(d->pos, "Unsized arrays aren't allowed in struct "
"definitions.");
elementTypes->push_back(NULL);
}
else
elementTypes->push_back(sym->type);
elementTypes->push_back(d->type);
if (seenNames.find(sym->name) != seenNames.end())
if (seenNames.find(d->name) != seenNames.end())
Error(d->pos, "Struct member \"%s\" has same name as a "
"previously-declared member.", sym->name.c_str());
"previously-declared member.", d->name.c_str());
else
seenNames.insert(sym->name);
seenNames.insert(d->name);
elementNames->push_back(sym->name);
elementPositions->push_back(sym->pos);
elementNames->push_back(d->name);
elementPositions->push_back(d->pos);
}
}
for (int i = 0; i < (int)elementTypes->size() - 1; ++i) {
const ArrayType *arrayType = CastType<ArrayType>((*elementTypes)[i]);
if (arrayType != NULL && arrayType->GetElementCount() == 0)
Error((*elementPositions)[i], "Unsized arrays aren't allowed except "
"for the last member in a struct definition.");
}
}

54
decl.h
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -47,30 +47,21 @@
variables--here, that the declaration has the 'static' and 'uniform'
qualifiers, and that its basic type is 'int'. Then for each variable
declaration, the Declaration class holds an instance of a Declarator,
which in turn records the per-variable information like the symbol
name, array size (if any), initializer expression, etc.
which in turn records the per-variable information like the name, array
size (if any), initializer expression, etc.
*/
#ifndef ISPC_DECL_H
#define ISPC_DECL_H
#include "ispc.h"
#include <llvm/ADT/SmallVector.h>
struct VariableDeclaration;
class Declaration;
class Declarator;
/** Storage class specifiers that may accompany a declaration.  SC_NONE
    indicates that no storage class was given. */
enum StorageClass {
SC_NONE,
SC_EXTERN,
SC_EXPORT,
SC_STATIC,
SC_TYPEDEF,
SC_EXTERN_C   // declared with extern "C"
};
/* Multiple qualifiers can be provided with types in declarations;
therefore, they are defined as distinct bit flags so that they can be
ORed together into an int. */
@@ -82,6 +73,7 @@ enum StorageClass {
#define TYPEQUAL_SIGNED (1<<4)
#define TYPEQUAL_UNSIGNED (1<<5)
#define TYPEQUAL_INLINE (1<<6)
#define TYPEQUAL_EXPORT (1<<7)
/** @brief Representation of the declaration specifiers in a declaration.
@@ -141,25 +133,11 @@ public:
Declarator(DeclaratorKind dk, SourcePos p);
/** Once a DeclSpecs instance is available, this method completes the
initialization of the Symbol, setting its Type accordingly.
initialization of the type member.
*/
void InitFromDeclSpecs(DeclSpecs *ds);
/** Get the actual type of the combination of Declarator and the given
DeclSpecs. If an explicit base type is provided, the declarator is
applied to that type; otherwise the base type from the DeclSpecs is
used. */
const Type *GetType(DeclSpecs *ds) const;
const Type *GetType(const Type *base, DeclSpecs *ds) const;
/** Returns the symbol corresponding to the function declared by this
declarator and symbols for its arguments in *args. */
Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *args);
Symbol *GetSymbolForFunctionParameter(int paramNum) const;
/** Returns the symbol associated with the declarator. */
Symbol *GetSymbol() const;
void InitFromType(const Type *base, DeclSpecs *ds);
void Print(int indent) const;
@@ -180,18 +158,24 @@ public:
/** Type qualifiers provided with the declarator. */
int typeQualifiers;
StorageClass storageClass;
/** For array declarators, this gives the declared size of the array.
Unsized arrays have arraySize == 0. */
int arraySize;
/** Symbol associated with the declarator. */
Symbol *sym;
/** Name associated with the declarator. */
std::string name;
/** Initialization expression for the variable. May be NULL. */
Expr *initExpr;
/** Type of the declarator. This is NULL until InitFromDeclSpecs() or
InitFromType() is called. */
const Type *type;
/** For function declarations, this holds the Declaration *s for the
funciton's parameters. */
function's parameters. */
std::vector<Declaration *> functionParams;
};
@@ -236,8 +220,8 @@ struct StructDeclaration {
/** Given a set of StructDeclaration instances, this returns the types of
the elements of the corresponding struct and their names. */
extern void GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
std::vector<const Type *> *elementTypes,
std::vector<std::string> *elementNames,
std::vector<SourcePos> *elementPositions);
llvm::SmallVector<const Type *, 8> *elementTypes,
llvm::SmallVector<std::string, 8> *elementNames,
llvm::SmallVector<SourcePos, 8> *elementPositions);
#endif // ISPC_DECL_H

View File

@@ -1,3 +1,81 @@
=== v1.2.2 === (20 April 2012)
This release includes a number of small additions to functionality and a
number of bugfixes. New functionality includes:
* It's now possible to forward declare structures as in C/C++: "struct
Foo;". After such a declaration, structs with pointers to "Foo" and
functions that take pointers or references to Foo structs can be declared
without the entire definition of Foo being available.
* New built-in types size_t, ptrdiff_t, and [u]intptr_t are now available,
corresponding to the equivalent types in C.
* The standard library now provides atomic_swap*() and
atomic_compare_exchange*() functions for void * types.
* The C++ backend has seen a number of improvements to the quality and
readability of generated code.
A number of bugs have been fixed in this release as well. The most
significant are:
* Fixed a bug where nested loops could cause a compiler crash in some
circumstances (issues #240, and #229)
* Gathers could access invalid memory (and cause the program to crash) in
some circumstances (#235)
* References to temporary values are now handled properly when passed to a
function that takes a reference typed parameter.
* A case where incorrect code could be generated for compile-time-constant
initializers has been fixed (#234).
=== v1.2.1 === (6 April 2012)
This release contains only minor new functionality and is mostly for many
small bugfixes and improvements to error handling and error reporting.
The new functionality that is present is:
* Significantly more efficient versions of the float / half conversion
routines are now available in the standard library, thanks to Fabian
Giesen.
* The last member of a struct can now be a zero-length array; this allows
the trick of dynamically allocating enough storage for the struct and
some number of array elements at the end of it.
Significant bugs fixed include:
* Issue #205: When a target ISA isn't specified, use the host system's
capabilities to choose a target for which it will be able to run the
generated code.
* Issues #215 and #217: Don't allocate storage for global variables that
are declared "extern".
* Issue #197: Allow NULL as a default argument value in a function
declaration.
* Issue #223: Fix bugs where taking the address of a function wouldn't work
as expected.
* Issue #224: When there are overloaded variants of a function that take
both reference and const reference parameters, give the non-const
reference preference when matching values of that underlying type.
* Issue #225: An error is issued when a varying lvalue is assigned to a
reference type (rather than crashing).
* Issue #193: Permit conversions from array types to void *, not just the
pointer type of the underlying array element.
* Issue #199: Still evaluate expressions that are cast to (void).
The documentation has also been improved, with FAQs added to clarify some
aspects of the ispc pointer model.
=== v1.2.0 === (20 March 2012)
This is a major new release of ispc, with a number of significant

View File

@@ -14,12 +14,19 @@ distribution.
+ `Why are there multiple versions of exported ispc functions in the assembly output?`_
+ `How can I more easily see gathers and scatters in generated assembly?`_
* Language Details
+ `What is the difference between "int *foo" and "int foo[]"?`_
+ `Why are pointed-to types "uniform" by default?`_
+ `What am I getting an error about assigning a varying lvalue to a reference type?`_
* Interoperability
+ `How can I supply an initial execution mask in the call from the application?`_
+ `How can I generate a single binary executable with support for multiple instruction sets?`_
+ `How can I determine at run-time which vector instruction set's instructions were selected to execute?`_
+ `Is it possible to inline ispc functions in C/C++ code?`_
+ `Why is it illegal to pass "varying" values from C/C++ to ispc functions?`_
* Programming Techniques
@@ -27,6 +34,7 @@ distribution.
+ `How can a gang of program instances generate variable amounts of output efficiently?`_
+ `Is it possible to use ispc for explicit vector programming?`_
+ `How can I debug my ispc programs using Valgrind?`_
+ `foreach statements generate more complex assembly than I'd expect; what's going on?`_
Understanding ispc's Output
===========================
@@ -213,6 +221,125 @@ easier to understand:
jmp ___pseudo_scatter_base_offsets32_32 ## TAILCALL
Language Details
================
What is the difference between "int \*foo" and "int foo[]"?
-----------------------------------------------------------
In C and C++, declaring a function to take a parameter ``int *foo`` and
``int foo[]`` results in the same type for the parameter. Both are
pointers to integers. In ``ispc``, these are different types. The first
one is a varying pointer to a uniform integer value in memory, while the
second results in a uniform pointer to the start of an array of varying
integer values in memory.
To understand why the first is a varying pointer to a uniform integer,
first recall that types without explicit rate qualifiers (``uniform``,
``varying``, or ``soa<>``) are ``varying`` by default. Second, recall from
the `discussion of pointer types in the ispc User's Guide`_ that pointed-to
types without rate qualifiers are ``uniform`` by default. (This second
rule is discussed further below, in `Why are pointed-to types "uniform" by
default?`_.) The type of ``int *foo`` follows from these.
.. _discussion of pointer types in the ispc User's Guide: ispc.html#pointer-types
Conversely, in a function body, ``int foo[10]`` represents a declaration of
a 10-element array of varying ``int`` values. Since we'd certainly like
to be able to pass such an array to a function that takes an ``int []``
parameter, the natural type for an ``int []`` parameter is a uniform
pointer to varying integer values.
In terms of compatibility with C/C++, it's unfortunate that this
distinction exists, though any other set of rules seems to introduce more
awkwardness than this one. (Though we're interested to hear ideas to
improve these rules!).
Why are pointed-to types "uniform" by default?
----------------------------------------------
In ``ispc``, types without rate qualifiers are "varying" by default, but
types pointed to by pointers without rate qualifiers are "uniform" by
default. Why this difference?
::
int foo; // no rate qualifier, "varying int".
uniform int *foo; // pointer type has no rate qualifier, pointed-to does.
// "varying pointer to uniform int".
int *foo; // neither pointer type nor pointed-to type ("int") have
// rate qualifiers. Pointer type is varying by default,
// pointed-to is uniform. "varying pointer to uniform int".
varying int *foo; // varying pointer to varying int
The first rule, having types without rate qualifiers be varying by default,
is a default that keeps the number of "uniform" or "varying" qualifiers in
``ispc`` programs low. Most ``ispc`` programs use mostly "varying"
variables, so this rule allows most variables to be declared without also
requiring rate qualifiers.
On a related note, this rule allows many C/C++ functions to be used to
define equivalent functions in the SPMD execution model that ``ispc``
provides with little or no modification:
::
// scalar add in C/C++, SPMD/vector add in ispc
int add(int a, int b) { return a + b; }
This motivation also explains why ``uniform int *foo`` represents a varying
pointer; having pointers be varying by default if they don't have rate
qualifiers similarly helps with porting code from C/C++ to ``ispc``.
The trickier issue is why pointed-to types are "uniform" by default. In our
experience, data in memory that is accessed via pointers is most often
uniform; this generally includes all data that has been allocated and
initialized by the C/C++ application code. In practice, "varying" types are
more generally (but not exclusively) used for local data in ``ispc``
functions. Thus, making the pointed-to type uniform by default leads to
more concise code for the most common cases.
What am I getting an error about assigning a varying lvalue to a reference type?
--------------------------------------------------------------------------------
Given code like the following:
::
uniform float a[...];
int index = ...;
float &r = a[index];
``ispc`` issues the error "Initializer for reference-type variable "r" must
have a uniform lvalue type.". The underlying issue stems from how
references are represented in the code generated by ``ispc``. Recall that
``ispc`` supports both uniform and varying pointer types--a uniform pointer
points to the same location in memory for all program instances in the
gang, while a varying pointer allows each program instance to have its own
pointer value.
References are represented as pointers in the code generated by ``ispc``,
though this is generally opaque to the user; in ``ispc``, they are
specifically uniform pointers. This design decision was made so that given
code like this:
::
extern void func(float &val);
float foo = ...;
func(foo);
Then the reference would be handled efficiently as a single pointer, rather
than unnecessarily being turned into a gang-size of pointers.
However, an implication of this decision is that it's not possible for
references to refer to completely different things for each of the program
instances. (And hence the error that is issued). In cases where a unique
per-program-instance pointer is needed, a varying pointer should be used
instead of a reference.
Interoperability
================
@@ -391,6 +518,48 @@ linking your applicaiton.
``-mattr=+avx`` flag to ``llc``.)
Why is it illegal to pass "varying" values from C/C++ to ispc functions?
------------------------------------------------------------------------
If any of the types in the parameter list to an exported function is
"varying" (including recursively, and members of structure types, etc.),
then ``ispc`` will issue an error and refuse to compile the function:
::
% echo "export int add(int x) { return ++x; }" | ispc
<stdin>:1:12: Error: Illegal to return a "varying" type from exported function "add"
<stdin>:1:20: Error: Varying parameter "x" is illegal in an exported function.
While there's no fundamental reason why this isn't possible, recall the
definition of "varying" variables: they have one value for each program
instance in the gang. As such, the number of values and amount of storage
required to represent a varying variable depends on the gang size
(i.e. ``programCount``), which can have different values depending on the
compilation target.
``ispc`` therefore prohibits passing "varying" values between the
application and the ``ispc`` program. This prevents the
application-side code from depending on a particular gang size and
encourages portability to different gang sizes. (A generally desirable
programming practice.)
For cases where the size of data is actually fixed from the application
side, the value can be passed via a pointer to a short ``uniform`` array,
as follows:
::
export void add4(uniform int ptr[4]) {
foreach (i = 0 ... 4)
ptr[i]++;
}
On the 4-wide SSE instruction set, this compiles to a single vector add
instruction (and associated move instructions), while it still also
efficiently computes the correct result on 8-wide AVX targets.
Programming Techniques
======================
@@ -525,3 +694,79 @@ you can use ``--target=sse4`` when compiling to run with ``valgrind``.
Note that ``valgrind`` does not yet support programs that use the AVX
instruction set.
foreach statements generate more complex assembly than I'd expect; what's going on?
-----------------------------------------------------------------------------------
Given a simple ``foreach`` loop like the following:
::
void foo(uniform float a[], uniform int count) {
foreach (i = 0 ... count)
a[i] *= 2;
}
the ``ispc`` compiler generates approximately 40 instructions--why isn't
the generated code simpler?
There are two main components to the code: one handles
``programCount``-sized chunks of elements of the array, and the other
handles any excess elements at the end of the array that don't completely
fill a gang. The code for the main loop is essentially what one would
expect: a vector of values is loaded from the array, the multiply is done,
and the result is stored.
::
LBB0_2: ## %foreach_full_body
movslq %edx, %rdx
vmovups (%rdi,%rdx), %ymm1
vmulps %ymm0, %ymm1, %ymm1
vmovups %ymm1, (%rdi,%rdx)
addl $32, %edx
addl $8, %eax
cmpl %ecx, %eax
jl LBB0_2
Then, there is a sequence of instructions that handles any additional
elements at the end of the array. (These instructions don't execute if
there aren't any left-over values to process, but they do lengthen the
amount of generated code.)
::
## BB#4: ## %partial_inner_only
vmovd %eax, %xmm0
vinsertf128 $1, %xmm0, %ymm0, %ymm0
vpermilps $0, %ymm0, %ymm0 ## ymm0 = ymm0[0,0,0,0,4,4,4,4]
vextractf128 $1, %ymm0, %xmm3
vmovd %esi, %xmm2
vmovaps LCPI0_1(%rip), %ymm1
vextractf128 $1, %ymm1, %xmm4
vpaddd %xmm4, %xmm3, %xmm3
# ....
vmulps LCPI0_0(%rip), %ymm1, %ymm1
vmaskmovps %ymm1, %ymm0, (%rdi,%rax)
If you know that the number of elements to be processed will always be an
exact multiple of 8, 16, etc., then adding a simple assignment to
``count`` like the one below gives the compiler enough information to be
able to eliminate the code for the additional array elements.
::
void foo(uniform float a[], uniform int count) {
// This assignment doesn't change the value of count
// if it's a multiple of 16, but it gives the compiler
// insight into this fact, allowing for simpler code to
// be generated for the foreach loop.
count = (count & ~(16-1));
foreach (i = 0 ... count)
a[i] *= 2;
}
With this new version of ``foo()``, only the code for the first loop above
is generated.

View File

@@ -121,10 +121,14 @@ Contents:
* `The ISPC Standard Library`_
+ `Basic Operations On Data`_
* `Logical and Selection Operations`_
* `Bit Operations`_
+ `Math Functions`_
* `Basic Math Functions`_
* `Bit-Level Operations`_
* `Transcendental Functions`_
* `Pseudo-Random Numbers`_
@@ -143,6 +147,7 @@ Contents:
* `Converting Between Array-of-Structures and Structure-of-Arrays Layout`_
* `Conversions To and From Half-Precision Floats`_
* `Converting to sRGB8`_
+ `Systems Programming Support`_
@@ -538,7 +543,7 @@ preprocessor runs:
* - ISPC
- 1
- Detecting that the ``ispc`` compiler is processing the file
* - ISPC_TARGET_{SSE2,SSE4,AVX}
* - ISPC_TARGET_{SSE2,SSE4,AVX,AVX2}
- 1
- One of these will be set, depending on the compilation target.
* - ISPC_POINTER_SIZE
@@ -1390,8 +1395,8 @@ Types
Basic Types and Type Qualifiers
-------------------------------
``ispc`` is a statically-typed language. It supports a variety of basic
types.
``ispc`` is a statically-typed language. It supports a variety of core
basic types:
* ``void``: "empty" type representing no value.
* ``bool``: boolean value; may be assigned ``true``, ``false``, or the
@@ -1408,6 +1413,15 @@ types.
* ``unsigned int64``: 64-bit unsigned integer.
* ``double``: 64-bit double-precision floating point value.
There are also a few built-in types related to pointers and memory:
* ``size_t``: the maximum size of any object (structure or array)
* ``ptrdiff_t``: an integer type large enough to represent the difference
between two pointers
* ``intptr_t``: signed integer type that is large enough to represent
a pointer value
* ``uintptr_t``: unsigned integer type large enough to represent a pointer
Implicit type conversion between values of different types is done
automatically by the ``ispc`` compiler. Thus, a value of ``float`` type
can be assigned to a variable of ``int`` type directly. In binary
@@ -1492,13 +1506,17 @@ Defining New Names For Types
The ``typedef`` keyword can be used to name types:
::
typedef int64 BigInt;
typedef float Float3[3];
typedef Float3 float[3];
Following C's syntax, the code above defines ``BigInt`` to have ``int64``
type and ``Float3`` to have ``float[3]`` type.
``typedef`` doesn't create a new type: it just provides an alternative name
for an existing type. Thus, in the above example, it is legal to pass a
value with ``float[3]`` type to a function that has been declared to take a
``Float3`` parameter.
Also as in C, ``typedef`` doesn't create a new type: it just provides an
alternative name for an existing type. Thus, in the above example, it is
legal to pass a value with ``float[3]`` type to a function that has been
declared to take a ``Float3`` parameter.
Pointer Types
@@ -2150,6 +2168,12 @@ greater than or equal to ``NUM_ITEMS``.
// ...
}
Short-circuiting may impose some overhead in the generated code; for cases
where short-circuiting is undesirable due to performance impact, see
the section `Logical and Selection Operations`_, which introduces helper
functions in the standard library that provide these operations without
short-circuiting.
Dynamic Memory Allocation
-------------------------
@@ -2827,6 +2851,123 @@ The ISPC Standard Library
compiling ``ispc`` programs. (To disable the standard library, pass the
``--nostdlib`` command-line flag to the compiler.)
Basic Operations On Data
------------------------
Logical and Selection Operations
--------------------------------
Recall from `Expressions`_ that ``ispc`` short-circuits the evaluation of
logical and selection operators: given an expression like ``(index < count
&& array[index] == 0)``, then ``array[index] == 0`` is only evaluated if
``index < count`` is true. This property is useful for writing expressions
like the preceding one, where the second expression may not be safe to
evaluate in some cases.
This short-circuiting can impose overhead in the generated code; additional
operations are required to test the first value and to conditionally jump
over the code that evaluates the second value. The ``ispc`` compiler does
try to mitigate this cost by detecting cases where it is both safe and
inexpensive to evaluate both expressions, and skips short-circuiting in the
generated code in this case (without there being any programmer-visible
change in program behavior.)
For cases where the compiler can't detect this case but the programmer
wants to avoid short-circuiting behavior, the standard library provides a
few helper functions. First, ``and()`` and ``or()`` provide
non-short-circuiting logical AND and OR operations.
::
bool and(bool a, bool b)
bool or(bool a, bool b)
uniform bool and(uniform bool a, uniform bool b)
uniform bool or(uniform bool a, uniform bool b)
And there are three variants of ``select()`` that select between two values
based on a boolean condition. These are the variants of ``select()`` for
the ``int8`` type:
::
int8 select(bool v, int8 a, int8 b)
int8 select(uniform bool v, int8 a, int8 b)
uniform int8 select(uniform bool v, uniform int8 a, uniform int8 b)
There are also variants for ``int16``, ``int32``, ``int64``, ``float``, and
``double`` types.
Bit Operations
--------------
The various variants of ``popcnt()`` return the population count--the
number of bits set in the given value.
::
uniform int popcnt(uniform int v)
int popcnt(int v)
uniform int popcnt(bool v)
A few functions determine how many leading bits in the given value are zero
and how many of the trailing bits are zero; there are also ``unsigned``
variants of these functions and variants that take ``int64`` and ``unsigned
int64`` types.
::
int32 count_leading_zeros(int32 v)
uniform int32 count_leading_zeros(uniform int32 v)
int32 count_trailing_zeros(int32 v)
uniform int32 count_trailing_zeros(uniform int32 v)
Sometimes it's useful to convert a ``bool`` value to an integer using sign
extension so that the integer's bits are all on if the ``bool`` has the
value ``true`` (rather than just having the value one). The
``sign_extend()`` functions provide this functionality:
::
int sign_extend(bool value)
uniform int sign_extend(uniform bool value)
The ``intbits()`` and ``floatbits()`` functions can be used to implement
low-level floating-point bit twiddling. For example, ``intbits()`` returns
an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
value. (Note: it is **not** the same as ``(int)a``, but corresponds to
something like ``*((int *)&a)`` in C.)
::
float floatbits(unsigned int a);
uniform float floatbits(uniform unsigned int a);
unsigned int intbits(float a);
uniform unsigned int intbits(uniform float a);
The ``intbits()`` and ``floatbits()`` functions have no cost at runtime;
they just let the compiler know how to interpret the bits of the given
value. They make it possible to efficiently write functions that take
advantage of the low-level bit representation of floating-point values.
For example, the ``abs()`` function in the standard library is implemented
as follows:
::
float abs(float a) {
unsigned int i = intbits(a);
i &= 0x7fffffff;
return floatbits(i);
}
This code directly clears the high order bit to ensure that the given
floating-point value is positive. This compiles down to a single ``andps``
instruction when used with an Intel® SSE target, for example.
Math Functions
--------------
@@ -2919,77 +3060,6 @@ quite efficient.)
uniform unsigned int low,
uniform unsigned int high)
Bit-Level Operations
--------------------
The various variants of ``popcnt()`` return the population count--the
number of bits set in the given value.
::
uniform int popcnt(uniform int v)
int popcnt(int v)
uniform int popcnt(bool v)
A few functions determine how many leading bits in the given value are zero
and how many of the trailing bits are zero; there are also ``unsigned``
variants of these functions and variants that take ``int64`` and ``unsigned
int64`` types.
::
int32 count_leading_zeros(int32 v)
uniform int32 count_leading_zeros(uniform int32 v)
int32 count_trailing_zeros(int32 v)
uniform int32 count_trailing_zeros(uniform int32 v)
Sometimes it's useful to convert a ``bool`` value to an integer using sign
extension so that the integer's bits are all on if the ``bool`` has the
value ``true`` (rather than just having the value one). The
``sign_extend()`` functions provide this functionality:
::
int sign_extend(bool value)
uniform int sign_extend(uniform bool value)
The ``intbits()`` and ``floatbits()`` functions can be used to implement
low-level floating-point bit twiddling. For example, ``intbits()`` returns
an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
value. (Note: it is **not** the same as ``(int)a``, but corresponds to
something like ``*((int *)&a)`` in C.
::
float floatbits(unsigned int a);
uniform float floatbits(uniform unsigned int a);
unsigned int intbits(float a);
uniform unsigned int intbits(uniform float a);
The ``intbits()`` and ``floatbits()`` functions have no cost at runtime;
they just let the compiler know how to interpret the bits of the given
value. They make it possible to efficiently write functions that take
advantage of the low-level bit representation of floating-point values.
For example, the ``abs()`` function in the standard library is implemented
as follows:
::
float abs(float a) {
unsigned int i = intbits(a);
i &= 0x7fffffff;
return floatbits(i);
}
This code directly clears the high order bit to ensure that the given
floating-point value is positive. This compiles down to a single ``andps``
instruction when used with an Intel® SSE target, for example.
Transcendental Functions
------------------------
@@ -3027,8 +3097,8 @@ The corresponding inverse functions are also available:
uniform float acos(uniform float x)
float atan(float x)
uniform float atan(uniform float x)
float atan2(float x, float y)
uniform float atan2(uniform float x, uniform float y)
float atan2(float y, float x)
uniform float atan2(uniform float y, uniform float x)
If both sine and cosine are needed, then the ``sincos()`` call computes
both more efficiently than two calls to the respective individual
@@ -3077,7 +3147,7 @@ library. State for the RNG is maintained in an instance of the
::
struct RNGState;
void seed_rng(varying RNGState * uniform state, uniform int seed)
void seed_rng(varying RNGState * uniform state, int seed)
void seed_rng(uniform RNGState * uniform state, uniform int seed)
After the RNG is seeded, the ``random()`` function can be used to get a
@@ -3622,6 +3692,22 @@ precise.
uniform int16 float_to_half_fast(uniform float f)
Converting to sRGB8
-------------------
The sRGB color space is used in many applications in graphics and imaging;
see the `Wikipedia page on sRGB`_ for more information. The ``ispc``
standard library provides two functions for converting floating-point color
values to 8-bit values in the sRGB space.
.. _Wikipedia page on sRGB: http://en.wikipedia.org/wiki/SRGB
::
int float_to_srgb8(float v)
uniform int float_to_srgb8(uniform float v)
Systems Programming Support
---------------------------
@@ -3732,6 +3818,13 @@ For global atomics, only atomic swap is available for these types:
float atomic_swap_global(uniform float * uniform ptr, float value)
double atomic_swap_global(uniform double * uniform ptr, double value)
Finally, "swap" (but none of these other atomics) is available for pointer
types:
::
void *atomic_swap_{local,global}(void * * uniform ptr, void * value)
There are also variants of the atomic that take ``uniform`` values for the
operand and return a ``uniform`` result. These correspond to a single
atomic operation being performed for the entire gang of program instances,
@@ -3756,6 +3849,13 @@ rather than one per program instance.
uniform int32 atomic_swap_{local,global}(uniform int32 * uniform ptr,
uniform int32 newval)
And similarly for pointers:
::
uniform void *atomic_swap_{local,global}(void * * uniform ptr,
void *newval)
Be careful that you use the atomic function that you mean to; consider the
following code:
@@ -3797,12 +3897,18 @@ the same location in memory!)
int32 atomic_xor_{local,global}(uniform int32 * varying ptr, int32 value)
int32 atomic_swap_{local,global}(uniform int32 * varying ptr, int32 value)
And:
::
void *atomic_swap_{local,global}(void * * ptr, void *value)
There are also atomic "compare and exchange" functions. Compare and
exchange atomically compares the value in "val" to "compare"--if they
match, it assigns "newval" to "val". In either case, the old value of
"val" is returned. (As with the other atomic operations, there are also
``unsigned`` and 64-bit variants of this function. Furthermore, there are
``float`` and ``double`` variants as well.)
``float``, ``double``, and ``void *`` variants as well.)
::
@@ -3824,6 +3930,11 @@ code.
void memory_barrier();
Note that this barrier is *not* needed for coordinating reads and writes
among the program instances in a gang; it's only needed for coordinating
between multiple hardware threads running on different cores. See the
section `Data Races Within a Gang`_ for the guarantees provided about
memory read/write ordering across a gang.
Prefetches
----------

View File

@@ -2,6 +2,24 @@
ispc News
=========
ispc 1.2.1 is Released
----------------------
This is a bugfix release, fixing approximately 20 bugs in the system and
improving error handling and error reporting. New functionality includes
very efficient float/half conversion routines thanks to Fabian
Giesen. See the `1.2.1 release notes`_ for details.
.. _1.2.1 release notes: https://github.com/ispc/ispc/tree/master/docs/ReleaseNotes.txt
ispc 1.2.0 is Released
-----------------------
A new major release was posted on March 20, 2012. This release includes
significant new functionality for cleanly handling "structure of arrays"
(SoA) data layout and a new model for how uniform and varying are handled
with structure types.
Paper on ispc To Appear in InPar 2012
-------------------------------------

View File

@@ -624,7 +624,7 @@ gathers happen.)
extern "C" {
void ISPCInstrument(const char *fn, const char *note,
int line, int mask);
int line, uint64_t mask);
}
This function is passed the file name of the ``ispc`` file running, a short
@@ -637,7 +637,7 @@ as follows:
::
ISPCInstrument("foo.ispc", "function entry", 55, 0xf);
ISPCInstrument("foo.ispc", "function entry", 55, 0xfull);
This call indicates that the currently executing program has just
entered the function defined at line 55 of the file ``foo.ispc``, with a

View File

@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
# This could be handy for archiving the generated documentation or
# if some version control system is used.
PROJECT_NUMBER = 1.2.0
PROJECT_NUMBER = 1.2.2
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
# base path where the generated documentation will be put.

View File

@@ -210,7 +210,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
RNGState rngstate;
seed_rng(&rngstate, y0);
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));
float invSamples = 1.f / nsubsamples;
foreach_tiled(y = y0 ... y1, x = 0 ... w,

View File

@@ -211,7 +211,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
RNGState rngstate;
seed_rng(&rngstate, y0);
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));
// Compute the mapping between the 'programCount'-wide program
// instances running in parallel and samples in the image.

View File

@@ -87,7 +87,7 @@ int main(int argc, char** argv) {
framebuffer.clear();
reset_and_start_timer();
for (int j = 0; j < nframes; ++j)
ispc::RenderStatic(&input->header, &input->arrays,
ispc::RenderStatic(input->header, input->arrays,
VISUALIZE_LIGHT_COUNT,
framebuffer.r, framebuffer.g, framebuffer.b);
double mcycles = get_elapsed_mcycles() / nframes;

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -259,13 +259,13 @@ static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \
return ret; \
}
#define SMEAR(VTYPE, NAME, STYPE) \
static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
VTYPE ret; \
for (int i = 0; i < 16; ++i) \
ret.v[i] = v; \
return ret; \
} \
#define SMEAR(VTYPE, NAME, STYPE) \
static FORCEINLINE VTYPE __smear_##NAME(VTYPE retType, STYPE v) { \
VTYPE ret; \
for (int i = 0; i < 16; ++i) \
ret.v[i] = v; \
return ret; \
}
#define BROADCAST(VTYPE, NAME, STYPE) \
static FORCEINLINE VTYPE __broadcast_##NAME(VTYPE v, int index) { \
@@ -311,8 +311,8 @@ INSERT_EXTRACT(__vec1_d, double)
///////////////////////////////////////////////////////////////////////////
// mask ops
static FORCEINLINE uint32_t __movmsk(__vec16_i1 mask) {
return mask.v;
static FORCEINLINE uint64_t __movmsk(__vec16_i1 mask) {
return (uint64_t)mask.v;
}
static FORCEINLINE __vec16_i1 __equal(__vec16_i1 a, __vec16_i1 b) {
@@ -339,6 +339,24 @@ static FORCEINLINE __vec16_i1 __or(__vec16_i1 a, __vec16_i1 b) {
return r;
}
// Returns a mask whose bit vector is the bitwise complement of the bit
// vector held in `v`.
static FORCEINLINE __vec16_i1 __not(__vec16_i1 v) {
    __vec16_i1 flipped;
    flipped.v = ~v.v;
    return flipped;
}
// Returns (~a) & b: the bits of `b` that are not also set in `a`.  The
// "1" in the name refers to the first operand being the complemented one.
static FORCEINLINE __vec16_i1 __and_not1(__vec16_i1 a, __vec16_i1 b) {
    __vec16_i1 result;
    result.v = b.v & ~a.v;
    return result;
}
// Returns a & (~b): the bits of `a` that are not also set in `b`.  The
// "2" in the name refers to the second operand being the complemented one.
static FORCEINLINE __vec16_i1 __and_not2(__vec16_i1 a, __vec16_i1 b) {
    __vec16_i1 result;
    result.v = ~b.v & a.v;
    return result;
}
static FORCEINLINE __vec16_i1 __select(__vec16_i1 mask, __vec16_i1 a,
__vec16_i1 b) {
__vec16_i1 r;
@@ -374,6 +392,12 @@ static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v, int align) {
*ptr = v.v;
}
// Builds a __vec16_i1 by passing the same scalar `v` for all sixteen
// constructor arguments.  The first (unnamed) parameter is never read; it
// only carries the vector type, matching the two-argument form of the
// other __smear_* functions.
static FORCEINLINE __vec16_i1 __smear_i1(__vec16_i1, int v) {
return __vec16_i1(v, v, v, v, v, v, v, v,
v, v, v, v, v, v, v, v);
}
///////////////////////////////////////////////////////////////////////////
// int8
@@ -581,6 +605,121 @@ ROTATE(__vec16_f, float, float)
SHUFFLES(__vec16_f, float, float)
LOAD_STORE(__vec16_f, float)
// Scalar exponential: forwards to the C library's single-precision expf().
static FORCEINLINE float __exp_uniform_float(float v) {
    const float result = expf(v);
    return result;
}
// Element-wise exponential: applies expf() independently to each of the
// sixteen elements of `v`.
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = expf(v.v[lane]);
    }
    return result;
}
// Scalar natural logarithm: forwards to the C library's single-precision
// logf().
static FORCEINLINE float __log_uniform_float(float v) {
    const float result = logf(v);
    return result;
}
// Element-wise natural logarithm: applies logf() independently to each of
// the sixteen elements of `v`.
static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = logf(v.v[lane]);
    }
    return result;
}
// Scalar power function: returns `a` raised to the power `b` via the C
// library's single-precision powf().
static FORCEINLINE float __pow_uniform_float(float a, float b) {
    const float result = powf(a, b);
    return result;
}
// Element-wise power function: for each of the sixteen elements, raises
// a's element to the power given by b's corresponding element.
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = powf(a.v[lane], b.v[lane]);
    }
    return result;
}
// Reinterprets the bits of a float as an int (no numeric conversion is
// performed).  The value is written through the float member of a union
// and read back through the int member.
static FORCEINLINE int __intbits(float v) {
    union {
        float asFloat;
        int asInt;
    } pun;
    pun.asFloat = v;
    return pun.asInt;
}
// Reinterprets the bits of an int as a float (no numeric conversion is
// performed).  The value is written through the int member of a union and
// read back through the float member.
static FORCEINLINE float __floatbits(int v) {
    union {
        float asFloat;
        int asInt;
    } pun;
    pun.asInt = v;
    return pun.asFloat;
}
// Converts a single 16-bit half-precision bit pattern `h` to a 32-bit float.
// The work is done on the integer bit pattern: the exponent and mantissa
// are shifted into their float positions, the exponent is re-biased, the
// Inf/NaN and zero/denormal exponents get an extra fix-up, and the sign bit
// is OR'd in last.  The result is returned by reinterpreting the final bit
// pattern with __floatbits().
static FORCEINLINE float __half_to_float_uniform(int16_t h) {
static const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift
// Drop the sign bit (bit 15) and move the remaining 15 bits up by 13 so
// they line up with the float exponent/mantissa fields.
int32_t o = ((int32_t)(h & 0x7fff)) << 13; // exponent/mantissa bits
uint32_t exp = shifted_exp & o; // just the exponent
// (127 - 15) is the difference between the float and half exponent biases.
o += (127 - 15) << 23; // exponent adjust
// handle exponent special cases
if (exp == shifted_exp) // Inf/NaN?
o += (128 - 16) << 23; // extra exp adjust
else if (exp == 0) { // Zero/Denormal?
o += 1 << 23; // extra exp adjust
// Subtracting the float whose bit pattern is (113 << 23) is done in
// floating point, so the FPU performs the renormalization.
o = __intbits(__floatbits(o) - __floatbits(113 << 23)); // renormalize
}
// Move the half's sign bit (bit 15) to the float's sign position (bit 31).
o |= ((int32_t)(h & 0x8000)) << 16; // sign bit
return __floatbits(o);
}
// Element-wise half-to-float conversion: runs __half_to_float_uniform() on
// each of the sixteen 16-bit elements of `v`.
static FORCEINLINE __vec16_f __half_to_float_varying(__vec16_i16 v) {
    __vec16_f result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = __half_to_float_uniform(v.v[lane]);
    }
    return result;
}
// Converts a single 32-bit float `f` to a 16-bit half-precision bit
// pattern.  The sign is split off first; the magnitude is then either
// mapped to a fixed Inf/NaN pattern or rescaled with a floating-point
// multiply, biased, clamped and shifted down by 13 bits.  The sign bit is
// OR'd back in on return.
static FORCEINLINE int16_t __float_to_half_uniform(float f) {
uint32_t sign_mask = 0x80000000u;
int32_t o;
int32_t fint = __intbits(f);
int32_t sign = fint & sign_mask;
// Clear the sign bit so the comparisons below operate on the magnitude.
fint ^= sign;
// 255 << 23 has all exponent bits set and a zero mantissa.  Magnitudes
// above it select 0x7e00, everything else starts out as 0x7c00 (and is
// overwritten below when fint < f32infty).
int32_t f32infty = 255 << 23;
o = (fint > f32infty) ? 0x7e00 : 0x7c00;
// (De)normalized number or zero
// update fint unconditionally to save the blending; we don't need it
// anymore for the Inf/NaN case anyway.
const uint32_t round_mask = ~0xfffu;
const int32_t magic = 15 << 23;
const int32_t f16infty = 31 << 23;
// round_mask is unsigned, so "- round_mask" is carried out modulo 2^32
// and is equivalent to adding 0x1000 -- presumably the rounding bias.
int32_t fint2 = __intbits(__floatbits(fint & round_mask) * __floatbits(magic)) - round_mask;
fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
if (fint < f32infty)
o = fint2 >> 13; // Take the bits!
// sign >> 16 moves the sign from bit 31 to bit 15; any higher bits set by
// the shift are discarded by the conversion to the int16_t return type.
return (o | (sign >> 16));
}
// Element-wise float-to-half conversion: runs __float_to_half_uniform() on
// each of the sixteen float elements of `v`.
static FORCEINLINE __vec16_i16 __float_to_half_varying(__vec16_f v) {
    __vec16_i16 result;
    for (int lane = 0; lane < 16; ++lane) {
        result.v[lane] = __float_to_half_uniform(v.v[lane]);
    }
    return result;
}
///////////////////////////////////////////////////////////////////////////
// double

File diff suppressed because it is too large Load Diff

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -224,8 +224,8 @@ CAST_BITS_SCALAR(double, int64_t)
///////////////////////////////////////////////////////////////////////////
// mask ops
static FORCEINLINE uint32_t __movmsk(__vec4_i1 mask) {
return _mm_movemask_ps(mask.v);
static FORCEINLINE uint64_t __movmsk(__vec4_i1 mask) {
return (uint64_t)_mm_movemask_ps(mask.v);
}
static FORCEINLINE __vec4_i1 __equal(__vec4_i1 a, __vec4_i1 b) {
@@ -266,6 +266,10 @@ static FORCEINLINE void __store(__vec4_i1 *p, __vec4_i1 value, int align) {
_mm_storeu_ps((float *)(&p->v), value.v);
}
// Builds a __vec4_i1 by passing the same scalar `v` for all four
// constructor arguments.  The first (unnamed) parameter is never read; it
// only carries the vector type, matching the two-argument form of the
// other __smear_* functions.
static FORCEINLINE __vec4_i1 __smear_i1(__vec4_i1, int v) {
return __vec4_i1(v, v, v, v);
}
///////////////////////////////////////////////////////////////////////////
// int8
@@ -489,7 +493,7 @@ static FORCEINLINE void __insert_element(__vec4_i8 *v, int index, int8_t val) {
((int8_t *)v)[index] = val;
}
static FORCEINLINE __vec4_i8 __smear_i8(int8_t v) {
static FORCEINLINE __vec4_i8 __smear_i8(__vec4_i8, int8_t v) {
return _mm_set1_epi8(v);
}
@@ -748,7 +752,7 @@ static FORCEINLINE void __insert_element(__vec4_i16 *v, int index, int16_t val)
((int16_t *)v)[index] = val;
}
static FORCEINLINE __vec4_i16 __smear_i16(int16_t v) {
static FORCEINLINE __vec4_i16 __smear_i16(__vec4_i16, int16_t v) {
return _mm_set1_epi16(v);
}
@@ -985,7 +989,7 @@ static FORCEINLINE __vec4_i32 __select(__vec4_i1 mask, __vec4_i32 a, __vec4_i32
_mm_castsi128_ps(a.v), mask.v));
}
static FORCEINLINE __vec4_i32 __smear_i32(int32_t v) {
static FORCEINLINE __vec4_i32 __smear_i32(__vec4_i32, int32_t v) {
return _mm_set1_epi32(v);
}
@@ -1246,7 +1250,7 @@ static FORCEINLINE __vec4_i64 __select(__vec4_i1 mask, __vec4_i64 a, __vec4_i64
return __vec4_i64(_mm_castpd_si128(r0), _mm_castpd_si128(r1));
}
static FORCEINLINE __vec4_i64 __smear_i64(int64_t v) {
static FORCEINLINE __vec4_i64 __smear_i64(__vec4_i64, int64_t v) {
return __vec4_i64(v, v, v, v);
}
@@ -1350,7 +1354,7 @@ static FORCEINLINE __vec4_f __select(__vec4_i1 mask, __vec4_f a, __vec4_f b) {
return _mm_blendv_ps(b.v, a.v, mask.v);
}
static FORCEINLINE __vec4_f __smear_float(float v) {
static FORCEINLINE __vec4_f __smear_float(__vec4_f, float v) {
return _mm_set1_ps(v);
}
@@ -1482,7 +1486,7 @@ static FORCEINLINE __vec4_d __select(__vec4_i1 mask, __vec4_d a, __vec4_d b) {
return __vec4_d(r0, r1);
}
static FORCEINLINE __vec4_d __smear_double(double v) {
static FORCEINLINE __vec4_d __smear_double(__vec4_d, double v) {
return __vec4_d(_mm_set1_pd(v), _mm_set1_pd(v));
}
@@ -1582,11 +1586,13 @@ static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i8 val) {
}
static FORCEINLINE __vec4_i8 __cast_sext(__vec4_i8, __vec4_i1 v) {
return __select(v, __smear_i8(0xff), __smear_i8(0));
return __select(v, __smear_i8(__vec4_i8(), 0xff),
__smear_i8(__vec4_i8(), 0));
}
static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i1 v) {
return __select(v, __smear_i16(0xffff), __smear_i16(0));
return __select(v, __smear_i16(__vec4_i16(), 0xffff),
__smear_i16(__vec4_i16(), 0));
}
static FORCEINLINE __vec4_i32 __cast_sext(__vec4_i32, __vec4_i1 v) {
@@ -1646,11 +1652,12 @@ static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i8 val) {
}
static FORCEINLINE __vec4_i8 __cast_zext(__vec4_i8, __vec4_i1 v) {
return __select(v, __smear_i8(1), __smear_i8(0));
return __select(v, __smear_i8(__vec4_i8(), 1), __smear_i8(__vec4_i8(), 0));
}
static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i1 v) {
return __select(v, __smear_i16(1), __smear_i16(0));
return __select(v, __smear_i16(__vec4_i16(), 1),
__smear_i16(__vec4_i16(), 0));
}
static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) {
@@ -1658,7 +1665,7 @@ static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) {
}
static FORCEINLINE __vec4_i64 __cast_zext(__vec4_i64, __vec4_i1 v) {
return __select(v, __smear_i64(1), __smear_i64(0));
return __select(v, __smear_i64(__vec4_i64(), 1), __smear_i64(__vec4_i64(), 0));
}
// truncations
@@ -1818,11 +1825,11 @@ static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i64 val) {
}
static FORCEINLINE __vec4_f __cast_uitofp(__vec4_f, __vec4_i1 v) {
return __select(v, __smear_float(1.), __smear_float(0.));
return __select(v, __smear_float(__vec4_f(), 1.), __smear_float(__vec4_f(), 0.));
}
static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i1 v) {
return __select(v, __smear_double(1.), __smear_double(0.));
return __select(v, __smear_double(__vec4_d(), 1.), __smear_double(__vec4_d(), 0.));
}
// float/double to signed int
@@ -2613,8 +2620,8 @@ lGatherBaseOffsets32(RetVec, RetScalar, unsigned char *p, __vec4_i32 offsets,
RetScalar r[4];
#if 1
// "Fast gather" trick...
offsets = __select(mask, offsets, __smear_i32(0));
constOffset = __select(mask, constOffset, __smear_i32(0));
offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0));
constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0));
int offset = scale * _mm_extract_epi32(offsets.v, 0) + _mm_extract_epi32(constOffset.v, 0);
RetScalar *ptr = (RetScalar *)(p + offset);
@@ -2671,8 +2678,8 @@ lGatherBaseOffsets64(RetVec, RetScalar, unsigned char *p, __vec4_i64 offsets,
RetScalar r[4];
#if 1
// "Fast gather" trick...
offsets = __select(mask, offsets, __smear_i64(0));
constOffset = __select(mask, constOffset, __smear_i64(0));
offsets = __select(mask, offsets, __smear_i64(__vec4_i64(), 0));
constOffset = __select(mask, constOffset, __smear_i64(__vec4_i64(), 0));
int64_t offset = scale * _mm_extract_epi64(offsets.v[0], 0) + _mm_extract_epi64(constOffset.v[0], 0);
RetScalar *ptr = (RetScalar *)(p + offset);
@@ -2756,8 +2763,8 @@ __gather_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
__m128i r = _mm_set_epi32(0, 0, 0, 0);
#if 1
// "Fast gather"...
offsets = __select(mask, offsets, __smear_i32(0));
constOffset = __select(mask, constOffset, __smear_i32(0));
offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0));
constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0));
int offset = scale * _mm_extract_epi32(offsets.v, 0) +
_mm_extract_epi32(constOffset.v, 0);

View File

@@ -43,9 +43,15 @@ extern "C" {
#endif /* __cplusplus */
__inline__ uint64_t rdtsc() {
uint32_t low, high;
#ifdef __x86_64
__asm__ __volatile__ (
"xorl %%eax,%%eax \n cpuid"
::: "%rax", "%rbx", "%rcx", "%rdx" );
#else
__asm__ __volatile__ (
"xorl %%eax,%%eax \n cpuid"
::: "%eax", "%ebx", "%ecx", "%edx" );
#endif
__asm__ __volatile__ (
"rdtsc" : "=a" (low), "=d" (high));
return (uint64_t)high << 32 | low;

1936
expr.cpp

File diff suppressed because it is too large Load Diff

30
expr.h
View File

@@ -284,6 +284,10 @@ public:
int EstimateCost() const;
Expr *baseExpr, *index;
private:
mutable const Type *type;
mutable const PointerType *lvalueType;
};
@@ -320,6 +324,9 @@ public:
member is found. (i.e. this is true if the MemberExpr was a '->'
operator, and is false if it was a '.' operator. */
bool dereferenceExpr;
protected:
mutable const Type *type, *lvalueType;
};
@@ -584,6 +591,7 @@ public:
Expr *TypeCheck();
Expr *Optimize();
int EstimateCost() const;
llvm::Constant *GetConstant(const Type *type) const;
Expr *expr;
};
@@ -651,20 +659,26 @@ public:
function overloading, this method resolves which actual function
the arguments match best. If the argCouldBeNULL parameter is
non-NULL, each element indicates whether the corresponding argument
is the number zero, indicating that it could be a NULL pointer.
This parameter may be NULL (for cases where overload resolution is
being done just given type information without the parameter
argument expressions being available. It returns true on success.
is the number zero, indicating that it could be a NULL pointer, and
if argIsConstant is non-NULL, each element indicates whether the
corresponding argument is a compile-time constant value. Both of
these parameters may be NULL (for cases where overload resolution
is being done just given type information without the parameter
argument expressions being available). This function returns true
on success.
*/
bool ResolveOverloads(SourcePos argPos,
const std::vector<const Type *> &argTypes,
const std::vector<bool> *argCouldBeNULL = NULL);
const std::vector<bool> *argCouldBeNULL = NULL,
const std::vector<bool> *argIsConstant = NULL);
Symbol *GetMatchingFunction();
private:
bool tryResolve(int (*matchFunc)(const Type *, const Type *),
SourcePos argPos, const std::vector<const Type *> &argTypes,
const std::vector<bool> *argCouldBeNULL);
std::vector<Symbol *> getCandidateFunctions(int argCount) const;
static int computeOverloadCost(const FunctionType *ftype,
const std::vector<const Type *> &argTypes,
const std::vector<bool> *argCouldBeNULL,
const std::vector<bool> *argIsConstant);
/** Name of the function that is being called. */
std::string name;

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2011, Intel Corporation
Copyright (c) 2011-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -66,9 +66,8 @@
#include <llvm/Support/ToolOutputFile.h>
#include <llvm/Assembly/PrintModulePass.h>
Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
Function::Function(Symbol *s, Stmt *c) {
sym = s;
args = a;
code = c;
maskSymbol = m->symbolTable->LookupVariable("__mask");
@@ -101,12 +100,20 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
printf("\n\n\n");
}
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
for (unsigned int i = 0; i < args.size(); ++i)
if (dynamic_cast<const ReferenceType *>(args[i]->type) == NULL)
args[i]->parentFunction = this;
for (int i = 0; i < type->GetNumParameters(); ++i) {
const char *paramName = type->GetParameterName(i).c_str();
Symbol *sym = m->symbolTable->LookupVariable(paramName);
if (sym == NULL)
Assert(strncmp(paramName, "__anon_parameter_", 17) == 0);
args.push_back(sym);
const Type *t = type->GetParameterType(i);
if (sym != NULL && CastType<ReferenceType>(t) == NULL)
sym->parentFunction = this;
}
if (type->isTask) {
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
@@ -125,7 +132,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
const Type *
Function::GetReturnType() const {
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
return type->GetReturnType();
}
@@ -133,7 +140,7 @@ Function::GetReturnType() const {
const FunctionType *
Function::GetType() const {
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
return type;
}
@@ -145,7 +152,8 @@ Function::GetType() const {
'mem2reg' pass will in turn promote to SSA registers.
*/
static void
lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol *> &args,
lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const
std::vector<Symbol *> &args,
FunctionEmitContext *ctx) {
// We expect the argument structure to come in as a pointer to a
// structure. Confirm and figure out its type here.
@@ -157,9 +165,13 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
llvm::dyn_cast<const llvm::StructType>(pt->getElementType());
// Get the type of the argument we're copying in and its Symbol pointer
LLVM_TYPE_CONST llvm::Type *argType = argStructType->getElementType(i);
llvm::Type *argType = argStructType->getElementType(i);
Symbol *sym = args[i];
if (sym == NULL)
// anonymous parameter, so don't worry about it
return;
// allocate space to copy the parameter in to
sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str());
@@ -170,7 +182,7 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
// memory
llvm::Value *ptrval = ctx->LoadInst(ptr, sym->name.c_str());
ctx->StoreInst(ptrval, sym->storagePtr);
ctx->EmitFunctionParameterDebugInfo(sym);
ctx->EmitFunctionParameterDebugInfo(sym, i);
}
@@ -186,14 +198,14 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
// value
maskSymbol->storagePtr = ctx->GetFullMaskPointer();
// add debugging info for __mask, programIndex, ...
// add debugging info for __mask
maskSymbol->pos = firstStmtPos;
ctx->EmitVariableDebugInfo(maskSymbol);
#if 0
llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock();
#endif
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
if (type->isTask == true) {
// For tasks, there should always be three parameters: the
@@ -240,13 +252,17 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
llvm::Function::arg_iterator argIter = function->arg_begin();
for (unsigned int i = 0; i < args.size(); ++i, ++argIter) {
Symbol *sym = args[i];
if (sym == NULL)
// anonymous function parameter
continue;
argIter->setName(sym->name.c_str());
// Allocate stack storage for the parameter and emit code
// to store its value there.
sym->storagePtr = ctx->AllocaInst(argIter->getType(), sym->name.c_str());
ctx->StoreInst(argIter, sym->storagePtr);
ctx->EmitFunctionParameterDebugInfo(sym);
ctx->EmitFunctionParameterDebugInfo(sym, i);
}
// If the number of actual function arguments is equal to the
@@ -415,11 +431,11 @@ Function::GenerateIR() {
// If the function is 'export'-qualified, emit a second version of
// it without a mask parameter and without name mangling so that
// the application can call it
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
const FunctionType *type = CastType<FunctionType>(sym->type);
Assert(type != NULL);
if (type->isExported) {
if (!type->isTask) {
LLVM_TYPE_CONST llvm::FunctionType *ftype =
llvm::FunctionType *ftype =
type->LLVMFunctionType(g->ctx);
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
std::string functionName = sym->name;

4
func.h
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2011, Intel Corporation
Copyright (c) 2011-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -43,7 +43,7 @@
class Function {
public:
Function(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code);
Function(Symbol *sym, Stmt *code);
const Type *GetReturnType() const;
const FunctionType *GetType() const;

278
ispc.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -54,14 +54,8 @@
#include <llvm/Target/TargetMachine.h>
#include <llvm/Target/TargetOptions.h>
#include <llvm/Target/TargetData.h>
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#else
#include <llvm/Target/TargetRegistry.h>
#include <llvm/Target/TargetSelect.h>
#include <llvm/Target/SubtargetFeature.h>
#endif
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#include <llvm/Support/Host.h>
Globals *g;
@@ -70,9 +64,82 @@ Module *m;
///////////////////////////////////////////////////////////////////////////
// Target
#ifndef ISPC_IS_WINDOWS
/* Stand-in for __cpuid(), compiled only when ISPC_IS_WINDOWS is not
   defined.  Executes the CPUID instruction with EAX set to infoType and
   stores the resulting EAX, EBX, ECX and EDX register values in info[0],
   info[1], info[2] and info[3], respectively. */
static void __cpuid(int info[4], int infoType) {
__asm__ __volatile__ ("cpuid"
: "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
: "0" (infoType));
}
/* Executes the CPUID instruction with EAX set to level and ECX set to
   count, and stores the resulting EAX, EBX, ECX and EDX values in info[0],
   info[1], info[2] and info[3], respectively.

   %ebx is not named as an output directly; instead it is exchanged with a
   compiler-chosen register before and after the instruction, so that its
   original value is put back afterwards.  Save %ebx in case it's the PIC
   register. */
static void __cpuidex(int info[4], int level, int count) {
__asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
: "0" (level), "2" (count));
}
#endif // ISPC_IS_WINDOWS
/** Queries the host processor with CPUID and returns the name of the
    ISA to target by default: "avx2", "avx", "sse4" or "sse2".

    The checks run from most to least capable, using CPUID leaf 1 and,
    when the AVX bit is set, leaf 7 (sub-leaf 0).  If none of the tested
    feature bits is set, an error is printed to stderr and the process
    exits with status 1.
 */
static const char *
lGetSystemISA() {
    int regs[4];
    __cpuid(regs, 1);

    // Keep the leaf-1 feature words around; regs is overwritten below.
    const int leaf1Ecx = regs[2];
    const int leaf1Edx = regs[3];

    const bool hasAVX = (leaf1Ecx & (1 << 28)) != 0;
    if (hasAVX) {
        // AVX is available; query leaf 7 (eax=7, ecx=0) and test EBX
        // bit 5 to see whether AVX2 is available as well.
        __cpuidex(regs, 7, 0);
        const bool hasAVX2 = (regs[1] & (1 << 5)) != 0;
        return hasAVX2 ? "avx2" : "avx";
    }

    // Leaf 1, ECX bit 19: reported as "sse4".
    if ((leaf1Ecx & (1 << 19)) != 0)
        return "sse4";

    // Leaf 1, EDX bit 26: reported as "sse2".
    if ((leaf1Edx & (1 << 26)) != 0)
        return "sse2";

    fprintf(stderr, "Unable to detect supported SSE/AVX ISA. Exiting.\n");
    exit(1);
}
// Names accepted for an explicitly specified CPU.  Target::GetTarget()
// reports an error for a CPU name that is not in this table, and
// Target::SupportedTargetCPUs() joins these entries into the
// comma-separated list it returns.
// NOTE(review): Target::GetTarget() also compares the CPU name against
// "sandybridge" when choosing a default ISA, but that name is not listed
// here -- confirm whether that is intended.
static const char *supportedCPUs[] = {
"atom", "penryn", "core2", "corei7", "corei7-avx"
};
bool
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
bool pic, Target *t) {
if (isa == NULL) {
if (cpu != NULL) {
// If a CPU was specified explicitly, try to pick the best
// possible ISA based on that.
if (!strcmp(cpu, "sandybridge") ||
!strcmp(cpu, "corei7-avx"))
isa = "avx";
else if (!strcmp(cpu, "corei7") ||
!strcmp(cpu, "penryn"))
isa = "sse4";
else
isa = "sse2";
fprintf(stderr, "Notice: no --target specified on command-line. "
"Using ISA \"%s\" based on specified CPU \"%s\".\n", isa,
cpu);
}
else {
// No CPU and no ISA, so use CPUID to figure out what this CPU
// supports.
isa = lGetSystemISA();
fprintf(stderr, "Notice: no --target specified on command-line. "
"Using system ISA \"%s\".\n", isa);
}
}
if (cpu == NULL) {
std::string hostCPU = llvm::sys::getHostCPUName();
if (hostCPU.size() > 0)
@@ -82,19 +149,24 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
cpu = "generic";
}
}
else {
bool foundCPU = false;
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
++i) {
if (!strcmp(cpu, supportedCPUs[i])) {
foundCPU = true;
break;
}
}
if (foundCPU == false) {
fprintf(stderr, "Error: CPU type \"%s\" unknown. Supported CPUs: "
"%s.\n", cpu, SupportedTargetCPUs().c_str());
return false;
}
}
t->cpu = cpu;
if (isa == NULL) {
if (!strcasecmp(cpu, "atom"))
isa = "sse2";
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
else if (!strcasecmp(cpu, "sandybridge") ||
!strcasecmp(cpu, "corei7-avx"))
isa = "avx";
#endif // LLVM_3_0
else
isa = "sse4";
}
if (arch == NULL)
arch = "x86-64";
@@ -125,13 +197,15 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->arch = arch;
}
// This is the case for most of them
t->hasHalf = t->hasTranscendentals = false;
if (!strcasecmp(isa, "sse2")) {
t->isa = Target::SSE2;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse2-x2")) {
@@ -140,7 +214,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4")) {
@@ -149,7 +222,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
@@ -158,7 +230,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "generic-4")) {
@@ -166,41 +237,59 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
}
else if (!strcasecmp(isa, "generic-8")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
}
else if (!strcasecmp(isa, "generic-16")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
}
else if (!strcasecmp(isa, "generic-32")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 32;
t->vectorWidth = 32;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
}
else if (!strcasecmp(isa, "generic-64")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 64;
t->vectorWidth = 64;
t->maskingIsFree = true;
t->maskBitCount = 1;
t->hasHalf = true;
t->hasTranscendentals = true;
}
else if (!strcasecmp(isa, "generic-1")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 1;
t->vectorWidth = 1;
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
else if (!strcasecmp(isa, "avx")) {
t->isa = Target::AVX;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx-x2")) {
@@ -209,19 +298,17 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->vectorWidth = 16;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
#endif // LLVM 3.0+
#if defined(LLVM_3_1svn)
#ifndef LLVM_3_0
else if (!strcasecmp(isa, "avx2")) {
t->isa = Target::AVX2;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
t->hasHalf = true;
}
else if (!strcasecmp(isa, "avx2-x2")) {
t->isa = Target::AVX2;
@@ -229,10 +316,10 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->vectorWidth = 16;
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
t->hasHalf = true;
}
#endif // LLVM 3.1
#endif // !LLVM_3_0
else {
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
isa, SupportedTargetISAs());
@@ -243,23 +330,23 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
const llvm::TargetData *targetData = targetMachine->getTargetData();
t->is32Bit = (targetData->getPointerSize() == 4);
Assert(t->vectorWidth <= ISPC_MAX_NVEC);
}
return !error;
}
const char *
std::string
Target::SupportedTargetCPUs() {
return "atom, barcelona, core2, corei7, "
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
"corei7-avx, "
#endif
"istanbul, nocona, penryn, "
#ifdef LLVM_2_9
"sandybridge, "
#endif
"westmere";
std::string ret;
int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]);
for (int i = 0; i < count; ++i) {
ret += supportedCPUs[i];
if (i != count - 1)
ret += ", ";
}
return ret;
}
@@ -271,14 +358,11 @@ Target::SupportedTargetArchs() {
const char *
Target::SupportedTargetISAs() {
return "sse2, sse2-x2, sse4, sse4-x2"
#ifndef LLVM_2_9
", avx, avx-x2"
#endif // !LLVM_2_9
#ifdef LLVM_3_1svn
return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2"
#ifndef LLVM_3_0
", avx2, avx2-x2"
#endif // LLVM_3_1svn
", generic-4, generic-8, generic-16, generic-1";
#endif // !LLVM_3_0
", generic-1, generic-4, generic-8, generic-16, generic-32";
}
@@ -286,10 +370,10 @@ std::string
Target::GetTripleString() const {
llvm::Triple triple;
// Start with the host triple as the default
#if defined(LLVM_3_1) || defined(LLVM_3_1svn)
triple.setTriple(llvm::sys::getDefaultTargetTriple());
#else
#ifdef LLVM_3_0
triple.setTriple(llvm::sys::getHostTriple());
#else
triple.setTriple(llvm::sys::getDefaultTargetTriple());
#endif
// And override the arch in the host triple based on what the user
@@ -315,30 +399,17 @@ Target::GetTargetMachine() const {
llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ :
llvm::Reloc::Default;
#if defined(LLVM_3_1svn)
std::string featuresString = attributes;
llvm::TargetOptions options;
if (g->opt.fastMath == true)
options.UnsafeFPMath = 1;
llvm::TargetMachine *targetMachine =
target->createTargetMachine(triple, cpu, featuresString, options,
relocModel);
#elif defined(LLVM_3_0)
#ifdef LLVM_3_0
std::string featuresString = attributes;
llvm::TargetMachine *targetMachine =
target->createTargetMachine(triple, cpu, featuresString, relocModel);
#else // LLVM 2.9
#ifdef ISPC_IS_APPLE
relocModel = llvm::Reloc::PIC_;
#endif // ISPC_IS_APPLE
std::string featuresString = cpu + std::string(",") + attributes;
#else
std::string featuresString = attributes;
llvm::TargetOptions options;
llvm::TargetMachine *targetMachine =
target->createTargetMachine(triple, featuresString);
#ifndef ISPC_IS_WINDOWS
targetMachine->setRelocationModel(relocModel);
#endif // !ISPC_IS_WINDOWS
#endif // LLVM_2_9
target->createTargetMachine(triple, cpu, featuresString, options,
relocModel);
#endif // !LLVM_3_0
Assert(targetMachine != NULL);
targetMachine->setAsmVerbosityDefault(true);
@@ -367,7 +438,7 @@ Target::GetISAString() const {
static bool
lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
lGenericTypeLayoutIndeterminate(llvm::Type *type) {
if (type->isPrimitiveType() || type->isIntegerTy())
return false;
@@ -376,18 +447,18 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
type == LLVMTypes::Int1VectorType)
return true;
LLVM_TYPE_CONST llvm::ArrayType *at =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type);
llvm::ArrayType *at =
llvm::dyn_cast<llvm::ArrayType>(type);
if (at != NULL)
return lGenericTypeLayoutIndeterminate(at->getElementType());
LLVM_TYPE_CONST llvm::PointerType *pt =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(type);
llvm::PointerType *pt =
llvm::dyn_cast<llvm::PointerType>(type);
if (pt != NULL)
return false;
LLVM_TYPE_CONST llvm::StructType *st =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
llvm::StructType *st =
llvm::dyn_cast<llvm::StructType>(type);
if (st != NULL) {
for (int i = 0; i < (int)st->getNumElements(); ++i)
if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
@@ -395,29 +466,24 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
return false;
}
Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type));
Assert(llvm::isa<llvm::VectorType>(type));
return true;
}
llvm::Value *
Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
Target::SizeOf(llvm::Type *type,
llvm::BasicBlock *insertAtEnd) {
if (isa == Target::GENERIC &&
lGenericTypeLayoutIndeterminate(type)) {
llvm::Value *index[1] = { LLVMInt32(1) };
LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
llvm::Instruction *gep =
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
insertAtEnd);
#else
llvm::Instruction *gep =
llvm::GetElementPtrInst::Create(voidPtr, &index[0], &index[1],
"sizeof_gep", insertAtEnd);
#endif
if (is32Bit || g->opt.force32BitAddressing)
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
"sizeof_int", insertAtEnd);
@@ -428,7 +494,9 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
Assert(td != NULL);
uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
uint64_t bitSize = td->getTypeSizeInBits(type);
Assert((bitSize % 8) == 0);
uint64_t byteSize = bitSize / 8;
if (is32Bit || g->opt.force32BitAddressing)
return LLVMInt32((int32_t)byteSize);
else
@@ -437,23 +505,18 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
llvm::Value *
Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element,
Target::StructOffset(llvm::Type *type, int element,
llvm::BasicBlock *insertAtEnd) {
if (isa == Target::GENERIC &&
lGenericTypeLayoutIndeterminate(type) == true) {
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
llvm::Instruction *gep =
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
insertAtEnd);
#else
llvm::Instruction *gep =
llvm::GetElementPtrInst::Create(voidPtr, &indices[0], &indices[2],
"offset_gep", insertAtEnd);
#endif
if (is32Bit || g->opt.force32BitAddressing)
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
"offset_int", insertAtEnd);
@@ -464,9 +527,12 @@ Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element,
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
Assert(td != NULL);
LLVM_TYPE_CONST llvm::StructType *structType =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
Assert(structType != NULL);
llvm::StructType *structType =
llvm::dyn_cast<llvm::StructType>(type);
if (structType == NULL || structType->isSized() == false) {
Assert(m->errorCount > 0);
return NULL;
}
const llvm::StructLayout *sl = td->getStructLayout(structType);
Assert(sl != NULL);
@@ -552,7 +618,9 @@ llvm::DIFile
SourcePos::GetDIFile() const {
std::string directory, filename;
GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
return m->diBuilder->createFile(filename, directory);
llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
Assert(ret.Verify());
return ret;
}

67
ispc.h
View File

@@ -38,10 +38,10 @@
#ifndef ISPC_H
#define ISPC_H
#define ISPC_VERSION "1.2.1dev"
#define ISPC_VERSION "1.2.3dev"
#if !defined(LLVM_2_9) && !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn)
#error "Only LLVM 2.9, 3.0, and the 3.1 development branch are supported"
#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2)
#error "Only LLVM 3.0, 3.1, and the 3.2 development branch are supported"
#endif
#if defined(_WIN32) || defined(_WIN64)
@@ -58,20 +58,10 @@
#include <vector>
#include <string>
#define Assert(expr) \
((void)((expr) ? 0 : __Assert (#expr, __FILE__, __LINE__)))
#define __Assert(expr, file, line) \
((void)fprintf(stderr, "%s:%u: Assertion failed: \"%s\"\n" \
"***\n*** Please file a bug report at " \
"https://github.com/ispc/ispc/issues\n*** (Including as much " \
"information as you can about how to reproduce this error).\n" \
"*** You have apparently encountered a bug in the compiler that " \
"we'd like to fix!\n***\n", file, line, expr), abort(), 0)
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
targets.
*/
#define ISPC_MAX_NVEC 16
#define ISPC_MAX_NVEC 64
// Forward declarations of a number of widely-used LLVM types
namespace llvm {
@@ -92,12 +82,6 @@ namespace llvm {
class Value;
}
// llvm::Type *s are no longer const in llvm 3.0
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
#define LLVM_TYPE_CONST
#else
#define LLVM_TYPE_CONST const
#endif
class ArrayType;
class AST;
@@ -116,6 +100,15 @@ class SymbolTable;
class Type;
struct VariableDeclaration;
/** Storage class values.
    NOTE(review): presumably these correspond to the storage-class
    specifiers a declaration may carry (none, extern, static, typedef and
    extern "C") -- confirm against the code that assigns them. */
enum StorageClass {
SC_NONE,
SC_EXTERN,
SC_STATIC,
SC_TYPEDEF,
SC_EXTERN_C
};
/** @brief Representation of a range of positions in a source file.
This class represents a range of characters in a source file
@@ -142,11 +135,25 @@ struct SourcePos {
bool operator==(const SourcePos &p2) const;
};
/** Returns a SourcePos that encompasses the extent of both of the given
extents. */
SourcePos Union(const SourcePos &p1, const SourcePos &p2);
// Assert
//
// Assert(expr) evaluates expr; if it is false, DoAssert() is called with
// the current source file name, line number and the text of the
// expression.  AssertPos(pos, expr) does the same through DoAssertPos(),
// additionally passing along the given SourcePos.  Both macros expand to
// an expression of type void.  DoAssert() and DoAssertPos() are only
// declared here; their definitions live elsewhere.
extern void DoAssert(const char *file, int line, const char *expr);
extern void DoAssertPos(SourcePos pos, const char *file, int line, const char *expr);
#define Assert(expr) \
((void)((expr) ? 0 : ((void)DoAssert (__FILE__, __LINE__, #expr), 0)))
#define AssertPos(pos, expr) \
((void)((expr) ? 0 : ((void)DoAssertPos (pos, __FILE__, __LINE__, #expr), 0)))
/** @brief Structure that defines a compilation target
This structure defines a compilation target for the ispc compiler.
@@ -164,7 +171,7 @@ struct Target {
/** Returns a comma-delimited string giving the names of the currently
supported target CPUs. */
static const char *SupportedTargetCPUs();
static std::string SupportedTargetCPUs();
/** Returns a comma-delimited string giving the names of the currently
supported target architectures. */
@@ -182,13 +189,13 @@ struct Target {
const char *GetISAString() const;
/** Returns the size of the given type */
llvm::Value *SizeOf(LLVM_TYPE_CONST llvm::Type *type,
llvm::Value *SizeOf(llvm::Type *type,
llvm::BasicBlock *insertAtEnd);
/** Given a structure type and an element number in the structure,
returns a value corresponding to the number of bytes from the start
of the structure where the element is located. */
llvm::Value *StructOffset(LLVM_TYPE_CONST llvm::Type *type,
llvm::Value *StructOffset(llvm::Type *type,
int element, llvm::BasicBlock *insertAtEnd);
/** llvm Target object representing this target. */
@@ -236,16 +243,18 @@ struct Target {
natively. */
bool maskingIsFree;
/** Is it safe to run code with the mask all off: e.g. on SSE, the fast
gather trick assumes that at least one program instance is running
(so that it can safely assume that the array base pointer is
valid). */
bool allOffMaskIsSafe;
/** How many bits are used to store each element of the mask: e.g. this
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
the generic target. */
int maskBitCount;
/** Indicates whether the target has native support for float/half
conversions. */
bool hasHalf;
/** Indicates whether the target has support for transcendentals (beyond
sqrt, which we assume that all of them handle). */
bool hasTranscendentals;
};

View File

@@ -29,6 +29,8 @@
<ClCompile Include="gen-bitcode-generic-4.cpp" />
<ClCompile Include="gen-bitcode-generic-8.cpp" />
<ClCompile Include="gen-bitcode-generic-16.cpp" />
<ClCompile Include="gen-bitcode-generic-32.cpp" />
<ClCompile Include="gen-bitcode-generic-64.cpp" />
<ClCompile Include="gen-bitcode-sse2.cpp" />
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
<ClCompile Include="gen-bitcode-sse4.cpp" />
@@ -264,6 +266,32 @@
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-32.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll &gt; gen-bitcode-generic-32.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-32.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll &gt; gen-bitcode-generic-32.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-32.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-32.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-32.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-64.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll &gt; gen-bitcode-generic-64.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-64.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll &gt; gen-bitcode-generic-64.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-64.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-64.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-64.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="lex.ll">
<FileType>Document</FileType>

123
lex.ll
View File

@@ -43,6 +43,7 @@
#include <stdint.h>
static uint64_t lParseBinary(const char *ptr, SourcePos pos, char **endPtr);
static int lParseInteger(bool dotdotdot);
static void lCComment(SourcePos *);
static void lCppComment(SourcePos *);
static void lHandleCppHash(SourcePos *);
@@ -322,7 +323,8 @@ inline int ispcRand() {
%option nounistd
WHITESPACE [ \t\r]+
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[kMG]?
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*
INT_NUMBER_DOTDOTDOT (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*\.\.\.
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)
HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?)
@@ -406,53 +408,14 @@ L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERA
return TOKEN_IDENTIFIER;
}
{INT_NUMBER}+(u|U|l|L)*? {
{INT_NUMBER} {
RT;
int ls = 0, us = 0;
return lParseInteger(false);
}
char *endPtr = NULL;
if (yytext[0] == '0' && yytext[1] == 'b')
yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr);
else {
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
yylval.intVal = _strtoui64(yytext, &endPtr, 0);
#else
// FIXME: should use strtouq and then issue an error if we can't
// fit into 64 bits...
yylval.intVal = strtoull(yytext, &endPtr, 0);
#endif
}
bool kilo = false, mega = false, giga = false;
for (; *endPtr; endPtr++) {
if (*endPtr == 'k')
kilo = true;
else if (*endPtr == 'M')
mega = true;
else if (*endPtr == 'G')
giga = true;
else if (*endPtr == 'l' || *endPtr == 'L')
ls++;
else if (*endPtr == 'u' || *endPtr == 'U')
us++;
}
if (kilo)
yylval.intVal *= 1024;
if (mega)
yylval.intVal *= 1024*1024;
if (giga)
yylval.intVal *= 1024*1024*1024;
if (ls >= 2)
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
else if (ls == 1)
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
// See if we can fit this into a 32-bit integer...
if ((yylval.intVal & 0xffffffff) == yylval.intVal)
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
else
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
{INT_NUMBER_DOTDOTDOT} {
RT;
return lParseInteger(true);
}
@@ -562,6 +525,72 @@ lParseBinary(const char *ptr, SourcePos pos, char **endPtr) {
}
/** Converts the integer literal held in yytext to a value, stores it in
    yylval.intVal, and returns the token type that matches the literal.

    Literals starting with "0b" are handed to lParseBinary(); everything
    else is converted with strtoull() (or _strtoui64() on non-MinGW
    Windows builds) using base auto-detection.  The characters after the
    digits are then examined:
      - 'k', 'M' and 'G' scale the value by 1024, 1024*1024 and
        1024*1024*1024, respectively;
      - each 'l' or 'L' is counted, and any 'u' or 'U' marks the literal
        as unsigned;
      - any other character is only legal when dotdotdot is true, in which
        case it must be a '.'.

    The constant is 64-bit if two or more 'l'/'L' suffixes were given,
    32-bit if exactly one was given, and otherwise 32-bit only if the
    value fits in 32 bits.

    @param dotdotdot  true if the literal was matched together with a
                      trailing "..."; selects the *DOTDOTDOT_CONSTANT
                      token family instead of the plain *_CONSTANT one.
    @return the TOKEN_* value for the literal's signedness and width.
 */
static int
lParseInteger(bool dotdotdot) {
    char *cursor = NULL;

    const bool isBinaryLiteral = (yytext[0] == '0' && yytext[1] == 'b');
    if (isBinaryLiteral)
        yylval.intVal = lParseBinary(yytext+2, yylloc, &cursor);
    else {
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
        yylval.intVal = _strtoui64(yytext, &cursor, 0);
#else
        // FIXME: should use strtouq and then issue an error if we can't
        // fit into 64 bits...
        yylval.intVal = strtoull(yytext, &cursor, 0);
#endif
    }

    // Walk over whatever follows the digits and record the suffixes.
    int longCount = 0;
    bool isUnsigned = false;
    bool sawKilo = false, sawMega = false, sawGiga = false;
    for (; *cursor != '\0'; ++cursor) {
        switch (*cursor) {
        case 'k':
            sawKilo = true;
            break;
        case 'M':
            sawMega = true;
            break;
        case 'G':
            sawGiga = true;
            break;
        case 'l':
        case 'L':
            ++longCount;
            break;
        case 'u':
        case 'U':
            isUnsigned = true;
            break;
        default:
            // Only the dots of a trailing "..." may show up here.
            Assert(dotdotdot && *cursor == '.');
            break;
        }
    }

    if (sawKilo)
        yylval.intVal *= 1024;
    if (sawMega)
        yylval.intVal *= 1024*1024;
    if (sawGiga)
        yylval.intVal *= 1024*1024*1024;

    // Explicit 'l' suffixes decide the width; without any, use 32 bits
    // whenever the value fits.
    bool is64Bit;
    if (longCount >= 2)
        is64Bit = true;
    else if (longCount == 1)
        is64Bit = false;
    else
        is64Bit = ((yylval.intVal & 0xffffffff) != yylval.intVal);

    if (dotdotdot) {
        if (is64Bit)
            return isUnsigned ? TOKEN_UINT64DOTDOTDOT_CONSTANT
                              : TOKEN_INT64DOTDOTDOT_CONSTANT;
        return isUnsigned ? TOKEN_UINT32DOTDOTDOT_CONSTANT
                          : TOKEN_INT32DOTDOTDOT_CONSTANT;
    }

    if (is64Bit)
        return isUnsigned ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
    return isUnsigned ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
}
/** Handle a C-style comment in the source.
*/
static void
@@ -675,7 +704,7 @@ lEscapeChar(char *str, char *pChar, SourcePos *pos)
str = tail - 1;
break;
default:
Error(*pos, "Bad character escape sequence: '%s'\n.", str);
Error(*pos, "Bad character escape sequence: '%s'.", str);
break;
}
}

View File

@@ -43,44 +43,44 @@
#include <set>
#include <map>
LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::PointerIntType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;
llvm::Type *LLVMTypes::VoidType = NULL;
llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
llvm::Type *LLVMTypes::PointerIntType = NULL;
llvm::Type *LLVMTypes::BoolType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL;
llvm::Type *LLVMTypes::Int8Type = NULL;
llvm::Type *LLVMTypes::Int16Type = NULL;
llvm::Type *LLVMTypes::Int32Type = NULL;
llvm::Type *LLVMTypes::Int64Type = NULL;
llvm::Type *LLVMTypes::FloatType = NULL;
llvm::Type *LLVMTypes::DoubleType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL;
llvm::Type *LLVMTypes::Int8PointerType = NULL;
llvm::Type *LLVMTypes::Int16PointerType = NULL;
llvm::Type *LLVMTypes::Int32PointerType = NULL;
llvm::Type *LLVMTypes::Int64PointerType = NULL;
llvm::Type *LLVMTypes::FloatPointerType = NULL;
llvm::Type *LLVMTypes::DoublePointerType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
llvm::VectorType *LLVMTypes::MaskType = NULL;
llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;
llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;
llvm::Constant *LLVMTrue = NULL;
llvm::Constant *LLVMFalse = NULL;
@@ -473,9 +473,9 @@ LLVMBoolVector(const bool *bvec) {
llvm::Constant *
LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) {
LLVM_TYPE_CONST llvm::VectorType *vecType =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
LLVMIntAsType(int64_t val, llvm::Type *type) {
llvm::VectorType *vecType =
llvm::dyn_cast<llvm::VectorType>(type);
if (vecType != NULL) {
llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(),
@@ -491,9 +491,9 @@ LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) {
llvm::Constant *
LLVMUIntAsType(uint64_t val, LLVM_TYPE_CONST llvm::Type *type) {
LLVM_TYPE_CONST llvm::VectorType *vecType =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
LLVMUIntAsType(uint64_t val, llvm::Type *type) {
llvm::VectorType *vecType =
llvm::dyn_cast<llvm::VectorType>(type);
if (vecType != NULL) {
llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(),
@@ -642,8 +642,8 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
bool
LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
// Make sure we do in fact have a vector of integer values here
LLVM_TYPE_CONST llvm::VectorType *vt =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
llvm::VectorType *vt =
llvm::dyn_cast<llvm::VectorType>(v->getType());
Assert(vt != NULL);
Assert(llvm::isa<llvm::IntegerType>(vt->getElementType()));
@@ -657,7 +657,7 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
// Deal with the fact that LLVM3.1 and previous versions have different
// representations for vectors of constant ints...
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
if (cv == NULL)
return false;
@@ -678,7 +678,7 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
ret[i] = ci->getSExtValue();
}
return true;
#endif // LLVM_3_1svn
#endif // !LLVM_3_0
}
@@ -696,7 +696,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
static bool
lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
std::vector<llvm::PHINode *> &seenPhis) {
if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()) == false) {
if (llvm::isa<llvm::VectorType>(val->getType()) == false) {
// If we've worked down to a constant int, then the moment of truth
// has arrived...
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(val);
@@ -780,7 +780,7 @@ static bool
lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
std::vector<llvm::PHINode *> &seenPhis,
bool &canAdd) {
Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()));
Assert(llvm::isa<llvm::VectorType>(val->getType()));
// Make sure the base value is a positive power of 2
Assert(baseValue > 0 && (baseValue & (baseValue-1)) == 0);
@@ -790,7 +790,7 @@ lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
int64_t vecVals[ISPC_MAX_NVEC];
int nElts;
if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()) &&
if (llvm::isa<llvm::VectorType>(val->getType()) &&
LLVMExtractVectorInts(val, vecVals, &nElts)) {
// If we have a vector of compile-time constant integer values,
// then go ahead and check them directly..
@@ -880,7 +880,7 @@ lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
// the addConstants[], mod baseValue. If we round that up to the
// next power of 2, we'll have a value that will be no greater than
// baseValue and sometimes less.
int maxMod = addConstants[0] % baseValue;
int maxMod = int(addConstants[0] % baseValue);
for (int i = 1; i < vectorLength; ++i)
maxMod = std::max(maxMod, int(addConstants[i] % baseValue));
int requiredAlignment = lRoundUpPow2(maxMod);
@@ -947,7 +947,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
if (cv != NULL)
return (cv->getSplatValue() != NULL);
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
if (cdv != NULL)
return (cdv->getSplatValue() != NULL);
@@ -1074,8 +1074,8 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
*/
bool
LLVMVectorValuesAllEqual(llvm::Value *v) {
LLVM_TYPE_CONST llvm::VectorType *vt =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
llvm::VectorType *vt =
llvm::dyn_cast<llvm::VectorType>(v->getType());
Assert(vt != NULL);
int vectorLength = vt->getNumElements();
@@ -1102,7 +1102,7 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
*/
static bool
lVectorIsLinearConstantInts(
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
llvm::ConstantDataVector *cv,
#else
llvm::ConstantVector *cv,
@@ -1111,7 +1111,7 @@ lVectorIsLinearConstantInts(
int stride) {
// Flatten the vector out into the elements array
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
for (int i = 0; i < (int)cv->getNumElements(); ++i)
elements.push_back(cv->getElementAsConstant(i));
#else
@@ -1152,7 +1152,7 @@ lCheckMulForLinear(llvm::Value *op0, llvm::Value *op1, int vectorLength,
int stride, std::vector<llvm::PHINode *> &seenPhis) {
// Is the first operand a constant integer value splatted across all of
// the lanes?
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(op0);
#else
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(op0);
@@ -1226,7 +1226,7 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
std::vector<llvm::PHINode *> &seenPhis) {
// First try the easy case: if the values are all just constant
// integers and have the expected stride between them, then we're done.
#ifdef LLVM_3_1svn
#ifndef LLVM_3_0
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
#else
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
@@ -1344,8 +1344,8 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
*/
bool
LLVMVectorIsLinear(llvm::Value *v, int stride) {
LLVM_TYPE_CONST llvm::VectorType *vt =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
llvm::VectorType *vt =
llvm::dyn_cast<llvm::VectorType>(v->getType());
Assert(vt != NULL);
int vectorLength = vt->getNumElements();
@@ -1390,19 +1390,38 @@ LLVMDumpValue(llvm::Value *v) {
static llvm::Value *
lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
lExtractFirstVectorElement(llvm::Value *v,
std::map<llvm::PHINode *, llvm::PHINode *> &phiMap) {
// If it's not an instruction (i.e. is a constant), then we can just
// emit an extractelement instruction and let the regular optimizer do
// the rest.
if (llvm::isa<llvm::Instruction>(v) == false)
return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
insertBefore);
LLVM_TYPE_CONST llvm::VectorType *vt =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
llvm::VectorType *vt =
llvm::dyn_cast<llvm::VectorType>(v->getType());
Assert(vt != NULL);
// First, handle various constant types; do the extraction manually, as
// appropriate.
if (llvm::isa<llvm::ConstantAggregateZero>(v) == true) {
Assert(vt->getElementType()->isIntegerTy());
return llvm::ConstantInt::get(vt->getElementType(), 0);
}
if (llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v)) {
#ifndef LLVM_3_0
return cv->getOperand(0);
#else
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
cv->getVectorElements(elements);
return elements[0];
#endif // !LLVM_3_0
}
#ifndef LLVM_3_0
if (llvm::ConstantDataVector *cdv =
llvm::dyn_cast<llvm::ConstantDataVector>(v))
return cdv->getElementAsConstant(0);
#endif // !LLVM_3_0
// Otherwise, all that we should have at this point is an instruction
// of some sort
Assert(llvm::isa<llvm::Constant>(v) == false);
Assert(llvm::isa<llvm::Instruction>(v) == true);
std::string newName = v->getName().str() + std::string(".elt0");
// Rewrite regular binary operators and casts to the scalarized
@@ -1410,20 +1429,24 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
if (bop != NULL) {
llvm::Value *v0 = lExtractFirstVectorElement(bop->getOperand(0),
insertBefore, phiMap);
phiMap);
llvm::Value *v1 = lExtractFirstVectorElement(bop->getOperand(1),
insertBefore, phiMap);
phiMap);
// Note that the new binary operator is inserted immediately before
// the previous vector one
return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1,
newName, insertBefore);
newName, bop);
}
llvm::CastInst *cast = llvm::dyn_cast<llvm::CastInst>(v);
if (cast != NULL) {
llvm::Value *v = lExtractFirstVectorElement(cast->getOperand(0),
insertBefore, phiMap);
phiMap);
// Similarly, the equivalent scalar cast instruction goes right
// before the vector cast
return llvm::CastInst::Create(cast->getOpcode(), v,
vt->getElementType(), newName,
insertBefore);
cast);
}
llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(v);
@@ -1438,18 +1461,17 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
// return the pointer and not get stuck in an infinite loop.
//
// The insertion point for the new phi node also has to be the
// start of the bblock of the original phi node, which isn't
// necessarily the same bblock as insertBefore is in!
// start of the bblock of the original phi node.
llvm::Instruction *phiInsertPos = phi->getParent()->begin();
llvm::PHINode *scalarPhi =
llvm::PHINode::Create(vt->getElementType(),
phi->getNumIncomingValues(), newName,
phiInsertPos);
phi->getNumIncomingValues(),
newName, phiInsertPos);
phiMap[phi] = scalarPhi;
for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
llvm::Value *v = lExtractFirstVectorElement(phi->getIncomingValue(i),
insertBefore, phiMap);
phiMap);
scalarPhi->addIncoming(v, phi->getIncomingBlock(i));
}
@@ -1466,15 +1488,22 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
}
// Worst case, for everything else, just do a regular extract element
return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
insertBefore);
// instruction, which we insert immediately after the instruction we
// have here.
llvm::Instruction *insertAfter = llvm::dyn_cast<llvm::Instruction>(v);
Assert(insertAfter != NULL);
llvm::Instruction *ee =
llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
(llvm::Instruction *)NULL);
ee->insertAfter(insertAfter);
return ee;
}
llvm::Value *
LLVMExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore) {
LLVMExtractFirstVectorElement(llvm::Value *v) {
std::map<llvm::PHINode *, llvm::PHINode *> phiMap;
llvm::Value *ret = lExtractFirstVectorElement(v, insertBefore, phiMap);
llvm::Value *ret = lExtractFirstVectorElement(v, phiMap);
return ret;
}
@@ -1489,8 +1518,8 @@ LLVMConcatVectors(llvm::Value *v1, llvm::Value *v2,
llvm::Instruction *insertBefore) {
Assert(v1->getType() == v2->getType());
LLVM_TYPE_CONST llvm::VectorType *vt =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v1->getType());
llvm::VectorType *vt =
llvm::dyn_cast<llvm::VectorType>(v1->getType());
Assert(vt != NULL);
int32_t identity[ISPC_MAX_NVEC];
@@ -1518,12 +1547,29 @@ LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[],
shufVec.push_back(LLVMInt32(shuf[i]));
}
#ifndef LLVM_2_9
llvm::ArrayRef<llvm::Constant *> aref(&shufVec[0], &shufVec[shufSize]);
llvm::Value *vec = llvm::ConstantVector::get(aref);
#else // LLVM_2_9
llvm::Value *vec = llvm::ConstantVector::get(shufVec);
#endif
return new llvm::ShuffleVectorInst(v1, v2, vec, "shuffle", insertBefore);
}
/** Returns a name formed by appending the suffix \c s to the name of the
    given value, for use when naming a new value derived from \c v.

    @param v  Value whose name is used as the prefix; may be NULL.
    @param s  Suffix string to append.
    @return   \c s itself when \c v is NULL; otherwise a newly strdup()'d
              string that this function never frees.  Because ownership
              differs between the two cases, callers should not free() the
              result.
 */
const char *
LLVMGetName(llvm::Value *v, const char *s) {
    if (v == NULL)
        return s;

    // Convert the StringRef explicitly with str(), as the three-argument
    // overload does, rather than relying on an implicit conversion.
    std::string ret = v->getName().str();
    ret += s;
    return strdup(ret.c_str());
}
/** Builds a name of the form "<op>_<name of v1>_<name of v2>" for a new
    value computed from two existing ones.

    @param op  Prefix string, typically describing the operation.
    @param v1  First value whose name is included; dereferenced unconditionally.
    @param v2  Second value whose name is included; dereferenced unconditionally.
    @return    A newly strdup()'d string; it is not freed by this function.
 */
const char *
LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2) {
    std::string name(op);
    name.append("_").append(v1->getName().str());
    name.append("_").append(v2->getName().str());
    return strdup(name.c_str());
}

View File

@@ -48,57 +48,50 @@ namespace llvm {
class InsertElementInst;
}
// llvm::Type *s are no longer const in llvm 3.0
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
#define LLVM_TYPE_CONST
#else
#define LLVM_TYPE_CONST const
#endif
/** This structure holds pointers to a variety of LLVM types; code
elsewhere can use them from here, rather than needing to make more
verbose LLVM API calls.
*/
struct LLVMTypes {
static LLVM_TYPE_CONST llvm::Type *VoidType;
static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
static LLVM_TYPE_CONST llvm::Type *PointerIntType;
static LLVM_TYPE_CONST llvm::Type *BoolType;
static llvm::Type *VoidType;
static llvm::PointerType *VoidPointerType;
static llvm::Type *PointerIntType;
static llvm::Type *BoolType;
static LLVM_TYPE_CONST llvm::Type *Int8Type;
static LLVM_TYPE_CONST llvm::Type *Int16Type;
static LLVM_TYPE_CONST llvm::Type *Int32Type;
static LLVM_TYPE_CONST llvm::Type *Int64Type;
static LLVM_TYPE_CONST llvm::Type *FloatType;
static LLVM_TYPE_CONST llvm::Type *DoubleType;
static llvm::Type *Int8Type;
static llvm::Type *Int16Type;
static llvm::Type *Int32Type;
static llvm::Type *Int64Type;
static llvm::Type *FloatType;
static llvm::Type *DoubleType;
static LLVM_TYPE_CONST llvm::Type *Int8PointerType;
static LLVM_TYPE_CONST llvm::Type *Int16PointerType;
static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
static LLVM_TYPE_CONST llvm::Type *DoublePointerType;
static llvm::Type *Int8PointerType;
static llvm::Type *Int16PointerType;
static llvm::Type *Int32PointerType;
static llvm::Type *Int64PointerType;
static llvm::Type *FloatPointerType;
static llvm::Type *DoublePointerType;
static LLVM_TYPE_CONST llvm::VectorType *MaskType;
static llvm::VectorType *MaskType;
static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType;
static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType;
static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType;
static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType;
static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType;
static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType;
static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType;
static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType;
static llvm::VectorType *BoolVectorType;
static llvm::VectorType *Int1VectorType;
static llvm::VectorType *Int8VectorType;
static llvm::VectorType *Int16VectorType;
static llvm::VectorType *Int32VectorType;
static llvm::VectorType *Int64VectorType;
static llvm::VectorType *FloatVectorType;
static llvm::VectorType *DoubleVectorType;
static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType;
static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType;
static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;
static llvm::Type *Int8VectorPointerType;
static llvm::Type *Int16VectorPointerType;
static llvm::Type *Int32VectorPointerType;
static llvm::Type *Int64VectorPointerType;
static llvm::Type *FloatVectorPointerType;
static llvm::Type *DoubleVectorPointerType;
static LLVM_TYPE_CONST llvm::VectorType *VoidPointerVectorType;
static llvm::VectorType *VoidPointerVectorType;
};
/** These variables hold the corresponding LLVM constant values as a
@@ -175,11 +168,11 @@ extern llvm::Constant *LLVMDoubleVector(double f);
/** Returns a constant integer or vector (according to the given type) of
the given signed integer value. */
extern llvm::Constant *LLVMIntAsType(int64_t, LLVM_TYPE_CONST llvm::Type *t);
extern llvm::Constant *LLVMIntAsType(int64_t, llvm::Type *t);
/** Returns a constant integer or vector (according to the given type) of
the given unsigned integer value. */
extern llvm::Constant *LLVMUIntAsType(uint64_t, LLVM_TYPE_CONST llvm::Type *t);
extern llvm::Constant *LLVMUIntAsType(uint64_t, llvm::Type *t);
/** Returns an LLVM boolean vector based on the given array of values.
The array should have g->target.vectorWidth elements. */
@@ -281,8 +274,7 @@ extern void LLVMDumpValue(llvm::Value *v);
worth of values just to extract the first element, in cases where only
the first element's value is needed.
*/
extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v,
llvm::Instruction *insertBefore);
extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v);
/** This function takes two vectors, expected to be the same length, and
returns a new vector of twice the length that represents concatenating
@@ -298,4 +290,10 @@ extern llvm::Value *LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2,
int32_t shuf[], int shufSize,
llvm::Instruction *insertBefore);
/** Utility routines to concat strings with the names of existing values to
create meaningful new names for instruction values.
*/
extern const char *LLVMGetName(llvm::Value *v, const char *);
extern const char *LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2);
#endif // ISPC_LLVMUTIL_H

View File

@@ -44,16 +44,9 @@
#ifdef ISPC_IS_WINDOWS
#include <time.h>
#endif // ISPC_IS_WINDOWS
#include <llvm/Support/PrettyStackTrace.h>
#include <llvm/Support/Signals.h>
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#else
#include <llvm/Target/TargetRegistry.h>
#include <llvm/Target/TargetSelect.h>
#include <llvm/Target/SubtargetFeature.h>
#endif
#include <llvm/Support/TargetRegistry.h>
#include <llvm/Support/TargetSelect.h>
#ifdef ISPC_IS_WINDOWS
#define strcasecmp stricmp
@@ -67,12 +60,12 @@ static void
lPrintVersion() {
printf("Intel(r) SPMD Program Compiler (ispc), %s (build %s @ %s, LLVM %s)\n",
ISPC_VERSION, BUILD_VERSION, BUILD_DATE,
#ifdef LLVM_2_9
"2.9"
#elif defined(LLVM_3_0) || defined(LLVM_3_0svn)
#if defined(LLVM_3_0)
"3.0"
#elif defined(LLVM_3_1) || defined(LLVM_3_1svn)
#elif defined(LLVM_3_1)
"3.1"
#elif defined(LLVM_3_2)
"3.2"
#else
#error "Unhandled LLVM version"
#endif
@@ -91,12 +84,10 @@ usage(int ret) {
Target::SupportedTargetArchs());
printf(" [--c++-include-file=<name>]\t\tSpecify name of file to emit in #include statement in generated C++ code.\n");
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs());
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs().c_str());
printf(" [-D<foo>]\t\t\t\t#define given value when running preprocessor\n");
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
#ifndef LLVM_2_9
printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n");
#endif // !LLVM_2_9
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
printf(" [-g]\t\t\t\tGenerate debugging information\n");
@@ -202,17 +193,18 @@ static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
}
/** Signal callback that main() registers with
    llvm::sys::AddSignalHandler().  It invokes FATAL() with a fixed
    message; the void * argument is unnamed and unused. */
static void
lSignal(void *) {
FATAL("Unhandled signal sent to process; terminating.");
}
int main(int Argc, char *Argv[]) {
int argc;
char *argv[128];
lGetAllArgs(Argc, Argv, argc, argv);
#if 0
// Use LLVM's little utility function to print out nice stack traces if
// we crash
llvm::sys::PrintStackTraceOnErrorSignal();
llvm::PrettyStackTraceProgram X(argc, argv);
#endif
llvm::sys::AddSignalHandler(lSignal, NULL);
// initialize available LLVM targets
LLVMInitializeX86TargetInfo();
@@ -220,9 +212,7 @@ int main(int Argc, char *Argv[]) {
LLVMInitializeX86AsmPrinter();
LLVMInitializeX86AsmParser();
LLVMInitializeX86Disassembler();
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
LLVMInitializeX86TargetMC();
#endif
char *file = NULL;
const char *headerFileName = NULL;
@@ -279,10 +269,8 @@ int main(int Argc, char *Argv[]) {
}
else if (!strcmp(argv[i], "--emit-asm"))
ot = Module::Asm;
#ifndef LLVM_2_9
else if (!strcmp(argv[i], "--emit-c++"))
ot = Module::CXX;
#endif // !LLVM_2_9
else if (!strcmp(argv[i], "--emit-llvm"))
ot = Module::Bitcode;
else if (!strcmp(argv[i], "--emit-obj"))

File diff suppressed because it is too large Load Diff

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -59,30 +59,33 @@ public:
int CompileFile();
/** Add a named type definition to the module. */
void AddTypeDef(Symbol *sym);
void AddTypeDef(const std::string &name, const Type *type,
SourcePos pos);
/** Add a new global variable corresponding to the given Symbol to the
module. If non-NULL, initExpr gives the initializer expression
for the global's initial value. */
void AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst);
void AddGlobalVariable(const std::string &name, const Type *type,
Expr *initExpr, bool isConst,
StorageClass storageClass, SourcePos pos);
/** Add a declaration of the function defined by the given function
symbol to the module. */
void AddFunctionDeclaration(Symbol *funSym, bool isInline);
void AddFunctionDeclaration(const std::string &name,
const FunctionType *ftype,
StorageClass sc, bool isInline, SourcePos pos);
/** Adds the function described by the declaration information and the
provided statements to the module. */
void AddFunctionDefinition(Symbol *sym, const std::vector<Symbol *> &args,
Stmt *code);
void AddFunctionDefinition(const std::string &name,
const FunctionType *ftype, Stmt *code);
/** After a source file has been compiled, output can be generated in a
number of different formats. */
enum OutputType { Asm, /** Generate text assembly language output */
Bitcode, /** Generate LLVM IR bitcode output */
Object, /** Generate a native object file */
#ifndef LLVM_2_9
CXX, /** Generate a C++ file */
#endif // !LLVM_2_9
Header /** Generate a C/C++ header file with
declarations of 'export'ed functions, global
variables, and the types used by them. */

758
opt.cpp

File diff suppressed because it is too large Load Diff

184
parse.yy
View File

@@ -173,8 +173,11 @@ struct ForeachDimension {
}
%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT TOKEN_INT64_CONSTANT
%token TOKEN_UINT64_CONSTANT TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL
%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT
%token TOKEN_INT64_CONSTANT TOKEN_UINT64_CONSTANT
%token TOKEN_INT32DOTDOTDOT_CONSTANT TOKEN_UINT32DOTDOTDOT_CONSTANT
%token TOKEN_INT64DOTDOTDOT_CONSTANT TOKEN_UINT64DOTDOTDOT_CONSTANT
%token TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL
%token TOKEN_IDENTIFIER TOKEN_STRING_LITERAL TOKEN_TYPE_NAME TOKEN_NULL
%token TOKEN_PTR_OP TOKEN_INC_OP TOKEN_DEC_OP TOKEN_LEFT_OP TOKEN_RIGHT_OP
%token TOKEN_LE_OP TOKEN_GE_OP TOKEN_EQ_OP TOKEN_NE_OP
@@ -196,7 +199,7 @@ struct ForeachDimension {
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
%type <expr> primary_expression postfix_expression
%type <expr> primary_expression postfix_expression integer_dotdotdot
%type <expr> unary_expression cast_expression funcall_expression launch_expression
%type <expr> multiplicative_expression additive_expression shift_expression
%type <expr> relational_expression equality_expression and_expression
@@ -250,6 +253,12 @@ struct ForeachDimension {
/* One or more adjacent TOKEN_STRING_LITERAL tokens, concatenated into a
   single heap-allocated std::string.  Each alternative reads the text of
   the most recently scanned literal from yylval.stringVal. */
string_constant
: TOKEN_STRING_LITERAL { $$ = new std::string(*yylval.stringVal); }
| string_constant TOKEN_STRING_LITERAL
{
/* Copy the string accumulated so far, append the new literal, and
   return a fresh allocation.
   NOTE(review): the std::string that $1 points to is copied and not
   deleted here -- confirm whether it is released elsewhere. */
std::string s = *((std::string *)$1);
s += *yylval.stringVal;
$$ = new std::string(s);
}
;
primary_expression
@@ -382,7 +391,7 @@ argument_expression_list
{
ExprList *argList = dynamic_cast<ExprList *>($1);
if (argList == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
argList = new ExprList(@3);
}
argList->exprs.push_back($3);
@@ -540,8 +549,8 @@ rate_qualified_type_specifier
if ($2 == NULL)
$$ = NULL;
else {
int soaWidth = $1;
const StructType *st = dynamic_cast<const StructType *>($2);
int soaWidth = (int)$1;
const StructType *st = CastType<StructType>($2);
if (st == NULL) {
Error(@1, "\"soa\" qualifier is illegal with non-struct type \"%s\".",
$2->GetString().c_str());
@@ -614,15 +623,17 @@ declaration_statement
: declaration
{
if ($1 == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
$$ = NULL;
}
else if ($1->declSpecs->storageClass == SC_TYPEDEF) {
for (unsigned int i = 0; i < $1->declarators.size(); ++i) {
if ($1->declarators[i] == NULL)
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
else
m->AddTypeDef($1->declarators[i]->GetSymbol());
m->AddTypeDef($1->declarators[i]->name,
$1->declarators[i]->type,
$1->declarators[i]->pos);
}
$$ = NULL;
}
@@ -778,7 +789,7 @@ init_declarator_list
{
std::vector<Declarator *> *dl = (std::vector<Declarator *> *)$1;
if (dl == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
dl = new std::vector<Declarator *>;
}
if ($3 != NULL)
@@ -801,7 +812,6 @@ storage_class_specifier
: TOKEN_TYPEDEF { $$ = SC_TYPEDEF; }
| TOKEN_EXTERN { $$ = SC_EXTERN; }
| TOKEN_EXTERN TOKEN_STRING_C_LITERAL { $$ = SC_EXTERN_C; }
| TOKEN_EXPORT { $$ = SC_EXPORT; }
| TOKEN_STATIC { $$ = SC_STATIC; }
;
@@ -843,9 +853,9 @@ struct_or_union_specifier
: struct_or_union struct_or_union_name '{' struct_declaration_list '}'
{
if ($4 != NULL) {
std::vector<const Type *> elementTypes;
std::vector<std::string> elementNames;
std::vector<SourcePos> elementPositions;
llvm::SmallVector<const Type *, 8> elementTypes;
llvm::SmallVector<std::string, 8> elementNames;
llvm::SmallVector<SourcePos, 8> elementPositions;
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
&elementPositions);
StructType *st = new StructType($2, elementTypes, elementNames,
@@ -859,12 +869,11 @@ struct_or_union_specifier
| struct_or_union '{' struct_declaration_list '}'
{
if ($3 != NULL) {
std::vector<const Type *> elementTypes;
std::vector<std::string> elementNames;
std::vector<SourcePos> elementPositions;
llvm::SmallVector<const Type *, 8> elementTypes;
llvm::SmallVector<std::string, 8> elementNames;
llvm::SmallVector<SourcePos, 8> elementPositions;
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
&elementPositions);
// FIXME: should be unbound
$$ = new StructType("", elementTypes, elementNames, elementPositions,
false, Variability::Unbound, @1);
}
@@ -882,12 +891,11 @@ struct_or_union_specifier
| struct_or_union struct_or_union_name
{
const Type *st = m->symbolTable->LookupType($2);
if (!st) {
std::vector<std::string> alternates = m->symbolTable->ClosestTypeMatch($2);
std::string alts = lGetAlternates(alternates);
Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str());
if (st == NULL) {
st = new UndefinedStructType($2, Variability::Unbound, false, @2);
m->symbolTable->AddType($2, st, @2);
}
else if (dynamic_cast<const StructType *>(st) == NULL)
else if (CastType<StructType>(st) == NULL)
Error(@2, "Type \"%s\" is not a struct type! (%s)", $2,
st->GetString().c_str());
$$ = st;
@@ -910,7 +918,7 @@ struct_declaration_list
{
std::vector<StructDeclaration *> *sdl = (std::vector<StructDeclaration *> *)$1;
if (sdl == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
sdl = new std::vector<StructDeclaration *>;
}
if ($2 != NULL)
@@ -976,6 +984,11 @@ specifier_qualifier_list
"function declarations.");
$$ = $2;
}
else if ($1 == TYPEQUAL_EXPORT) {
Error(@1, "\"export\" qualifier is illegal outside of "
"function declarations.");
$$ = $2;
}
else
FATAL("Unhandled type qualifier in parser.");
}
@@ -1000,7 +1013,7 @@ struct_declarator_list
{
std::vector<Declarator *> *sdl = (std::vector<Declarator *> *)$1;
if (sdl == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
sdl = new std::vector<Declarator *>;
}
if ($3 != NULL)
@@ -1047,7 +1060,7 @@ enum_specifier
$$ = NULL;
}
else {
const EnumType *enumType = dynamic_cast<const EnumType *>(type);
const EnumType *enumType = CastType<EnumType>(type);
if (enumType == NULL) {
Error(@2, "Type \"%s\" is not an enum type (%s).", $2,
type->GetString().c_str());
@@ -1074,7 +1087,7 @@ enumerator_list
{
std::vector<Symbol *> *symList = $1;
if (symList == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
symList = new std::vector<Symbol *>;
}
if ($3 != NULL)
@@ -1108,6 +1121,7 @@ type_qualifier
| TOKEN_UNIFORM { $$ = TYPEQUAL_UNIFORM; }
| TOKEN_VARYING { $$ = TYPEQUAL_VARYING; }
| TOKEN_TASK { $$ = TYPEQUAL_TASK; }
| TOKEN_EXPORT { $$ = TYPEQUAL_EXPORT; }
| TOKEN_INLINE { $$ = TYPEQUAL_INLINE; }
| TOKEN_SIGNED { $$ = TYPEQUAL_SIGNED; }
| TOKEN_UNSIGNED { $$ = TYPEQUAL_UNSIGNED; }
@@ -1160,7 +1174,7 @@ direct_declarator
: TOKEN_IDENTIFIER
{
Declarator *d = new Declarator(DK_BASE, @1);
d->sym = new Symbol(yytext, @1);
d->name = yytext;
$$ = d;
}
| '(' declarator ')'
@@ -1335,8 +1349,10 @@ type_name
{
if ($1 == NULL || $2 == NULL)
$$ = NULL;
else
$$ = $2->GetType($1, NULL);
else {
$2->InitFromType($1, NULL);
$$ = $2->type;
}
}
;
@@ -1471,7 +1487,7 @@ initializer_list
{
ExprList *exprList = $1;
if (exprList == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
exprList = new ExprList(@3);
}
exprList->exprs.push_back($3);
@@ -1542,7 +1558,7 @@ statement_list
{
StmtList *sl = (StmtList *)$1;
if (sl == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
sl = new StmtList(@2);
}
sl->Add($2);
@@ -1614,11 +1630,34 @@ foreach_active_identifier
}
;
/* An integer constant delivered by the lexer as a single
   TOKEN_*DOTDOTDOT_CONSTANT token.  foreach_dimension_specifier accepts it
   in place of an expression followed by TOKEN_DOTDOTDOT.  Each alternative
   builds a ConstExpr from yylval.intVal, cast to the width and signedness
   of the token and typed with the corresponding
   AtomicType::Uniform{Int32,UInt32,Int64,UInt64}->GetAsConstType().
   NOTE(review): presumably these tokens exist so that input such as
   "0...n" is not scanned as a floating-point constant -- confirm against
   the lexer. */
integer_dotdotdot
: TOKEN_INT32DOTDOTDOT_CONSTANT {
$$ = new ConstExpr(AtomicType::UniformInt32->GetAsConstType(),
(int32_t)yylval.intVal, @1);
}
| TOKEN_UINT32DOTDOTDOT_CONSTANT {
$$ = new ConstExpr(AtomicType::UniformUInt32->GetAsConstType(),
(uint32_t)yylval.intVal, @1);
}
| TOKEN_INT64DOTDOTDOT_CONSTANT {
$$ = new ConstExpr(AtomicType::UniformInt64->GetAsConstType(),
(int64_t)yylval.intVal, @1);
}
| TOKEN_UINT64DOTDOTDOT_CONSTANT {
$$ = new ConstExpr(AtomicType::UniformUInt64->GetAsConstType(),
(uint64_t)yylval.intVal, @1);
}
;
/* One dimension of a foreach statement, in either of two forms:
     identifier = expr TOKEN_DOTDOTDOT expr
     identifier = integer_dotdotdot expr
   In the second form the "..." is already part of the integer token, so
   no separate TOKEN_DOTDOTDOT appears.  Both forms build a
   ForeachDimension from the identifier and the two expressions
   (presumably the start and end of the range -- confirm against the
   ForeachDimension definition). */
foreach_dimension_specifier
: foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
{
$$ = new ForeachDimension($1, $3, $5);
}
| foreach_identifier '=' integer_dotdotdot assignment_expression
{
$$ = new ForeachDimension($1, $3, $4);
}
;
foreach_dimension_list
@@ -1631,7 +1670,7 @@ foreach_dimension_list
{
std::vector<ForeachDimension *> *dv = $1;
if (dv == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
dv = new std::vector<ForeachDimension *>;
}
if ($3 != NULL)
@@ -1669,7 +1708,7 @@ iteration_statement
{
std::vector<ForeachDimension *> *dims = $3;
if (dims == NULL) {
Assert(m->errorCount > 0);
AssertPos(@3, m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
for (unsigned int i = 0; i < dims->size(); ++i)
@@ -1679,7 +1718,7 @@ iteration_statement
{
std::vector<ForeachDimension *> *dims = $3;
if (dims == NULL) {
Assert(m->errorCount > 0);
AssertPos(@3, m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
@@ -1697,7 +1736,7 @@ iteration_statement
{
std::vector<ForeachDimension *> *dims = $3;
if (dims == NULL) {
Assert(m->errorCount > 0);
AssertPos(@3, m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
@@ -1708,7 +1747,7 @@ iteration_statement
{
std::vector<ForeachDimension *> *dims = $3;
if (dims == NULL) {
Assert(m->errorCount > 0);
AssertPos(@1, m->errorCount > 0);
dims = new std::vector<ForeachDimension *>;
}
@@ -1804,6 +1843,7 @@ external_declaration
for (unsigned int i = 0; i < $1->declarators.size(); ++i)
lAddDeclaration($1->declSpecs, $1->declarators[i]);
}
| ';'
;
function_definition
@@ -1817,11 +1857,18 @@ function_definition
}
compound_statement
{
std::vector<Symbol *> args;
if ($2 != NULL) {
Symbol *sym = $2->GetFunctionInfo($1, &args);
if (sym != NULL)
m->AddFunctionDefinition(sym, args, $4);
$2->InitFromDeclSpecs($1);
const FunctionType *funcType = CastType<FunctionType>($2->type);
if (funcType == NULL)
AssertPos(@1, m->errorCount > 0);
else if ($1->storageClass == SC_TYPEDEF)
Error(@1, "Illegal \"typedef\" provided with function definition.");
else {
Stmt *code = $4;
if (code == NULL) code = new StmtList(@4);
m->AddFunctionDefinition($2->name, funcType, code);
}
}
m->symbolTable->PopScope(); // push in lAddFunctionParams();
}
@@ -1931,35 +1978,27 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
// Error happened earlier during parsing
return;
decl->InitFromDeclSpecs(ds);
if (ds->storageClass == SC_TYPEDEF)
m->AddTypeDef(decl->GetSymbol());
m->AddTypeDef(decl->name, decl->type, decl->pos);
else {
const Type *t = decl->GetType(ds);
if (t == NULL) {
if (decl->type == NULL) {
Assert(m->errorCount > 0);
return;
}
Symbol *sym = decl->GetSymbol();
if (sym == NULL) {
Assert(m->errorCount > 0);
return;
}
const FunctionType *ft = dynamic_cast<const FunctionType *>(t);
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
const FunctionType *ft = CastType<FunctionType>(decl->type);
if (ft != NULL) {
sym->type = ft;
sym->storageClass = ds->storageClass;
bool isInline = (ds->typeQualifiers & TYPEQUAL_INLINE);
m->AddFunctionDeclaration(sym, isInline);
m->AddFunctionDeclaration(decl->name, ft, ds->storageClass,
isInline, decl->pos);
}
else {
if (sym->type == NULL)
Assert(m->errorCount > 0);
else
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0;
m->AddGlobalVariable(sym, decl->initExpr, isConst);
m->AddGlobalVariable(decl->name, decl->type, decl->initExpr,
isConst, decl->storageClass, decl->pos);
}
}
}
@@ -1973,7 +2012,7 @@ lAddFunctionParams(Declarator *decl) {
m->symbolTable->PushScope();
if (decl == NULL) {
Assert(m->errorCount > 0);
AssertPos(decl->pos, m->errorCount > 0);
return;
}
@@ -1981,27 +2020,24 @@ lAddFunctionParams(Declarator *decl) {
while (decl->kind != DK_FUNCTION && decl->child != NULL)
decl = decl->child;
if (decl->kind != DK_FUNCTION) {
Assert(m->errorCount > 0);
AssertPos(decl->pos, m->errorCount > 0);
return;
}
// now loop over its parameters and add them to the symbol table
for (unsigned int i = 0; i < decl->functionParams.size(); ++i) {
Declaration *pdecl = decl->functionParams[i];
if (pdecl == NULL || pdecl->declarators.size() == 0)
// zero size declarators array corresponds to an anonymous
// parameter
continue;
Assert(pdecl->declarators.size() == 1);
Symbol *sym = pdecl->declarators[0]->GetSymbol();
if (sym == NULL || sym->type == NULL)
Assert(m->errorCount > 0);
Assert(pdecl != NULL && pdecl->declarators.size() == 1);
Declarator *declarator = pdecl->declarators[0];
if (declarator == NULL)
AssertPos(decl->pos, m->errorCount > 0);
else {
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
Symbol *sym = new Symbol(declarator->name, declarator->pos,
declarator->type, declarator->storageClass);
#ifndef NDEBUG
bool ok = m->symbolTable->AddVariable(sym);
if (ok == false)
Assert(m->errorCount > 0);
AssertPos(decl->pos, m->errorCount > 0);
#else
m->symbolTable->AddVariable(sym);
#endif
@@ -2064,8 +2100,6 @@ lGetStorageClassString(StorageClass sc) {
return "";
case SC_EXTERN:
return "extern";
case SC_EXPORT:
return "export";
case SC_STATIC:
return "static";
case SC_TYPEDEF:
@@ -2157,7 +2191,7 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
if (enums[i]->constValue != NULL) {
/* Already has a value, so first update nextVal with it. */
int count = enums[i]->constValue->AsUInt32(&nextVal);
Assert(count == 1);
AssertPos(enums[i]->pos, count == 1);
++nextVal;
/* When the source file was being parsed, the ConstExpr for any
@@ -2170,7 +2204,7 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
enums[i]->pos);
castExpr = Optimize(castExpr);
enums[i]->constValue = dynamic_cast<ConstExpr *>(castExpr);
Assert(enums[i]->constValue != NULL);
AssertPos(enums[i]->pos, enums[i]->constValue != NULL);
}
else {
enums[i]->constValue = new ConstExpr(enumType, nextVal++,

View File

@@ -17,6 +17,10 @@ import shlex
import platform
import tempfile
# disable fancy error/warning printing with ANSI colors, so grepping for error
# messages doesn't get confused
os.environ["TERM"] = "dumb"
# This script is affected by http://bugs.python.org/issue5261 on OSX 10.5 Leopard
# git history has a workaround for that issue.
@@ -28,8 +32,10 @@ parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order
default=False, action="store_true")
parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics",
default=None)
parser.add_option("-f", "--ispc-flags", dest="ispc_flags", help="Additional flags for ispc (-g, -O1, ...)",
default="")
parser.add_option('-t', '--target', dest='target',
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)',
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16, generic-32)',
default="sse4")
parser.add_option('-a', '--arch', dest='arch',
help='Set architecture (x86, x86-64)',
@@ -53,6 +59,10 @@ if not is_windows:
else:
ispc_exe = "../Release/ispc.exe"
ispc_exe += " " + options.ispc_flags
print ispc_exe
is_generic_target = (options.target.find("generic-") != -1 and
options.target != "generic-1")
if is_generic_target and options.include_file == None:
@@ -65,6 +75,12 @@ if is_generic_target and options.include_file == None:
elif options.target == "generic-16":
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n")
options.include_file = "examples/intrinsics/generic-16.h"
elif options.target == "generic-32":
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-32.h\n")
options.include_file = "examples/intrinsics/generic-32.h"
elif options.target == "generic-64":
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-64.h\n")
options.include_file = "examples/intrinsics/generic-64.h"
if options.compiler_exe == None:
if is_windows:

View File

@@ -1,6 +1,6 @@
// -*- mode: c++ -*-
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -355,7 +355,8 @@ static inline uniform bool all(bool v) {
#else
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
#endif
return __movmsk(match) == (1 << programCount) - 1;
return __movmsk(match) == ((programCount == 64) ? ~0ull :
((1ull << programCount) - 1));
}
__declspec(safe)
@@ -388,14 +389,14 @@ __declspec(safe)
static inline uniform int popcnt(bool v) {
// As with any() and all(), only count across the active lanes
#ifdef ISPC_TARGET_GENERIC
return __popcnt_int32(__movmsk(v & __mask));
return __popcnt_int64(__movmsk(v & __mask));
#else
return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask));
return __popcnt_int64(__movmsk(__sext_varying_bool(v) & __mask));
#endif
}
__declspec(safe)
static inline uniform int lanemask() {
static inline uniform unsigned int64 lanemask() {
return __movmsk(__mask);
}
@@ -746,6 +747,125 @@ static inline void prefetch_nt(const void * varying ptr) {
}
}
///////////////////////////////////////////////////////////////////////////
// non-short-circuiting alternatives
// Function-call form of logical AND, listed under the
// "non-short-circuiting alternatives" section: both operands are passed
// in as already-evaluated arguments, and the result is simply a && b.
// This overload takes unqualified bool operands.
__declspec(safe,cost1)
static inline bool and(bool a, bool b) {
return a && b;
}
// Same as above, for uniform operands; returns a uniform bool.
__declspec(safe,cost1)
static inline uniform bool and(uniform bool a, uniform bool b) {
return a && b;
}
// Function-call form of logical OR, listed under the
// "non-short-circuiting alternatives" section: both operands are passed
// in as already-evaluated arguments, and the result is simply a || b.
// This overload takes unqualified bool operands.
__declspec(safe,cost1)
static inline bool or(bool a, bool b) {
return a || b;
}
// Same as above, for uniform operands; returns a uniform bool.
__declspec(safe,cost1)
static inline uniform bool or(uniform bool a, uniform bool b) {
return a || b;
}
// select(c, a, b): function-call form of the conditional operator; each
// overload returns a where c is true and b otherwise (c ? a : b).  Both
// a and b are ordinary arguments, so both are evaluated by the caller
// before the selection happens.
//
// For every element type (int8, int16, int32, int64, float, double) the
// same three overloads are provided, in this order:
//   1. unqualified condition, unqualified values
//   2. uniform condition, unqualified values
//   3. uniform condition, uniform values (returns a uniform result)

// int8 overloads
__declspec(safe,cost1)
static inline int8 select(bool c, int8 a, int8 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline int8 select(uniform bool c, int8 a, int8 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline uniform int8 select(uniform bool c, uniform int8 a,
uniform int8 b) {
return c ? a : b;
}
// int16 overloads
__declspec(safe,cost1)
static inline int16 select(bool c, int16 a, int16 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline int16 select(uniform bool c, int16 a, int16 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline uniform int16 select(uniform bool c, uniform int16 a,
uniform int16 b) {
return c ? a : b;
}
// int32 overloads
__declspec(safe,cost1)
static inline int32 select(bool c, int32 a, int32 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline int32 select(uniform bool c, int32 a, int32 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline uniform int32 select(uniform bool c, uniform int32 a,
uniform int32 b) {
return c ? a : b;
}
// int64 overloads
__declspec(safe,cost1)
static inline int64 select(bool c, int64 a, int64 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline int64 select(uniform bool c, int64 a, int64 b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline uniform int64 select(uniform bool c, uniform int64 a,
uniform int64 b) {
return c ? a : b;
}
// float overloads
__declspec(safe,cost1)
static inline float select(bool c, float a, float b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline float select(uniform bool c, float a, float b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline uniform float select(uniform bool c, uniform float a,
uniform float b) {
return c ? a : b;
}
// double overloads
__declspec(safe,cost1)
static inline double select(bool c, double a, double b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline double select(uniform bool c, double a, double b) {
return c ? a : b;
}
__declspec(safe,cost1)
static inline uniform double select(uniform bool c, uniform double a,
uniform double b) {
return c ? a : b;
}
///////////////////////////////////////////////////////////////////////////
// Horizontal ops / reductions
@@ -1469,22 +1589,17 @@ static inline void memory_barrier() {
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
memory_barrier(); \
TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
memory_barrier(); \
return ret; \
} \
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
uniform TA value) { \
memory_barrier(); \
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
memory_barrier(); \
return ret; \
} \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
__foreach_active (i) { \
uniform TA * uniform p = ptrArray[i]; \
@@ -1492,23 +1607,21 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
} \
#define DEFINE_ATOMIC_SWAP(TA,TB) \
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
memory_barrier(); \
uniform int i = 0; \
TA ret[programCount]; \
TA memVal; \
uniform int lastSwap; \
uniform int mask = lanemask(); \
uniform unsigned int64 mask = lanemask(); \
/* First, have the first running program instance (if any) perform \
the swap with memory with its value of "value"; record the \
value returned. */ \
for (; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
if ((mask & (1ull << i)) == 0) \
continue; \
memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \
lastSwap = i; \
@@ -1520,7 +1633,7 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
current instance had executed a hardware atomic swap right before \
the last one that did a swap. */ \
for (; i < programCount; ++i) { \
if ((mask & (1 << i)) == 0) \
if ((mask & (1ull << i)) == 0) \
continue; \
ret[lastSwap] = extract(value, i); \
lastSwap = i; \
@@ -1528,20 +1641,16 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
/* And the last instance that wanted to swap gets the value we \
originally got back from memory... */ \
ret[lastSwap] = memVal; \
memory_barrier(); \
return ret[programIndex]; \
} \
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
uniform TA value) { \
memory_barrier(); \
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
memory_barrier(); \
return ret; \
} \
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
__foreach_active (i) { \
uniform TA * uniform p = ptrArray[i]; \
@@ -1549,7 +1658,6 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
} \
@@ -1557,25 +1665,19 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
uniform TA oneval = reduce_##OPA(value); \
TA ret; \
if (lanemask() != 0) { \
memory_barrier(); \
if (lanemask() != 0) \
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
memory_barrier(); \
} \
return ret; \
} \
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
uniform TA value) { \
memory_barrier(); \
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
memory_barrier(); \
return ret; \
} \
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
TA value) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
__foreach_active (i) { \
uniform TA * uniform p = ptrArray[i]; \
@@ -1583,7 +1685,6 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
}
@@ -1638,25 +1739,20 @@ DEFINE_ATOMIC_SWAP(double,double)
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
static inline uniform TA atomic_compare_exchange_global( \
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
memory_barrier(); \
uniform TA ret = \
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
memory_barrier(); \
return ret; \
} \
static inline TA atomic_compare_exchange_global( \
uniform TA * uniform ptr, TA oldval, TA newval) { \
memory_barrier(); \
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
(MASKTYPE)__mask); \
memory_barrier(); \
return ret; \
} \
static inline TA atomic_compare_exchange_global( \
uniform TA * varying ptr, TA oldval, TA newval) { \
uniform TA * uniform ptrArray[programCount]; \
ptrArray[programIndex] = ptr; \
memory_barrier(); \
TA ret; \
__foreach_active (i) { \
uniform TA r = \
@@ -1665,7 +1761,6 @@ static inline TA atomic_compare_exchange_global( \
extract(newval, i)); \
ret = insert(ret, i, r); \
} \
memory_barrier(); \
return ret; \
}
@@ -1678,6 +1773,49 @@ ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
#undef ATOMIC_DECL_CMPXCHG
// void * variants of swap and compare exchange
// Pointer-valued atomic_swap_global() wrappers.  Each one reinterprets
// the target location and the value as intptr_t, forwards to the
// intptr_t-typed atomic_swap_global() overload selected by those casts,
// and casts the returned previous value back to a pointer.

// Uniform target location, unqualified (per-instance) pointer value.
static inline void *atomic_swap_global(void ** uniform ptr,
void * value) {
return (void *)atomic_swap_global((intptr_t * uniform)ptr,
(intptr_t)value);
}
// Uniform target location, uniform pointer value; returns a uniform pointer.
static inline void * uniform atomic_swap_global(void ** uniform ptr,
void * uniform value) {
return (void * uniform)atomic_swap_global((intptr_t * uniform)ptr,
(uniform intptr_t)value);
}
// Unqualified (per-instance) target location and pointer value.
static inline void *atomic_swap_global(void ** ptr, void * value) {
return (void *)atomic_swap_global((intptr_t *)ptr,
(intptr_t)value);
}
// Pointer-valued atomic_compare_exchange_global() wrappers.  Each one
// reinterprets the target location, oldval and newval as intptr_t,
// forwards to the intptr_t-typed atomic_compare_exchange_global()
// overload selected by those casts, and casts the result back to a
// pointer.

// Uniform target location, unqualified (per-instance) oldval/newval.
static inline void *
atomic_compare_exchange_global(void ** uniform ptr,
void * oldval, void * newval) {
return (void *)atomic_compare_exchange_global((intptr_t * uniform)ptr,
(intptr_t)oldval,
(intptr_t)newval);
}
// Uniform target location, uniform oldval/newval; returns a uniform pointer.
static inline void * uniform
atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval,
void * uniform newval) {
return (void * uniform)atomic_compare_exchange_global((intptr_t * uniform)ptr,
(uniform intptr_t)oldval,
(uniform intptr_t)newval);
}
// Unqualified (per-instance) target location, oldval and newval.
static inline void *
atomic_compare_exchange_global(void ** ptr, void * oldval,
void * newval) {
return (void *)atomic_compare_exchange_global((intptr_t *)ptr,
(intptr_t)oldval,
(intptr_t)newval);
}
///////////////////////////////////////////////////////////////////////////
// local atomics
@@ -1849,6 +1987,49 @@ LOCAL_CMPXCHG(double)
#undef LOCAL_ATOMIC
#undef LOCAL_CMPXCHG
// void * variants of swap and compare exchange
// Pointer-valued atomic_swap_local() wrappers.  Each one reinterprets
// the target location and the value as intptr_t, forwards to the
// intptr_t-typed atomic_swap_local() overload selected by those casts,
// and casts the returned value back to a pointer.

// Uniform target location, unqualified (per-instance) pointer value.
static inline void *atomic_swap_local(void ** uniform ptr,
void * value) {
return (void *)atomic_swap_local((intptr_t * uniform)ptr,
(intptr_t)value);
}
// Uniform target location, uniform pointer value; returns a uniform pointer.
static inline void * uniform atomic_swap_local(void ** uniform ptr,
void * uniform value) {
return (void * uniform)atomic_swap_local((intptr_t * uniform)ptr,
(uniform intptr_t)value);
}
// Unqualified (per-instance) target location and pointer value.
static inline void *atomic_swap_local(void ** ptr, void * value) {
return (void *)atomic_swap_local((intptr_t *)ptr,
(intptr_t)value);
}
// Pointer-valued atomic_compare_exchange_local() wrappers.  Each one
// reinterprets the target location, oldval and newval as intptr_t,
// forwards to the intptr_t-typed atomic_compare_exchange_local()
// overload selected by those casts, and casts the result back to a
// pointer.

// Uniform target location, unqualified (per-instance) oldval/newval.
static inline void *
atomic_compare_exchange_local(void ** uniform ptr,
void * oldval, void * newval) {
return (void *)atomic_compare_exchange_local((intptr_t * uniform)ptr,
(intptr_t)oldval,
(intptr_t)newval);
}
// Uniform target location, uniform oldval/newval; returns a uniform pointer.
static inline void * uniform
atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval,
void * uniform newval) {
return (void * uniform)atomic_compare_exchange_local((intptr_t * uniform)ptr,
(uniform intptr_t)oldval,
(uniform intptr_t)newval);
}
// Unqualified (per-instance) target location, oldval and newval.
static inline void *
atomic_compare_exchange_local(void ** ptr, void * oldval,
void * newval) {
return (void *)atomic_compare_exchange_local((intptr_t *)ptr,
(intptr_t)oldval,
(intptr_t)newval);
}
///////////////////////////////////////////////////////////////////////////
// Transcendentals (float precision)
@@ -2735,7 +2916,10 @@ static inline uniform float atan2(uniform float y, uniform float x) {
__declspec(safe)
static inline float exp(float x_full) {
if (__math_lib == __math_lib_svml) {
if (__have_native_transcendentals) {
return __exp_varying_float(x_full);
}
else if (__math_lib == __math_lib_svml) {
return __svml_exp(x_full);
}
else if (__math_lib == __math_lib_system) {
@@ -2814,7 +2998,10 @@ static inline float exp(float x_full) {
__declspec(safe)
static inline uniform float exp(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_transcendentals) {
return __exp_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_expf(x_full);
}
@@ -2936,7 +3123,10 @@ static inline void __range_reduce_log(uniform float input, uniform float * unifo
__declspec(safe)
static inline float log(float x_full) {
if (__math_lib == __math_lib_svml) {
if (__have_native_transcendentals) {
return __log_varying_float(x_full);
}
else if (__math_lib == __math_lib_svml) {
return __svml_log(x_full);
}
else if (__math_lib == __math_lib_system) {
@@ -3024,7 +3214,10 @@ static inline float log(float x_full) {
__declspec(safe)
static inline uniform float log(uniform float x_full) {
if (__math_lib == __math_lib_system ||
if (__have_native_transcendentals) {
return __log_uniform_float(x_full);
}
else if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_logf(x_full);
}
@@ -3105,7 +3298,10 @@ static inline uniform float log(uniform float x_full) {
__declspec(safe)
static inline float pow(float a, float b) {
if (__math_lib == __math_lib_svml) {
if (__have_native_transcendentals) {
return __pow_varying_float(a, b);
}
else if (__math_lib == __math_lib_svml) {
return __svml_pow(a, b);
}
else if (__math_lib == __math_lib_system) {
@@ -3124,6 +3320,9 @@ static inline float pow(float a, float b) {
__declspec(safe)
static inline uniform float pow(uniform float a, uniform float b) {
if (__have_native_transcendentals) {
return __pow_uniform_float(a, b);
}
if (__math_lib == __math_lib_system ||
__math_lib == __math_lib_svml) {
return __stdlib_powf(a, b);
@@ -3551,8 +3750,9 @@ static inline int16 float_to_half(float f) {
// like recursive filters in DSP - not a typical half-float application. Whether
// FP16 denormals are rare in practice, I don't know. Whatever slow path your HW
// may or may not have for denormals, this may well hit it.
int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask;
fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
float fscale = floatbits(fint & round_mask) * floatbits(magic);
fscale = min(fscale, floatbits((31 << 23) - 0x1000));
int32 fint2 = intbits(fscale) - round_mask;
if (fint < f32infty)
o = fint2 >> 13; // Take the bits!
@@ -3648,6 +3848,133 @@ static inline int16 float_to_half_fast(float f) {
}
}
///////////////////////////////////////////////////////////////////////////
// float -> srgb8
// https://gist.github.com/2246678, from Fabian "rygorous" Giesen.
//
// The basic ideas are still the same, only this time, we squeeze
// everything into the table, even the linear part of the range; since we
// are approximating the function as piecewise linear anyway, this is
// fairly easy.
//
// In the exact version of the conversion, any value that produces an
// output float less than 0.5 will be rounded to an integer of
// zero. Inverting the linear part of the transform, we get:
//
// log2(0.5 / (255 * 12.92)) =~ -12.686
//
// which in turn means that any value smaller than about 2^(-12.686) will
// return 0. What this means is that we can adapt the clamping code to
// just clamp to [2^(-13), 1-eps] and we're covered. This means our table
// needs to cover a range of 13 different exponents from -13 to -1.
//
// The table lookup, storage and interpolation works exactly the same way
// as in the code above.
//
// Max error for the whole function (integer-rounded result minus "exact"
// value, as computed in floats using the official formula): 0.544403 at
// 0x3e9f8000
// Converts a linear float value to an 8-bit sRGB value using a
// piecewise-linear table (varying version).
//
// The input is clamped to [2^(-13), 1-eps] before the lookup; the two
// ends of that range produce the outputs 0 and 255 respectively.  The
// table holds 8 entries for each of the 13 binary exponents -13..-1
// (13 * 8 = 104); each entry packs a bias in its high 16 bits and a
// scale in its low 16 bits.
//
// NOTE(review): NaN inputs are not handled explicitly here; the result
// depends on how max()/min() treat NaN -- confirm if that matters.
__declspec(safe)
static inline int
float_to_srgb8(float in)
{
    static const uniform unsigned int table[104] = {
        0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d,
        0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
        0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a,
        0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
        0x010e0033, 0x01280033, 0x01410033, 0x015b0033,
        0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
        0x01dc0067, 0x020f0067, 0x02430067, 0x02760067,
        0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
        0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce,
        0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
        0x06970158, 0x07420142, 0x07e30130, 0x087b0120,
        0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
        0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180,
        0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
        0x11070264, 0x1238023e, 0x1357021d, 0x14660201,
        0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
        0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad,
        0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
        0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392,
        0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
        0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5,
        0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
        0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d,
        0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
        0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f,
        0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
    };

    // Bit pattern of the largest float below 1.0 (1-eps).
    static const uniform unsigned int almost_one = 0x3f7fffff;
    // Bit pattern of 2^(-13), i.e. (127 - 13) << 23.  This is also the
    // offset subtracted from the input bits to form the table index, so
    // the lower clamp bound and the index bias must stay in sync.
    static const uniform unsigned int min_val = 0x39000000;

    // Clamp to [2^(-13), 1-eps]; these two values map to outputs 0 and
    // 255, respectively.  Clamping the low end to 2^(-13) (rather than
    // 0) is required: for any smaller input, intbits(in) - min_val would
    // wrap around and index far outside of table[].
    in = max(in, floatbits(min_val));
    in = min(in, floatbits(almost_one));

    // Do the table lookup and unpack bias, scale.  The index is the
    // exponent plus the top 3 mantissa bits, rebased to start at 2^(-13).
    unsigned int tab = table[(intbits(in) - min_val) >> 20];
    unsigned int bias = (tab >> 16) << 9;
    unsigned int scale = tab & 0xffff;

    // Grab next-highest mantissa bits and perform linear interpolation
    unsigned int t = (intbits(in) >> 12) & 0xff;
    return (bias + scale*t) >> 16;
}
// Converts a linear float value to an 8-bit sRGB value using a
// piecewise-linear table (uniform version).
//
// The input is clamped to [2^(-13), 1-eps] before the lookup; the two
// ends of that range produce the outputs 0 and 255 respectively.  The
// table holds 8 entries for each of the 13 binary exponents -13..-1
// (13 * 8 = 104); each entry packs a bias in its high 16 bits and a
// scale in its low 16 bits.
//
// NOTE(review): NaN inputs are not handled explicitly here; the result
// depends on how max()/min() treat NaN -- confirm if that matters.
__declspec(safe)
static inline uniform int
float_to_srgb8(uniform float in)
{
    static const uniform unsigned int table[104] = {
        0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d,
        0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
        0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a,
        0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
        0x010e0033, 0x01280033, 0x01410033, 0x015b0033,
        0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
        0x01dc0067, 0x020f0067, 0x02430067, 0x02760067,
        0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
        0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce,
        0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
        0x06970158, 0x07420142, 0x07e30130, 0x087b0120,
        0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
        0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180,
        0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
        0x11070264, 0x1238023e, 0x1357021d, 0x14660201,
        0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
        0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad,
        0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
        0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392,
        0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
        0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5,
        0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
        0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d,
        0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
        0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f,
        0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
    };

    // Bit pattern of the largest float below 1.0 (1-eps).
    static const uniform unsigned int almost_one = 0x3f7fffff;
    // Bit pattern of 2^(-13), i.e. (127 - 13) << 23.  This is also the
    // offset subtracted from the input bits to form the table index, so
    // the lower clamp bound and the index bias must stay in sync.
    static const uniform unsigned int min_val = 0x39000000;

    // Clamp to [2^(-13), 1-eps]; these two values map to outputs 0 and
    // 255, respectively.  Clamping the low end to 2^(-13) (rather than
    // 0) is required: for any smaller input, intbits(in) - min_val would
    // wrap around and index far outside of table[].
    in = max(in, floatbits(min_val));
    in = min(in, floatbits(almost_one));

    // Do the table lookup and unpack bias, scale.  The index is the
    // exponent plus the top 3 mantissa bits, rebased to start at 2^(-13).
    uniform unsigned int tab = table[(intbits(in) - min_val) >> 20];
    uniform unsigned int bias = (tab >> 16) << 9;
    uniform unsigned int scale = tab & 0xffff;

    // Grab next-highest mantissa bits and perform linear interpolation
    uniform unsigned int t = (intbits(in) >> 12) & 0xff;
    return (bias + scale*t) >> 16;
}
///////////////////////////////////////////////////////////////////////////
// RNG stuff
@@ -3699,60 +4026,13 @@ static inline uniform float frandom(uniform RNGState * uniform state)
return floatbits(0x3F800000 | irand)-1.0f;
}
static inline uniform unsigned int __seed4(varying RNGState * uniform state,
uniform int start,
uniform unsigned int seed) {
uniform unsigned int c1 = 0xf0f0f0f0;
uniform unsigned int c2 = 0x0f0f0f0f;
state->z1 = insert(state->z1, start + 0, seed);
state->z1 = insert(state->z1, start + 1, seed ^ c1);
state->z1 = insert(state->z1, start + 2, (seed << 3) ^ c1);
state->z1 = insert(state->z1, start + 3, (seed << 2) ^ c2);
seed += 131;
state->z2 = insert(state->z2, start + 0, seed);
state->z2 = insert(state->z2, start + 1, seed ^ c1);
state->z2 = insert(state->z2, start + 2, (seed << 3) ^ c1);
state->z2 = insert(state->z2, start + 3, (seed << 2) ^ c2);
seed ^= extract(state->z2, 2);
state->z3 = insert(state->z3, start + 0, seed);
state->z3 = insert(state->z3, start + 1, seed ^ c1);
state->z3 = insert(state->z3, start + 2, (seed << 3) ^ c1);
state->z3 = insert(state->z3, start + 3, (seed << 2) ^ c2);
seed <<= 4;
seed += 3;
seed ^= extract(state->z1, 3);
state->z4 = insert(state->z4, start + 0, seed);
state->z4 = insert(state->z4, start + 1, seed ^ c1);
state->z4 = insert(state->z4, start + 2, (seed << 3) ^ c1);
state->z4 = insert(state->z4, start + 3, (seed << 2) ^ c2);
return seed;
}
static inline void seed_rng(varying RNGState * uniform state,
uniform unsigned int seed) {
if (programCount == 1) {
state->z1 = seed;
state->z2 = seed ^ 0xbeeff00d;
state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
}
else {
seed = __seed4(state, 0, seed);
if (programCount == 8)
__seed4(state, 4, seed ^ 0xbeeff00d);
if (programCount == 16) {
__seed4(state, 4, seed ^ 0xbeeff00d);
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16));
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24));
}
}
unsigned int seed) {
state->z1 = seed;
state->z2 = seed ^ 0xbeeff00d;
state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
}
static inline void seed_rng(uniform RNGState * uniform state,

172
stmt.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -40,6 +40,7 @@
#include "util.h"
#include "expr.h"
#include "type.h"
#include "func.h"
#include "sym.h"
#include "module.h"
#include "llvmutil.h"
@@ -121,7 +122,7 @@ DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p)
static bool
lHasUnsizedArrays(const Type *type) {
const ArrayType *at = dynamic_cast<const ArrayType *>(type);
const ArrayType *at = CastType<ArrayType>(type);
if (at == NULL)
return false;
@@ -139,7 +140,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
for (unsigned int i = 0; i < vars.size(); ++i) {
Symbol *sym = vars[i].sym;
Assert(sym != NULL);
AssertPos(pos, sym != NULL);
if (sym->type == NULL)
continue;
Expr *initExpr = vars[i].init;
@@ -167,16 +168,30 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
}
// References must have initializer expressions as well.
if (dynamic_cast<const ReferenceType *>(sym->type) && initExpr == NULL) {
Error(sym->pos,
"Must provide initializer for reference-type variable \"%s\".",
sym->name.c_str());
continue;
if (IsReferenceType(sym->type) == true) {
if (initExpr == NULL) {
Error(sym->pos, "Must provide initializer for reference-type "
"variable \"%s\".", sym->name.c_str());
continue;
}
if (IsReferenceType(initExpr->GetType()) == false) {
const Type *initLVType = initExpr->GetLValueType();
if (initLVType == NULL) {
Error(initExpr->pos, "Initializer for reference-type variable "
"\"%s\" must have an lvalue type.", sym->name.c_str());
continue;
}
if (initLVType->IsUniformType() == false) {
Error(initExpr->pos, "Initializer for reference-type variable "
"\"%s\" must have a uniform lvalue type.", sym->name.c_str());
continue;
}
}
}
LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
if (llvmType == NULL) {
Assert(m->errorCount > 0);
AssertPos(pos, m->errorCount > 0);
return;
}
@@ -282,8 +297,8 @@ DeclStmt::TypeCheck() {
// the int->float type conversion is in there and we don't return
// an int as the constValue later...
const Type *type = vars[i].sym->type;
if (dynamic_cast<const AtomicType *>(type) != NULL ||
dynamic_cast<const EnumType *>(type) != NULL) {
if (CastType<AtomicType>(type) != NULL ||
CastType<EnumType>(type) != NULL) {
// If it's an expr list with an atomic type, we'll later issue
// an error. Need to leave vars[i].init as is in that case so
// it is in fact caught later, though.
@@ -463,12 +478,12 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
// under varying control flow,, returns can't stop instruction
// emission, so this better be non-NULL...
Assert(ctx->GetCurrentBasicBlock());
AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock());
}
if (falseStmts) {
ctx->SetInternalMaskAndNot(oldMask, test);
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
Assert(ctx->GetCurrentBasicBlock());
AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock());
}
}
@@ -549,7 +564,7 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
(costIsAcceptable || g->opt.disableCoherentControlFlow)) {
ctx->StartVaryingIf(oldMask);
emitMaskedTrueAndFalse(ctx, oldMask, ltest);
Assert(ctx->GetCurrentBasicBlock());
AssertPos(pos, ctx->GetCurrentBasicBlock());
ctx->EndIf();
}
else {
@@ -572,7 +587,7 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
// compiler see what's going on so that subsequent optimizations for
// code emitted here can operate with the knowledge that the mask is
// definitely all on (until it modifies the mask itself).
Assert(!g->opt.disableCoherentControlFlow);
AssertPos(pos, !g->opt.disableCoherentControlFlow);
if (!g->opt.disableMaskAllOnOptimizations)
ctx->SetInternalMask(LLVMMaskAllOn);
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
@@ -622,7 +637,7 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
emitMaskedTrueAndFalse(ctx, LLVMMaskAllOn, ltest);
// In this case, return/break/continue isn't allowed to jump and end
// emission.
Assert(ctx->GetCurrentBasicBlock());
AssertPos(pos, ctx->GetCurrentBasicBlock());
ctx->EndIf();
ctx->BranchInst(bDone);
@@ -651,7 +666,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Emit statements for true
ctx->SetCurrentBasicBlock(bRunTrue);
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
Assert(ctx->GetCurrentBasicBlock());
AssertPos(pos, ctx->GetCurrentBasicBlock());
ctx->BranchInst(bNext);
ctx->SetCurrentBasicBlock(bNext);
}
@@ -668,7 +683,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
// Emit code for false
ctx->SetCurrentBasicBlock(bRunFalse);
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
Assert(ctx->GetCurrentBasicBlock());
AssertPos(pos, ctx->GetCurrentBasicBlock());
ctx->BranchInst(bNext);
ctx->SetCurrentBasicBlock(bNext);
}
@@ -822,7 +837,7 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
ctx->SetFunctionMask(LLVMMaskAllOn);
if (bodyStmts)
bodyStmts->EmitCode(ctx);
Assert(ctx->GetCurrentBasicBlock());
AssertPos(pos, ctx->GetCurrentBasicBlock());
ctx->SetFunctionMask(oldFunctionMask);
ctx->BranchInst(btest);
@@ -830,7 +845,7 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
ctx->SetCurrentBasicBlock(bMixed);
if (bodyStmts)
bodyStmts->EmitCode(ctx);
Assert(ctx->GetCurrentBasicBlock());
AssertPos(pos, ctx->GetCurrentBasicBlock());
ctx->BranchInst(btest);
}
else {
@@ -971,7 +986,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
// it and then jump into the loop test code. (Also start a new scope
// since the initializer may be a declaration statement).
if (init) {
Assert(dynamic_cast<StmtList *>(init) == NULL);
AssertPos(pos, dynamic_cast<StmtList *>(init) == NULL);
ctx->StartScope();
init->EmitCode(ctx);
}
@@ -1000,7 +1015,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
if (doCoherentCheck)
Warning(test->pos, "Uniform condition supplied to cfor/cwhile "
"statement.");
Assert(ltest->getType() == LLVMTypes::BoolType);
AssertPos(pos, ltest->getType() == LLVMTypes::BoolType);
ctx->BranchInst(bloop, bexit, ltest);
}
else {
@@ -1036,7 +1051,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
ctx->SetFunctionMask(LLVMMaskAllOn);
if (stmts)
stmts->EmitCode(ctx);
Assert(ctx->GetCurrentBasicBlock());
AssertPos(pos, ctx->GetCurrentBasicBlock());
ctx->SetFunctionMask(oldFunctionMask);
ctx->BranchInst(bstep);
@@ -1349,8 +1364,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
ctx->SetFunctionMask(LLVMMaskAllOn);
// This should be caught during typechecking
Assert(startExprs.size() == dimVariables.size() &&
endExprs.size() == dimVariables.size());
AssertPos(pos, startExprs.size() == dimVariables.size() &&
endExprs.size() == dimVariables.size());
int nDims = (int)dimVariables.size();
///////////////////////////////////////////////////////////////////////
@@ -1689,7 +1704,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
ctx->SetContinueTarget(bbFullBodyContinue);
ctx->AddInstrumentationPoint("foreach loop body (all on)");
stmts->EmitCode(ctx);
Assert(ctx->GetCurrentBasicBlock() != NULL);
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
ctx->BranchInst(bbFullBodyContinue);
}
ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
@@ -2079,7 +2094,7 @@ SwitchStmt::EmitCode(FunctionEmitContext *ctx) const {
const Type *type;
if (expr == NULL || ((type = expr->GetType()) == NULL)) {
Assert(m->errorCount > 0);
AssertPos(pos, m->errorCount > 0);
return;
}
@@ -2097,7 +2112,7 @@ SwitchStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::Value *exprValue = expr->GetValue(ctx);
if (exprValue == NULL) {
Assert(m->errorCount > 0);
AssertPos(pos, m->errorCount > 0);
return;
}
@@ -2173,8 +2188,8 @@ SwitchStmt::EstimateCost() const {
///////////////////////////////////////////////////////////////////////////
// ReturnStmt
ReturnStmt::ReturnStmt(Expr *v, bool cc, SourcePos p)
: Stmt(p), val(v),
ReturnStmt::ReturnStmt(Expr *e, bool cc, SourcePos p)
: Stmt(p), expr(e),
doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) {
}
@@ -2189,8 +2204,29 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const {
return;
}
// Make sure we're not trying to return a reference to something where
// that doesn't make sense
const Function *func = ctx->GetFunction();
const Type *returnType = func->GetReturnType();
if (IsReferenceType(returnType) == true &&
IsReferenceType(expr->GetType()) == false) {
const Type *lvType = expr->GetLValueType();
if (lvType == NULL) {
Error(expr->pos, "Illegal to return non-lvalue from function "
"returning reference type \"%s\".",
returnType->GetString().c_str());
return;
}
else if (lvType->IsUniformType() == false) {
Error(expr->pos, "Illegal to return varying lvalue type from "
"function returning a reference type \"%s\".",
returnType->GetString().c_str());
return;
}
}
ctx->SetDebugPos(pos);
ctx->CurrentLanesReturned(val, doCoherenceCheck);
ctx->CurrentLanesReturned(expr, doCoherenceCheck);
}
@@ -2210,7 +2246,8 @@ void
ReturnStmt::Print(int indent) const {
printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
pos.Print();
if (val) val->Print();
if (expr)
expr->Print();
else printf("(void)");
printf("\n");
}
@@ -2228,6 +2265,9 @@ GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip)
void
GotoStmt::EmitCode(FunctionEmitContext *ctx) const {
if (!ctx->GetCurrentBasicBlock())
return;
if (ctx->VaryingCFDepth() > 0) {
Error(pos, "\"goto\" statements are only legal under \"uniform\" "
"control flow.");
@@ -2241,10 +2281,22 @@ GotoStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label);
if (bb == NULL) {
// TODO: use the string distance stuff to suggest alternatives if
// there are some with names close to the label name we have here..
Error(identifierPos, "No label named \"%s\" found in current function.",
label.c_str());
/* Label wasn't found. Look for suggestions that are close */
std::vector<std::string> labels = ctx->GetLabels();
std::vector<std::string> matches = MatchStrings(label, labels);
std::string match_output;
if (! matches.empty()) {
/* Print up to 5 matches. Don't want to spew too much */
match_output += "\nDid you mean:";
for (unsigned int i=0; i<matches.size() && i<5; i++)
match_output += "\n " + matches[i] + "?";
}
/* Label wasn't found. Emit an error */
Error(identifierPos,
"No label named \"%s\" found in current function.%s",
label.c_str(), match_output.c_str());
return;
}
@@ -2290,7 +2342,7 @@ LabeledStmt::LabeledStmt(const char *n, Stmt *s, SourcePos p)
void
LabeledStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name);
Assert(bblock != NULL);
AssertPos(pos, bblock != NULL);
// End the current basic block with a jump to our basic block and then
// set things up for emission to continue there. Note that the current
@@ -2409,7 +2461,7 @@ lEncodeType(const Type *t) {
if (Type::Equal(t, AtomicType::VaryingUInt64)) return 'V';
if (Type::Equal(t, AtomicType::UniformDouble)) return 'd';
if (Type::Equal(t, AtomicType::VaryingDouble)) return 'D';
if (dynamic_cast<const PointerType *>(t) != NULL) {
if (CastType<PointerType>(t) != NULL) {
if (t->IsUniformType())
return 'p';
else
@@ -2429,7 +2481,7 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
if (type == NULL)
return NULL;
if (dynamic_cast<const ReferenceType *>(type) != NULL) {
if (CastType<ReferenceType>(type) != NULL) {
expr = new RefDerefExpr(expr, expr->pos);
type = expr->GetType();
if (type == NULL)
@@ -2457,7 +2509,7 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
else {
argTypes.push_back(t);
LLVM_TYPE_CONST llvm::Type *llvmExprType = type->LLVMType(g->ctx);
llvm::Type *llvmExprType = type->LLVMType(g->ctx);
llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg");
llvm::Value *val = expr->GetValue(ctx);
if (!val)
@@ -2478,6 +2530,9 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
*/
void
PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
if (!ctx->GetCurrentBasicBlock())
return;
ctx->SetDebugPos(pos);
// __do_print takes 5 arguments; we'll get them stored in the args[] array
@@ -2494,7 +2549,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
std::string argTypes;
if (values == NULL) {
LLVM_TYPE_CONST llvm::Type *ptrPtrType =
llvm::Type *ptrPtrType =
llvm::PointerType::get(LLVMTypes::VoidPointerType, 0);
args[4] = llvm::Constant::getNullValue(ptrPtrType);
}
@@ -2506,7 +2561,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
int nArgs = elist ? elist->exprs.size() : 1;
// Allocate space for the array of pointers to values to be printed
LLVM_TYPE_CONST llvm::Type *argPtrArrayType =
llvm::Type *argPtrArrayType =
llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs);
llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType,
"print_arg_ptrs");
@@ -2542,7 +2597,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
// Now we can emit code to call __do_print()
llvm::Function *printFunc = m->module->getFunction("__do_print");
Assert(printFunc);
AssertPos(pos, printFunc);
llvm::Value *mask = ctx->GetFullMask();
// Set up the rest of the parameters to it
@@ -2583,6 +2638,9 @@ AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p)
void
AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
if (!ctx->GetCurrentBasicBlock())
return;
if (expr == NULL)
return;
const Type *type = expr->GetType();
@@ -2595,7 +2653,7 @@ AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
llvm::Function *assertFunc =
isUniform ? m->module->getFunction("__do_assert_uniform") :
m->module->getFunction("__do_assert_varying");
Assert(assertFunc != NULL);
AssertPos(pos, assertFunc != NULL);
char *errorString;
if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s\n",
@@ -2658,20 +2716,23 @@ DeleteStmt::DeleteStmt(Expr *e, SourcePos p)
void
DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
if (!ctx->GetCurrentBasicBlock())
return;
const Type *exprType;
if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
Assert(m->errorCount > 0);
AssertPos(pos, m->errorCount > 0);
return;
}
llvm::Value *exprValue = expr->GetValue(ctx);
if (exprValue == NULL) {
Assert(m->errorCount > 0);
AssertPos(pos, m->errorCount > 0);
return;
}
// Typechecking should catch this
Assert(dynamic_cast<const PointerType *>(exprType) != NULL);
AssertPos(pos, CastType<PointerType>(exprType) != NULL);
if (exprType->IsUniformType()) {
// For deletion of a uniform pointer, we just need to cast the
@@ -2680,7 +2741,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType,
"ptr_to_void");
llvm::Function *func = m->module->getFunction("__delete_uniform");
Assert(func != NULL);
AssertPos(pos, func != NULL);
ctx->CallInst(func, NULL, exprValue, "");
}
@@ -2690,7 +2751,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
// only need to extend to 64-bit values on 32-bit targets before
// calling it.
llvm::Function *func = m->module->getFunction("__delete_varying");
Assert(func != NULL);
AssertPos(pos, func != NULL);
if (g->target.is32Bit)
exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
"ptr_to_64");
@@ -2711,7 +2772,7 @@ DeleteStmt::TypeCheck() {
if (expr == NULL || ((exprType = expr->GetType()) == NULL))
return NULL;
if (dynamic_cast<const PointerType *>(exprType) == NULL) {
if (CastType<PointerType>(exprType) == NULL) {
Error(pos, "Illegal to delete non-pointer type \"%s\".",
exprType->GetString().c_str());
return NULL;
@@ -2743,7 +2804,7 @@ DeleteStmt::EstimateCost() const {
Stmt *
CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
if (iterSym == NULL) {
Assert(m->errorCount > 0);
AssertPos(pos, m->errorCount > 0);
return NULL;
}
@@ -2770,11 +2831,11 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
// First, call __movmsk(__mask) to get the mask as a set of bits.
// This should be hoisted out of the loop
Symbol *maskSym = m->symbolTable->LookupVariable("__mask");
Assert(maskSym != NULL);
AssertPos(pos, maskSym != NULL);
Expr *maskVecExpr = new SymbolExpr(maskSym, pos);
std::vector<Symbol *> mmFuns;
m->symbolTable->LookupFunction("__movmsk", &mmFuns);
Assert(mmFuns.size() == 2);
AssertPos(pos, mmFuns.size() == (g->target.maskBitCount == 32 ? 2 : 1));
FunctionSymbolExpr *movmskFunc = new FunctionSymbolExpr("__movmsk", mmFuns,
pos);
ExprList *movmskArgs = new ExprList(maskVecExpr, pos);
@@ -2782,7 +2843,7 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
pos);
// Compute the per lane mask to test the mask bits against: (1 << iter)
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, 1,
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt64, int64_t(1),
iterSym->pos);
Expr *shiftLaneExpr = new BinaryExpr(BinaryExpr::Shl, oneExpr, symExpr,
pos);
@@ -2802,4 +2863,3 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
// And return a for loop that wires it all together.
return new ForStmt(initStmt, testExpr, stepStmt, laneCheckIf, false, pos);
}

6
stmt.h
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -265,7 +265,7 @@ public:
statement in the program. */
class ReturnStmt : public Stmt {
public:
ReturnStmt(Expr *v, bool cc, SourcePos p);
ReturnStmt(Expr *e, bool cc, SourcePos p);
void EmitCode(FunctionEmitContext *ctx) const;
void Print(int indent) const;
@@ -273,7 +273,7 @@ public:
Stmt *TypeCheck();
int EstimateCost() const;
Expr *val;
Expr *expr;
/** This indicates whether the generated code will check to see if no
more program instances are currently running after the return, in
which case the code can possibly jump to the end of the current

116
sym.cpp
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -56,12 +56,6 @@ Symbol::Symbol(const std::string &n, SourcePos p, const Type *t,
}
std::string
Symbol::MangledName() const {
return name + type->Mangle();
}
///////////////////////////////////////////////////////////////////////////
// SymbolTable
@@ -72,27 +66,31 @@ SymbolTable::SymbolTable() {
SymbolTable::~SymbolTable() {
// Otherwise we have mismatched push/pop scopes
Assert(variables.size() == 1 && types.size() == 1);
Assert(variables.size() == 1);
PopScope();
}
void
SymbolTable::PushScope() {
variables.push_back(new SymbolMapType);
types.push_back(new TypeMapType);
SymbolMapType *sm;
if (freeSymbolMaps.size() > 0) {
sm = freeSymbolMaps.back();
freeSymbolMaps.pop_back();
sm->erase(sm->begin(), sm->end());
}
else
sm = new SymbolMapType;
variables.push_back(sm);
}
void
SymbolTable::PopScope() {
Assert(variables.size() > 1);
delete variables.back();
freeSymbolMaps.push_back(variables.back());
variables.pop_back();
Assert(types.size() > 1);
delete types.back();
types.pop_back();
}
@@ -147,7 +145,7 @@ SymbolTable::LookupVariable(const char *name) {
bool
SymbolTable::AddFunction(Symbol *symbol) {
const FunctionType *ft = dynamic_cast<const FunctionType *>(symbol->type);
const FunctionType *ft = CastType<FunctionType>(symbol->type);
Assert(ft != NULL);
if (LookupFunction(symbol->name.c_str(), ft) != NULL)
// A function of the same name and type has already been added to
@@ -192,26 +190,17 @@ SymbolTable::LookupFunction(const char *name, const FunctionType *type) {
bool
SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) {
// Like AddVariable(), we go backwards through the type maps, working
// from innermost scope to outermost.
for (int i = types.size()-1; i >= 0; --i) {
TypeMapType &sm = *(types[i]);
if (sm.find(name) != sm.end()) {
if (i == (int)types.size() - 1) {
Error(pos, "Ignoring redefinition of type \"%s\".", name);
return false;
}
else {
Warning(pos, "Type \"%s\" shadows type declared in outer scope.", name);
TypeMapType &sm = *(types.back());
sm[name] = type;
return true;
}
}
const Type *t = LookupType(name);
if (t != NULL && CastType<UndefinedStructType>(t) == NULL) {
// If we have a previous declaration of anything other than an
// UndefinedStructType with this struct name, issue an error. If
// we have an UndefinedStructType, then we'll fall through to the
// code below that adds the definition to the type map.
Error(pos, "Ignoring redefinition of type \"%s\".", name);
return false;
}
TypeMapType &sm = *(types.back());
sm[name] = type;
types[name] = type;
return true;
}
@@ -219,11 +208,9 @@ SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) {
const Type *
SymbolTable::LookupType(const char *name) const {
// Again, search through the type maps backward to get scoping right.
for (int i = types.size()-1; i >= 0; --i) {
TypeMapType &sm = *(types[i]);
if (sm.find(name) != sm.end())
return sm[name];
}
TypeMapType::const_iterator iter = types.find(name);
if (iter != types.end())
return iter->second;
return NULL;
}
@@ -288,21 +275,19 @@ SymbolTable::closestTypeMatch(const char *str, bool structsVsEnums) const {
const int maxDelta = 2;
std::vector<std::string> matches[maxDelta+1];
for (unsigned int i = 0; i < types.size(); ++i) {
TypeMapType::const_iterator iter;
for (iter = types[i]->begin(); iter != types[i]->end(); ++iter) {
// Skip over either StructTypes or EnumTypes, depending on the
// value of the structsVsEnums parameter
bool isEnum = (dynamic_cast<const EnumType *>(iter->second) != NULL);
if (isEnum && structsVsEnums)
continue;
else if (!isEnum && !structsVsEnums)
continue;
TypeMapType::const_iterator iter;
for (iter = types.begin(); iter != types.end(); ++iter) {
// Skip over either StructTypes or EnumTypes, depending on the
// value of the structsVsEnums parameter
bool isEnum = (CastType<EnumType>(iter->second) != NULL);
if (isEnum && structsVsEnums)
continue;
else if (!isEnum && !structsVsEnums)
continue;
int dist = StringEditDistance(str, iter->first, maxDelta+1);
if (dist <= maxDelta)
matches[dist].push_back(iter->first);
}
int dist = StringEditDistance(str, iter->first, maxDelta+1);
if (dist <= maxDelta)
matches[dist].push_back(iter->first);
}
for (int i = 0; i <= maxDelta; ++i) {
@@ -342,16 +327,12 @@ SymbolTable::Print() {
depth = 0;
fprintf(stderr, "Named types:\n---------------\n");
for (unsigned int i = 0; i < types.size(); ++i) {
TypeMapType &sm = *types[i];
TypeMapType::iterator siter = sm.begin();
while (siter != sm.end()) {
fprintf(stderr, "%*c", depth, ' ');
fprintf(stderr, "%s -> %s\n", siter->first.c_str(),
siter->second->GetString().c_str());
++siter;
}
depth += 4;
TypeMapType::iterator siter = types.begin();
while (siter != types.end()) {
fprintf(stderr, "%*c", depth, ' ');
fprintf(stderr, "%s -> %s\n", siter->first.c_str(),
siter->second->GetString().c_str());
++siter;
}
}
@@ -382,14 +363,11 @@ SymbolTable::RandomSymbol() {
const Type *
SymbolTable::RandomType() {
int v = ispcRand() % types.size();
if (types[v]->size() == 0)
return NULL;
int count = ispcRand() % types[v]->size();
TypeMapType::iterator iter = types[v]->begin();
int count = types.size();
TypeMapType::iterator iter = types.begin();
while (count-- > 0) {
++iter;
Assert(iter != types[v]->end());
Assert(iter != types.end());
}
return iter->second;
}

22
sym.h
View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -67,15 +67,8 @@ public:
Symbol(const std::string &name, SourcePos pos, const Type *t = NULL,
StorageClass sc = SC_NONE);
/** This method should only be called for function symbols; for them,
it returns a mangled version of the function name with the argument
types encoded into the returned name. This is used to generate
unique symbols in object files for overloaded functions.
*/
std::string MangledName() const;
SourcePos pos; /*!< Source file position where the symbol was defined */
const std::string name; /*!< Symbol's name */
std::string name; /*!< Symbol's name */
llvm::Value *storagePtr; /*!< For symbols with storage associated with
them (i.e. variables but not functions),
this member stores a pointer to its
@@ -208,6 +201,9 @@ public:
/** Adds the named type to the symbol table. This is used for both
struct definitions (where <tt>struct Foo</tt> causes type \c Foo to
be added to the symbol table) as well as for <tt>typedef</tt>s.
For structs that have only been forward-declared ("struct Foo;") and
are thus UndefinedStructTypes, this method replaces the placeholder
with the actual struct definition if one is provided.
@param name Name of the type to be added
@param type Type that \c name represents
@@ -264,6 +260,8 @@ private:
typedef std::map<std::string, Symbol *> SymbolMapType;
std::vector<SymbolMapType *> variables;
std::vector<SymbolMapType *> freeSymbolMaps;
/** Function declarations are *not* scoped. (C99, for example, allows
an implementation to maintain function declarations in a single
namespace.) A STL \c vector is used to store the function symbols
@@ -272,12 +270,10 @@ private:
typedef std::map<std::string, std::vector<Symbol *> > FunctionMapType;
FunctionMapType functions;
/** Type definitions can also be scoped. A new \c TypeMapType
is added to the back of the \c types \c vector each time a new scope
is entered. (And it's removed when the scope exits).
/** Type definitions can't currently be scoped.
*/
typedef std::map<std::string, const Type *> TypeMapType;
std::vector<TypeMapType *> types;
TypeMapType types;
};

View File

@@ -102,15 +102,21 @@ void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) {
int main(int argc, char *argv[]) {
int w = width();
assert(w <= 16);
assert(w <= 64);
float returned_result[16];
for (int i = 0; i < 16; ++i)
float returned_result[64];
float vfloat[64];
double vdouble[64];
int vint[64], vint2[64];
for (int i = 0; i < 64; ++i) {
returned_result[i] = -1e20;
float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };
int vint2[16] = { 5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
vfloat[i] = i+1;
vdouble[i] = i+1;
vint[i] = 2*(i+1);
vint2[i] = i+5;
}
float b = 5.;
#if (TEST_SIG == 0)
@@ -131,8 +137,8 @@ int main(int argc, char *argv[]) {
#error "Unknown or unset TEST_SIG value"
#endif
float expected_result[16];
memset(expected_result, 0, 16*sizeof(float));
float expected_result[64];
memset(expected_result, 0, 64*sizeof(float));
result(expected_result);
int errors = 0;

View File

@@ -6,14 +6,14 @@ bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) <
export void f_v(uniform float RET[]) {
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
uniform float r[8];
uniform float r[programCount];
foreach (i = 0 ... 8)
r[i] = cos(acos(vals[i]));
r[i] = cos(acos(vals[i % 8]));
int errors = 0;
for (uniform int i = 0; i < 8; ++i) {
if (ok(r[i], vals[i]) == false) {
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
if (ok(r[i], vals[i%8]) == false) {
print("error @ %: got %, expected %\n", i, r[i], vals[i%8]);
++errors;
}
}

View File

@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
export void f_v(uniform float RET[]) {
#define width 3
#define maxProgramCount 16
#define maxProgramCount 64
assert(programCount <= maxProgramCount);
//CO const uniform int width = 3;
//CO const uniform int maxProgramCount = 16;
uniform float a[width*maxProgramCount], r[width*maxProgramCount];

View File

@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
export void f_v(uniform float RET[]) {
#define width 4
#define maxProgramCount 16
#define maxProgramCount 64
assert(programCount <= maxProgramCount);
//CO const uniform int width = 4;
//CO const uniform int maxProgramCount = 16;
uniform float a[width*maxProgramCount], r[width*maxProgramCount];

View File

@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
export void f_v(uniform float RET[]) {
#define width 3
#define maxProgramCount 16
#define maxProgramCount 64
assert(programCount <= maxProgramCount);
//CO const uniform int width = 3;
//CO const uniform int maxProgramCount = 16;
uniform int a[width*maxProgramCount], r[width*maxProgramCount];

View File

@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
export void f_v(uniform float RET[]) {
#define width 4
#define maxProgramCount 16
#define maxProgramCount 64
assert(programCount <= maxProgramCount);
//CO const uniform int width = 4;
//CO const uniform int maxProgramCount = 16;
uniform int a[width*maxProgramCount], r[width*maxProgramCount];

View File

@@ -5,9 +5,9 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float x[45];
uniform float x[programCount+15];
uniform int i;
for (i = 0; i < 45; ++i)
for (i = 0; i < programCount+15; ++i)
x[i] = i;
float ret;

View File

@@ -10,7 +10,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
for (uniform int i = 0; i < 29+b; ++i)
for (uniform int j = 0; j < 29+b; ++j)
x[i][j] = 0;
x[a][a] = a;
if (a < 34)
x[a][a] = a;
RET[programIndex] = x[4][4] + x[1][1] + x[b][b] + x[0][0];
}

View File

@@ -12,8 +12,10 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
}
export void result(uniform float RET[]) {
RET[0] = 1; RET[4] = 5; RET[8] = 9; RET[12] = 13;
RET[1] = RET[5] = RET[9] = RET[13] = 0;
RET[2] = 6; RET[6] = 14; RET[10] = 22; RET[14] = 30;
RET[3] = RET[7] = RET[11] = RET[15] = 3;
for (uniform int i = 0; i < programCount; i += 4) {
RET[i] = i+1;
RET[i+1] = 0;
RET[i+2] = 2 * (i+3);
RET[i+3] = 3;
}
}

View File

@@ -4,9 +4,9 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float x[45];
uniform float x[programCount+5];
uniform int i;
for (i = 0; i < 45; ++i)
for (i = 0; i < programCount+5; ++i)
x[i] = i+b;
a -= 1;
if (a == 3) a = 0;

View File

@@ -4,9 +4,9 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float x[45];
uniform float x[programCount+5];
uniform int i;
for (i = 0; i < 45; ++i)
for (i = 0; i < programCount+5; ++i)
x[i] = i+b;
RET[programIndex] = x[a];
}

View File

@@ -4,14 +4,14 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
float x[55];
float x[programCount+10];
uniform int i;
for (i = 0; i < 45; ++i)
for (i = 0; i < programCount+10; ++i)
x[i] = a+b;
RET[programIndex] = x[a];
}
export void result(uniform float RET[]) {
RET[programIndex] = 6 + programIndex;;
RET[programIndex] = 6 + programIndex;
}

View File

@@ -15,6 +15,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
x[a][b-1] = 0;
else
x[a][b-1] = 1;
a = min(a, 46);
RET[programIndex] = x[3][a];
}

View File

@@ -4,9 +4,10 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float x[47][47];
for (uniform int i = 0; i < 47; ++i)
for (uniform int j = 0; j < 47; ++j)
assert(programCount <= 64);
uniform float x[70][70];
for (uniform int i = 0; i < 70; ++i)
for (uniform int j = 0; j < 70; ++j)
x[i][j] = 2+b-5;
// all are 2 except (4,2) = 0, (4,...) = 1, (4,programCount-1)=2

View File

@@ -10,6 +10,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
for (uniform int j = 0; j < 47; ++j)
x[i][j] = 2+b-5;
a = min(a,46);
x[a][b-1] = 0;
RET[programIndex] = x[2][a];
}

View File

@@ -11,7 +11,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
uniform int index[4] = { 0, 1, 2, 4 };
float v = index[programIndex & 0x3];
x[a][v] = 0;
x[min(a,39)][v] = 0;
RET[programIndex] = x[v+1][v];
}

View File

@@ -4,7 +4,7 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float a[programCount+4];
for (unsigned int i = 0; i < programCount+4; ++i)
for (uniform int i = 0; i < programCount+4; ++i)
a[i] = aFOO[min((int)i, programCount)];
RET[programIndex] = *(a + 2);

View File

@@ -4,9 +4,8 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float x[100];
// HACK to avoid @llvm.memset...
for (uniform int i = 0; i < b*20; ++i)
uniform float x[2*programCount];
for (uniform int i = 0; i < 2*programCount; ++i)
x[i] = 0;
x[2*(a-1)] = b;

View File

@@ -4,9 +4,8 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float x[100];
// HACK to avoid @llvm.memset...
for (uniform int i = 0; i < b*20; ++i)
uniform float x[2*programCount];
for (uniform int i = 0; i < 2*programCount; ++i)
x[i] = 0;
x[2*(a-1)] = b;

View File

@@ -5,8 +5,8 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform float x[40];
for (uniform int i = 0; i < 40; ++i)
uniform float x[programCount+5];
for (uniform int i = 0; i < programCount+5; ++i)
x[i] = 0.;
x[a] = 2;
RET[programIndex] = x[4] + x[0] + x[5];

View File

@@ -4,9 +4,8 @@ export uniform int width() { return programCount; }
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
float x[30];
// HACK to avoid @llvm.memset...
for (uniform int i = 0; i < b*6; ++i)
float x[2*programCount];
for (uniform int i = 0; i < 2*programCount; ++i)
x[i] = 0;
x[a] = a;
RET[programIndex] = x[4] + x[0] + x[5];

View File

@@ -4,14 +4,14 @@ export uniform int width() { return programCount; }
struct Foo {
uniform float x[17];
uniform float x[programCount+1];
};
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
float a = aFOO[programIndex];
uniform Foo foo;
uniform int i;
for (i = 0; i < 17; ++i)
for (i = 0; i < programCount+1; ++i)
foo.x[i] = i;
if ((int)a & 1)

View File

@@ -8,12 +8,12 @@ export void f_v(uniform float RET[]) {
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
uniform float r[8];
foreach (i = 0 ... 8)
r[i] = sin(asin(vals[i]));
r[i] = sin(asin(vals[i%8]));
int errors = 0;
for (uniform int i = 0; i < 8; ++i) {
if (ok(r[i], vals[i]) == false) {
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
if (ok(r[i], vals[i%8]) == false) {
print("error @ %: got %, expected %\n", i, r[i], vals[i%8]);
++errors;
}
}

View File

@@ -6,14 +6,14 @@ uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1)
if (programIndex < 30 && programIndex & 1)
b = atomic_or_global(&s, (1 << programIndex));
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
uniform int sum = 0;
for (uniform int i = 0; i < programCount; ++i)
for (uniform int i = 0; i < min(30, programCount); ++i)
if (i & 1)
sum += (1 << i);
RET[programIndex] = sum;

View File

@@ -5,12 +5,12 @@ uniform unsigned int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1)
int32 b = 0;
if (programIndex < 32 && programIndex & 1)
b = atomic_or_global(&s, (1 << programIndex));
RET[programIndex] = popcnt(reduce_max((int32)b));
}
export void result(uniform float RET[]) {
RET[programIndex] = programCount == 1 ? 0 : ((programCount/2) - 1);
RET[programIndex] = programCount == 1 ? 0 : ((min(32, programCount)/2) - 1);
}

View File

@@ -7,14 +7,14 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = 0;
if (programIndex & 1)
b = atomic_or_global(&s, (1 << programIndex));
b = atomic_or_global(&s, (1ull << programIndex));
RET[programIndex] = (s>>20);
}
export void result(uniform float RET[]) {
uniform int sum = 0;
uniform int64 sum = 0;
for (uniform int i = 0; i < programCount; ++i)
if (i & 1)
sum += (1 << i);
sum += (1ull << i);
RET[programIndex] = ((unsigned int64)(0xffffffffff000000 | sum)) >> 20;
}

View File

@@ -5,10 +5,10 @@ uniform int32 s = 0;
export void f_f(uniform float RET[], uniform float aFOO[]) {
float a = aFOO[programIndex];
float b = atomic_or_global(&s, (1<<programIndex));
float b = atomic_or_global(&s, (1<<min(programIndex,30)));
RET[programIndex] = s;
}
export void result(uniform float RET[]) {
RET[programIndex] = (1<<programCount)-1;
RET[programIndex] = (1<<min(programCount,31))-1;
}

View File

@@ -5,7 +5,8 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
uniform float * uniform buf = uniform new uniform float[32*32];
for (uniform int i = 0; i < 32*32; ++i)
buf[i] = i;
assert(programIndex <= 64);
RET[programIndex] = buf[64-programIndex];
}

View File

@@ -3,10 +3,10 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
int32 i = (1 << programIndex);
int32 i = (1 << (programIndex % 28));
RET[programIndex] = count_leading_zeros(i);
}
export void result(uniform float RET[]) {
RET[programIndex] = 31-programIndex;
RET[programIndex] = 31-(programIndex%28);
}

View File

@@ -3,10 +3,10 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
unsigned int64 i = ((unsigned int64)1 << (40+programIndex));
unsigned int64 i = ((unsigned int64)1 << min(63, 40+programIndex));
RET[programIndex] = count_trailing_zeros(i);
}
export void result(uniform float RET[]) {
RET[programIndex] = 40+programIndex;
RET[programIndex] = min(63, 40+programIndex);
}

View File

@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = exclusive_scan_add(programIndex);
}
export void result(uniform float RET[]) {
uniform int result[] = { 0, 0, 1, 3, 6, 10, 15, 21, 28,
36, 45, 55, 66, 78, 91, 105, 120 };
RET[programIndex] = result[programIndex];
int es(int v) {
uniform int vv[programCount];
vv[programIndex] = v;
uniform int r[programCount];
r[0] = 0;
for (uniform int i = 1; i < programCount; ++i)
r[i] = r[i-1] + vv[i-1];
return r[programIndex];
}
export void result(uniform float RET[]) {
RET[programIndex] = es(programIndex);
}

View File

@@ -10,11 +10,19 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
}
int es(int v) {
uniform int vv[programCount];
vv[programIndex] = v;
uniform int r[programCount];
r[0] = 0;
for (uniform int i = 1; i < programCount; ++i)
r[i] = r[i-1] + vv[i-1];
return r[programIndex];
}
export void result(uniform float RET[]) {
uniform int result[16] = { 0, 0, 0, 2, 0, 6, 0, 12,
0, 20, 0, 30, 0, 42, 0, 56 };
if (programIndex & 1)
RET[programIndex] = result[programIndex];
else
RET[programIndex] = es((programIndex & 1) ? (programIndex+1) : 0);
if ((programIndex & 1) == 0)
RET[programIndex] = -1;
}

View File

@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = exclusive_scan_add(aFOO[programIndex]);
}
export void result(uniform float RET[]) {
uniform int result[] = { 0, 1, 3, 6, 10, 15, 21, 28,
36, 45, 55, 66, 78, 91, 105, 120, 136 };
RET[programIndex] = result[programIndex];
int es(int v) {
uniform int vv[programCount];
vv[programIndex] = v;
uniform int r[programCount];
r[0] = 0;
for (uniform int i = 1; i < programCount; ++i)
r[i] = r[i-1] + vv[i-1];
return r[programIndex];
}
export void result(uniform float RET[]) {
RET[programIndex] = es(programIndex+1);
}

View File

@@ -9,8 +9,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
}
export void result(uniform float RET[]) {
uniform int result[] = { 0, 1, 3, 0, 0, 0, 0, 0,
0, 0, 0, 0, 0, 0, 0, 0 };
uniform int result[] = { 0, 1, 3 };
RET[programIndex] = -1;
if (programIndex <= 1)
RET[programIndex] = result[programIndex];

View File

@@ -9,12 +9,20 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
}
}
int es(int v) {
uniform int vv[programCount];
vv[programIndex] = v;
uniform int r[programCount];
r[0] = 0;
for (uniform int i = 1; i < programCount; ++i)
r[i] = r[i-1] + vv[i-1];
return r[programIndex];
}
export void result(uniform float RET[]) {
uniform int result[16] = { 0, 0, 0, 2, 0, 6, 0, 12,
0, 20, 0, 30, 0, 42, 0, 56 };
if (programIndex & 1)
RET[programIndex] = result[programIndex];
else
RET[programIndex] = es((programIndex & 1) ? (programIndex+1) : 0);
if ((programIndex & 1) == 0)
RET[programIndex] = -1;
}

View File

@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = exclusive_scan_add((float)programIndex);
}
export void result(uniform float RET[]) {
uniform int result[] = { 0, 0, 1, 3, 6, 10, 15, 21, 28,
36, 45, 55, 66, 78, 91, 105, 120 };
RET[programIndex] = result[programIndex];
int es(int v) {
uniform int vv[programCount];
vv[programIndex] = v;
uniform int r[programCount];
r[0] = 0;
for (uniform int i = 1; i < programCount; ++i)
r[i] = r[i-1] + vv[i-1];
return r[programIndex];
}
export void result(uniform float RET[]) {
RET[programIndex] = es(programIndex);
}

View File

@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = exclusive_scan_add((double)aFOO[programIndex]);
}
export void result(uniform float RET[]) {
uniform int result[] = { 0, 1, 3, 6, 10, 15, 21, 28,
36, 45, 55, 66, 78, 91, 105, 120, 136 };
RET[programIndex] = result[programIndex];
int es(int v) {
uniform int vv[programCount];
vv[programIndex] = v;
uniform int r[programCount];
r[0] = 0;
for (uniform int i = 1; i < programCount; ++i)
r[i] = r[i-1] + vv[i-1];
return r[programIndex];
}
export void result(uniform float RET[]) {
RET[programIndex] = es(programIndex+1);
}

View File

@@ -4,7 +4,7 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = -1;
int32 a = ~(1 << programIndex);
if ((programIndex & 1) == 0) {
if ((programIndex < 32) && (programIndex & 1) == 0) {
RET[programIndex] = exclusive_scan_and(a);
}
}
@@ -12,7 +12,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
export void result(uniform float RET[]) {
RET[programIndex] = -1;
if ((programIndex & 1) == 0 && programIndex > 0) {
if ((programIndex & 1) == 0 && programIndex > 0 && programIndex < 32) {
int val = 0xffffffff;
for (int i = 0; i < programIndex-1; i += 2)
val &= ~(1<<i);

View File

@@ -3,11 +3,11 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
RET[programIndex] = -1;
int32 a = (1 << programIndex);
int32 a = (1 << (min(programIndex, 30)));
RET[programIndex] = exclusive_scan_or(a);
}
export void result(uniform float RET[]) {
RET[programIndex] = (1 << programIndex) - 1;
RET[programIndex] = (1 << (min(programIndex, 31))) - 1;
}

View File

@@ -10,8 +10,10 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
// make sure we reset the func mask in the foreach loop...
if ((int)aFOO[programIndex] & 1)
foreach (i = 0 ... programCount+3)
val[i] += aFOO[i] - 1;
foreach (i = 0 ... programCount+3) {
int ic = min(i, programCount-1);
val[i] += aFOO[ic] - 1 + i-ic;
}
RET[programIndex] = val[3+programIndex];
}

View File

@@ -5,8 +5,10 @@ export uniform int width() { return programCount; }
// make sure we reset the func mask in the foreach loop...
void update(uniform float val[], const uniform float a[]) {
foreach (i = 0 ... programCount+3)
val[i] += a[i] - 1;
foreach (i = 0 ... programCount+3) {
int ic = min(i, programCount-1);
val[i] += a[ic] - 1 + i-ic;
}
}
export void f_f(uniform float RET[], uniform float aFOO[]) {

View File

@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
export void f_f(uniform float RET[], uniform float aFOO[]) {
double a = (1<<programIndex) * 1.5;
double a = (1<< (programIndex % 28)) * 1.5;
if (programIndex & 1)
a = -a;
int exponent;
@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
}
export void result(uniform float RET[]) {
RET[programIndex] = 1+programIndex;
RET[programIndex] = 1+(programIndex % 28);
}

Some files were not shown because too many files have changed in this diff Show More