Merged Upstream
This commit is contained in:
34
Makefile
34
Makefile
@@ -2,6 +2,15 @@
|
||||
# ispc Makefile
|
||||
#
|
||||
|
||||
# If you have your own special version of llvm and/or clang, change
|
||||
# these variables to match.
|
||||
LLVM_CONFIG=$(shell which llvm-config)
|
||||
CLANG_INCLUDE=$(shell $(LLVM_CONFIG) --includedir)
|
||||
|
||||
# Add llvm bin to the path so any scripts run will go to the right llvm-config
|
||||
LLVM_BIN= $(shell $(LLVM_CONFIG) --bindir)
|
||||
export PATH:=$(LLVM_BIN):$(PATH)
|
||||
|
||||
ARCH_OS = $(shell uname)
|
||||
ifeq ($(ARCH_OS), Darwin)
|
||||
ARCH_OS2 = "OSX"
|
||||
@@ -10,10 +19,12 @@ else
|
||||
endif
|
||||
ARCH_TYPE = $(shell arch)
|
||||
|
||||
ifeq ($(shell llvm-config --version), 3.1svn)
|
||||
ifeq ($(shell $(LLVM_CONFIG) --version), 3.0)
|
||||
LLVM_LIBS=$(shell $(LLVM_CONFIG) --libs)
|
||||
else
|
||||
LLVM_LIBS=-lLLVMAsmParser -lLLVMInstrumentation -lLLVMLinker \
|
||||
-lLLVMArchive -lLLVMBitReader -lLLVMDebugInfo -lLLVMJIT -lLLVMipo \
|
||||
-lLLVMBitWriter -lLLVMTableGen -lLLVMCBackendInfo \
|
||||
-lLLVMBitWriter -lLLVMTableGen \
|
||||
-lLLVMX86Disassembler -lLLVMX86CodeGen -lLLVMSelectionDAG \
|
||||
-lLLVMAsmPrinter -lLLVMX86AsmParser -lLLVMX86Desc -lLLVMX86Info \
|
||||
-lLLVMX86AsmPrinter -lLLVMX86Utils -lLLVMMCDisassembler -lLLVMMCParser \
|
||||
@@ -21,19 +32,17 @@ ifeq ($(shell llvm-config --version), 3.1svn)
|
||||
-lLLVMipa -lLLVMAnalysis -lLLVMMCJIT -lLLVMRuntimeDyld \
|
||||
-lLLVMExecutionEngine -lLLVMTarget -lLLVMMC -lLLVMObject -lLLVMCore \
|
||||
-lLLVMSupport
|
||||
else
|
||||
LLVM_LIBS=$(shell llvm-config --libs)
|
||||
endif
|
||||
|
||||
CLANG=clang
|
||||
CLANG_LIBS = -lclangFrontend -lclangDriver \
|
||||
-lclangSerialization -lclangParse -lclangSema \
|
||||
-lclangAnalysis -lclangAST -lclangLex -lclangBasic
|
||||
ifeq ($(shell llvm-config --version), 3.1svn)
|
||||
ifneq ($(shell $(LLVM_CONFIG) --version), 3.0)
|
||||
CLANG_LIBS += -lclangEdit
|
||||
endif
|
||||
|
||||
ISPC_LIBS=$(shell llvm-config --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
|
||||
ISPC_LIBS=$(shell $(LLVM_CONFIG) --ldflags) $(CLANG_LIBS) $(LLVM_LIBS) \
|
||||
-lpthread
|
||||
|
||||
ifeq ($(ARCH_OS),Linux)
|
||||
@@ -44,8 +53,8 @@ ifeq ($(ARCH_OS2),Msys)
|
||||
ISPC_LIBS += -lshlwapi -limagehlp -lpsapi
|
||||
endif
|
||||
|
||||
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
|
||||
LLVM_VERSION=LLVM_$(shell llvm-config --version | sed s/\\./_/)
|
||||
LLVM_CXXFLAGS=$(shell $(LLVM_CONFIG) --cppflags)
|
||||
LLVM_VERSION=LLVM_$(shell $(LLVM_CONFIG) --version | sed -e s/\\./_/ -e s/svn//)
|
||||
LLVM_VERSION_DEF=-D$(LLVM_VERSION)
|
||||
|
||||
BUILD_DATE=$(shell date +%Y%m%d)
|
||||
@@ -53,8 +62,9 @@ BUILD_VERSION=$(shell git log --abbrev-commit --abbrev=16 | head -1)
|
||||
|
||||
CXX=g++
|
||||
CPP=cpp
|
||||
OPT=-g3
|
||||
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -Wall $(LLVM_VERSION_DEF) \
|
||||
OPT=-O2
|
||||
CXXFLAGS=$(OPT) $(LLVM_CXXFLAGS) -I. -Iobjs/ -I$(CLANG_INCLUDE) \
|
||||
-Wall $(LLVM_VERSION_DEF) \
|
||||
-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""
|
||||
|
||||
LDFLAGS=
|
||||
@@ -75,7 +85,7 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
|
||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||
opt.h stmt.h sym.h type.h util.h
|
||||
TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
|
||||
generic-16 generic-1
|
||||
generic-16 generic-32 generic-64 generic-1
|
||||
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||
builtins/dispatch.ll
|
||||
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||
@@ -114,7 +124,7 @@ doxygen:
|
||||
|
||||
ispc: print_llvm_src dirs $(OBJS)
|
||||
@echo Creating ispc executable
|
||||
@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
|
||||
@$(CXX) $(OPT) $(LDFLAGS) -o $@ $(OBJS) $(ISPC_LIBS)
|
||||
|
||||
objs/%.o: %.cpp
|
||||
@echo Compiling $<
|
||||
|
||||
71
ast.cpp
71
ast.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -32,8 +32,10 @@
|
||||
*/
|
||||
|
||||
/** @file ast.cpp
|
||||
@brief
|
||||
*/
|
||||
|
||||
@brief General functionality related to abstract syntax trees and
|
||||
traversal of them.
|
||||
*/
|
||||
|
||||
#include "ast.h"
|
||||
#include "expr.h"
|
||||
@@ -53,10 +55,10 @@ ASTNode::~ASTNode() {
|
||||
// AST
|
||||
|
||||
void
|
||||
AST::AddFunction(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code) {
|
||||
AST::AddFunction(Symbol *sym, Stmt *code) {
|
||||
if (sym == NULL)
|
||||
return;
|
||||
functions.push_back(new Function(sym, args, code));
|
||||
functions.push_back(new Function(sym, code));
|
||||
}
|
||||
|
||||
|
||||
@@ -151,7 +153,7 @@ WalkAST(ASTNode *node, ASTPreCallBackFunc preFunc, ASTPostCallBackFunc postFunc,
|
||||
else if ((ls = dynamic_cast<LabeledStmt *>(node)) != NULL)
|
||||
ls->stmt = (Stmt *)WalkAST(ls->stmt, preFunc, postFunc, data);
|
||||
else if ((rs = dynamic_cast<ReturnStmt *>(node)) != NULL)
|
||||
rs->val = (Expr *)WalkAST(rs->val, preFunc, postFunc, data);
|
||||
rs->expr = (Expr *)WalkAST(rs->expr, preFunc, postFunc, data);
|
||||
else if ((sl = dynamic_cast<StmtList *>(node)) != NULL) {
|
||||
std::vector<Stmt *> &sls = sl->stmts;
|
||||
for (unsigned int i = 0; i < sls.size(); ++i)
|
||||
@@ -305,19 +307,39 @@ TypeCheck(Stmt *stmt) {
|
||||
}
|
||||
|
||||
|
||||
struct CostData {
|
||||
CostData() { cost = foreachDepth = 0; }
|
||||
|
||||
int cost;
|
||||
int foreachDepth;
|
||||
};
|
||||
|
||||
|
||||
static bool
|
||||
lCostCallback(ASTNode *node, void *c) {
|
||||
int *cost = (int *)c;
|
||||
*cost += node->EstimateCost();
|
||||
lCostCallbackPre(ASTNode *node, void *d) {
|
||||
CostData *data = (CostData *)d;
|
||||
if (dynamic_cast<ForeachStmt *>(node) != NULL)
|
||||
++data->foreachDepth;
|
||||
if (data->foreachDepth == 0)
|
||||
data->cost += node->EstimateCost();
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
static ASTNode *
|
||||
lCostCallbackPost(ASTNode *node, void *d) {
|
||||
CostData *data = (CostData *)d;
|
||||
if (dynamic_cast<ForeachStmt *>(node) != NULL)
|
||||
--data->foreachDepth;
|
||||
return node;
|
||||
}
|
||||
|
||||
|
||||
int
|
||||
EstimateCost(ASTNode *root) {
|
||||
int cost = 0;
|
||||
WalkAST(root, lCostCallback, NULL, &cost);
|
||||
return cost;
|
||||
CostData data;
|
||||
WalkAST(root, lCostCallbackPre, lCostCallbackPost, &data);
|
||||
return data.cost;
|
||||
}
|
||||
|
||||
|
||||
@@ -334,10 +356,10 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||
return false;
|
||||
|
||||
const Type *type = fce->func->GetType();
|
||||
const PointerType *pt = dynamic_cast<const PointerType *>(type);
|
||||
const PointerType *pt = CastType<PointerType>(type);
|
||||
if (pt != NULL)
|
||||
type = pt->GetBaseType();
|
||||
const FunctionType *ftype = dynamic_cast<const FunctionType *>(type);
|
||||
const FunctionType *ftype = CastType<FunctionType>(type);
|
||||
Assert(ftype != NULL);
|
||||
|
||||
if (ftype->isSafe == false) {
|
||||
@@ -363,17 +385,22 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||
return false;
|
||||
}
|
||||
|
||||
if (g->target.allOffMaskIsSafe == true)
|
||||
// Don't worry about memory accesses if we have a target that can
|
||||
// safely run them with the mask all off
|
||||
return true;
|
||||
if (dynamic_cast<ForeachStmt *>(node) != NULL) {
|
||||
// foreach() statements also shouldn't be run with an all-off mask.
|
||||
// Since they re-establish an 'all on' mask, this would be pretty
|
||||
// unintuitive. (More generally, it's possibly a little strange to
|
||||
// allow foreach() in the presence of any non-uniform control
|
||||
// flow...)
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
IndexExpr *ie;
|
||||
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
|
||||
const Type *type = ie->baseExpr->GetType();
|
||||
if (type == NULL)
|
||||
return true;
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL)
|
||||
if (CastType<ReferenceType>(type) != NULL)
|
||||
type = type->GetReferenceTarget();
|
||||
|
||||
ConstExpr *ce = dynamic_cast<ConstExpr *>(ie->index);
|
||||
@@ -383,16 +410,14 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||
return false;
|
||||
}
|
||||
|
||||
const PointerType *pointerType =
|
||||
dynamic_cast<const PointerType *>(type);
|
||||
const PointerType *pointerType = CastType<PointerType>(type);
|
||||
if (pointerType != NULL) {
|
||||
// pointer[index] -> can't be sure -> not safe
|
||||
*okPtr = false;
|
||||
return false;
|
||||
}
|
||||
|
||||
const SequentialType *seqType =
|
||||
dynamic_cast<const SequentialType *>(type);
|
||||
const SequentialType *seqType = CastType<SequentialType>(type);
|
||||
Assert(seqType != NULL);
|
||||
int nElements = seqType->GetElementCount();
|
||||
if (nElements == 0) {
|
||||
|
||||
5
ast.h
5
ast.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -84,8 +84,7 @@ class AST {
|
||||
public:
|
||||
/** Add the AST for a function described by the given declaration
|
||||
information and source code. */
|
||||
void AddFunction(Symbol *sym, const std::vector<Symbol *> &args,
|
||||
Stmt *code);
|
||||
void AddFunction(Symbol *sym, Stmt *code);
|
||||
|
||||
/** Generate LLVM IR for all of the functions into the current
|
||||
module. */
|
||||
|
||||
104
builtins.cpp
104
builtins.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -157,7 +157,7 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
||||
|
||||
static void
|
||||
lCreateSymbol(const std::string &name, const Type *returnType,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
llvm::SmallVector<const Type *, 8> &argTypes,
|
||||
const llvm::FunctionType *ftype, llvm::Function *func,
|
||||
SymbolTable *symbolTable) {
|
||||
SourcePos noPos;
|
||||
@@ -199,7 +199,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
||||
// bool, so just have a one-off override for that one...
|
||||
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
|
||||
const Type *returnType = AtomicType::VaryingInt32;
|
||||
std::vector<const Type *> argTypes;
|
||||
llvm::SmallVector<const Type *, 8> argTypes;
|
||||
argTypes.push_back(AtomicType::VaryingBool);
|
||||
|
||||
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
|
||||
@@ -229,7 +229,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
||||
// Iterate over the arguments and try to find their equivalent ispc
|
||||
// types. Track if any of the arguments has an integer type.
|
||||
bool anyIntArgs = false;
|
||||
std::vector<const Type *> argTypes;
|
||||
llvm::SmallVector<const Type *, 8> argTypes;
|
||||
for (unsigned int j = 0; j < ftype->getNumParams(); ++j) {
|
||||
const llvm::Type *llvmArgType = ftype->getParamType(j);
|
||||
const Type *type = lLLVMTypeToISPCType(llvmArgType, intAsUnsigned);
|
||||
@@ -291,7 +291,7 @@ lCheckModuleIntrinsics(llvm::Module *module) {
|
||||
if (!strncmp(funcName.c_str(), "llvm.x86.", 9)) {
|
||||
llvm::Intrinsic::ID id = (llvm::Intrinsic::ID)func->getIntrinsicID();
|
||||
Assert(id != 0);
|
||||
LLVM_TYPE_CONST llvm::Type *intrinsicType =
|
||||
llvm::Type *intrinsicType =
|
||||
llvm::Intrinsic::getType(*g->ctx, id);
|
||||
intrinsicType = llvm::PointerType::get(intrinsicType, 0);
|
||||
Assert(func->getType() == intrinsicType);
|
||||
@@ -411,12 +411,16 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__extract_int64",
|
||||
"__extract_int8",
|
||||
"__fastmath",
|
||||
"__float_to_half_uniform",
|
||||
"__float_to_half_varying",
|
||||
"__floatbits_uniform_int32",
|
||||
"__floatbits_varying_int32",
|
||||
"__floor_uniform_double",
|
||||
"__floor_uniform_float",
|
||||
"__floor_varying_double",
|
||||
"__floor_varying_float",
|
||||
"__half_to_float_uniform",
|
||||
"__half_to_float_varying",
|
||||
"__insert_int16",
|
||||
"__insert_int32",
|
||||
"__insert_int64",
|
||||
@@ -616,9 +620,7 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
|
||||
std::string(linkError);
|
||||
if (llvm::Linker::LinkModules(module, bcModule,
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::Linker::DestroySource,
|
||||
#endif // LLVM_3_0
|
||||
&linkError))
|
||||
Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
|
||||
lSetInternalFunctions(module);
|
||||
@@ -635,16 +637,36 @@ AddBitcodeToModule(const unsigned char *bitcode, int length,
|
||||
static void
|
||||
lDefineConstantInt(const char *name, int val, llvm::Module *module,
|
||||
SymbolTable *symbolTable) {
|
||||
Symbol *pw =
|
||||
Symbol *sym =
|
||||
new Symbol(name, SourcePos(), AtomicType::UniformInt32->GetAsConstType(),
|
||||
SC_STATIC);
|
||||
pw->constValue = new ConstExpr(pw->type, val, SourcePos());
|
||||
LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32Type;
|
||||
sym->constValue = new ConstExpr(sym->type, val, SourcePos());
|
||||
llvm::Type *ltype = LLVMTypes::Int32Type;
|
||||
llvm::Constant *linit = LLVMInt32(val);
|
||||
pw->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
linit, pw->name.c_str());
|
||||
symbolTable->AddVariable(pw);
|
||||
// Use WeakODRLinkage rather than InternalLinkage so that a definition
|
||||
// survives even if it's not used in the module, so that the symbol is
|
||||
// there in the debugger.
|
||||
sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::WeakODRLinkage,
|
||||
linit, name);
|
||||
symbolTable->AddVariable(sym);
|
||||
|
||||
if (m->diBuilder != NULL) {
|
||||
llvm::DIFile file;
|
||||
llvm::DIType diType = sym->type->GetDIType(file);
|
||||
Assert(diType.Verify());
|
||||
// FIXME? DWARF says that this (and programIndex below) should
|
||||
// have the DW_AT_artificial attribute.  It's not clear if this
|
||||
// matters for anything though.
|
||||
llvm::DIGlobalVariable var =
|
||||
m->diBuilder->createGlobalVariable(name,
|
||||
file,
|
||||
0 /* line */,
|
||||
diType,
|
||||
true /* static */,
|
||||
sym->storagePtr);
|
||||
Assert(var.Verify());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -652,7 +674,7 @@ lDefineConstantInt(const char *name, int val, llvm::Module *module,
|
||||
static void
|
||||
lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
|
||||
SymbolTable *symbolTable) {
|
||||
std::vector<const Type *> args;
|
||||
llvm::SmallVector<const Type *, 8> args;
|
||||
FunctionType *ft = new FunctionType(AtomicType::UniformInt32, args, SourcePos());
|
||||
Symbol *sym = new Symbol(name, SourcePos(), ft, SC_STATIC);
|
||||
|
||||
@@ -670,21 +692,37 @@ lDefineConstantIntFunc(const char *name, int val, llvm::Module *module,
|
||||
|
||||
static void
|
||||
lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
Symbol *pidx =
|
||||
Symbol *sym =
|
||||
new Symbol("programIndex", SourcePos(),
|
||||
AtomicType::VaryingInt32->GetAsConstType(), SC_STATIC);
|
||||
|
||||
int pi[ISPC_MAX_NVEC];
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
pi[i] = i;
|
||||
pidx->constValue = new ConstExpr(pidx->type, pi, SourcePos());
|
||||
sym->constValue = new ConstExpr(sym->type, pi, SourcePos());
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *ltype = LLVMTypes::Int32VectorType;
|
||||
llvm::Type *ltype = LLVMTypes::Int32VectorType;
|
||||
llvm::Constant *linit = LLVMInt32Vector(pi);
|
||||
pidx->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::InternalLinkage, linit,
|
||||
pidx->name.c_str());
|
||||
symbolTable->AddVariable(pidx);
|
||||
// See comment in lDefineConstantInt() for why WeakODRLinkage is used here
|
||||
sym->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::WeakODRLinkage,
|
||||
linit,
|
||||
sym->name.c_str());
|
||||
symbolTable->AddVariable(sym);
|
||||
|
||||
if (m->diBuilder != NULL) {
|
||||
llvm::DIFile file;
|
||||
llvm::DIType diType = sym->type->GetDIType(file);
|
||||
Assert(diType.Verify());
|
||||
llvm::DIGlobalVariable var =
|
||||
m->diBuilder->createGlobalVariable(sym->name.c_str(),
|
||||
file,
|
||||
0 /* line */,
|
||||
diType,
|
||||
false /* static */,
|
||||
sym->storagePtr);
|
||||
Assert(var.Verify());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -809,6 +847,20 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
builtins_bitcode_generic_16_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 32:
|
||||
extern unsigned char builtins_bitcode_generic_32[];
|
||||
extern int builtins_bitcode_generic_32_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_32,
|
||||
builtins_bitcode_generic_32_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 64:
|
||||
extern unsigned char builtins_bitcode_generic_64[];
|
||||
extern int builtins_bitcode_generic_64_length;
|
||||
AddBitcodeToModule(builtins_bitcode_generic_64,
|
||||
builtins_bitcode_generic_64_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 1:
|
||||
extern unsigned char builtins_bitcode_generic_1[];
|
||||
extern int builtins_bitcode_generic_1_length;
|
||||
@@ -841,10 +893,12 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
||||
symbolTable);
|
||||
lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
|
||||
symbolTable);
|
||||
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload, module,
|
||||
symbolTable);
|
||||
lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
|
||||
module, symbolTable);
|
||||
|
||||
lDefineConstantInt("__have_native_half", (g->target.isa == Target::AVX2),
|
||||
lDefineConstantInt("__have_native_half", g->target.hasHalf, module,
|
||||
symbolTable);
|
||||
lDefineConstantInt("__have_native_transcendentals", g->target.hasTranscendentals,
|
||||
module, symbolTable);
|
||||
|
||||
if (includeStdlibISPC) {
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -70,7 +70,7 @@ typedef int Bool;
|
||||
putchar('['); \
|
||||
for (int i = 0; i < width; ++i) { \
|
||||
/* only print the value if the current lane is executing */ \
|
||||
if (mask & (1<<i)) \
|
||||
if (mask & (1ull<<i)) \
|
||||
printf(fmt, ((type *)ptr)[i]); \
|
||||
else \
|
||||
printf("((" fmt "))", ((type *)ptr)[i]); \
|
||||
@@ -89,7 +89,7 @@ typedef int Bool;
|
||||
@param mask    Current lane mask when the print statement is called
|
||||
@param args Array of pointers to the values to be printed
|
||||
*/
|
||||
void __do_print(const char *format, const char *types, int width, int mask,
|
||||
void __do_print(const char *format, const char *types, int width, uint64_t mask,
|
||||
void **args) {
|
||||
if (mask == 0)
|
||||
return;
|
||||
@@ -113,7 +113,7 @@ void __do_print(const char *format, const char *types, int width, int mask,
|
||||
case 'B': {
|
||||
putchar('[');
|
||||
for (int i = 0; i < width; ++i) {
|
||||
if (mask & (1<<i))
|
||||
if (mask & (1ull << i))
|
||||
printf("%s", ((Bool *)ptr)[i] ? "true" : "false");
|
||||
else
|
||||
printf("_________");
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -175,7 +175,7 @@ define <16 x float> @__min_varying_float(<16 x float>,
|
||||
|
||||
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
|
||||
|
||||
define i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
|
||||
define i64 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
|
||||
%floatmask = bitcast <16 x i32> %0 to <16 x float>
|
||||
%mask0 = shufflevector <16 x float> %floatmask, <16 x float> undef,
|
||||
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
|
||||
@@ -186,7 +186,8 @@ define i32 @__movmsk(<16 x i32>) nounwind readnone alwaysinline {
|
||||
|
||||
%v1shift = shl i32 %v1, 8
|
||||
%v = or i32 %v1shift, %v0
|
||||
ret i32 %v
|
||||
%v64 = zext i32 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -175,10 +175,11 @@ define <8 x float> @__min_varying_float(<8 x float>,
|
||||
|
||||
declare i32 @llvm.x86.avx.movmsk.ps.256(<8 x float>) nounwind readnone
|
||||
|
||||
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
||||
%v = call i32 @llvm.x86.avx.movmsk.ps.256(<8 x float> %floatmask) nounwind readnone
|
||||
ret i32 %v
|
||||
%v64 = zext i32 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -186,14 +186,14 @@ define void @__masked_store_blend_64(<1 x i64>* nocapture, <1 x i64>,
|
||||
ret void
|
||||
}
|
||||
|
||||
define i32 @__movmsk(<1 x i32>) nounwind readnone alwaysinline {
|
||||
define i64 @__movmsk(<1 x i32>) nounwind readnone alwaysinline {
|
||||
%item = extractelement <1 x i32> %0, i32 0
|
||||
%v = lshr i32 %item, 31
|
||||
ret i32 %v
|
||||
%v64 = zext i32 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding
|
||||
;;
|
||||
|
||||
33
builtins/target-generic-32.ll
Normal file
33
builtins/target-generic-32.ll
Normal file
@@ -0,0 +1,33 @@
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
define(`WIDTH',`32')
|
||||
include(`target-generic-common.ll')
|
||||
33
builtins/target-generic-64.ll
Normal file
33
builtins/target-generic-64.ll
Normal file
@@ -0,0 +1,33 @@
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
define(`WIDTH',`64')
|
||||
include(`target-generic-common.ll')
|
||||
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -39,12 +39,12 @@ reduce_equal(WIDTH)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; broadcast/rotate/shuffle
|
||||
|
||||
declare <WIDTH x float> @__smear_float(float) nounwind readnone
|
||||
declare <WIDTH x double> @__smear_double(double) nounwind readnone
|
||||
declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
|
||||
declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
|
||||
declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone
|
||||
declare <WIDTH x float> @__smear_float(<WIDTH x float>, float) nounwind readnone
|
||||
declare <WIDTH x double> @__smear_double(<WIDTH x double>, double) nounwind readnone
|
||||
declare <WIDTH x i8> @__smear_i8(<WIDTH x i8>, i8) nounwind readnone
|
||||
declare <WIDTH x i16> @__smear_i16(<WIDTH x i16>, i16) nounwind readnone
|
||||
declare <WIDTH x i32> @__smear_i32(<WIDTH x i32>, i32) nounwind readnone
|
||||
declare <WIDTH x i64> @__smear_i64(<WIDTH x i64>, i64) nounwind readnone
|
||||
|
||||
declare <WIDTH x float> @__broadcast_float(<WIDTH x float>, i32) nounwind readnone
|
||||
declare <WIDTH x double> @__broadcast_double(<WIDTH x double>, i32) nounwind readnone
|
||||
@@ -201,7 +201,7 @@ declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reductions
|
||||
|
||||
declare i32 @__movmsk(<WIDTH x i1>) nounwind readnone
|
||||
declare i64 @__movmsk(<WIDTH x i1>) nounwind readnone
|
||||
|
||||
declare float @__reduce_add_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__reduce_min_float(<WIDTH x float>) nounwind readnone
|
||||
@@ -249,7 +249,16 @@ declare void @__masked_store_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
declare void @__masked_store_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||
<WIDTH x i1> %mask) nounwind
|
||||
|
||||
ifelse(LLVM_VERSION, `LLVM_3_1svn',`
|
||||
ifelse(LLVM_VERSION, `LLVM_3_0', `
|
||||
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||
<WIDTH x i1> %mask) nounwind
|
||||
', `
|
||||
define void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind alwaysinline {
|
||||
%v = load <WIDTH x i8> * %0
|
||||
@@ -281,15 +290,6 @@ define void @__masked_store_blend_64(<WIDTH x i64>* nocapture,
|
||||
store <WIDTH x i64> %v1, <WIDTH x i64> * %0
|
||||
ret void
|
||||
}
|
||||
',`
|
||||
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||
<WIDTH x i1>) nounwind
|
||||
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||
<WIDTH x i1> %mask) nounwind
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -295,7 +295,7 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||
|
||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
; first do two 4-wide movmsk calls
|
||||
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
||||
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
|
||||
@@ -309,7 +309,8 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
; of the second one
|
||||
%v1s = shl i32 %v1, 4
|
||||
%v = or i32 %v0, %v1s
|
||||
ret i32 %v
|
||||
%v64 = zext i32 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
define <4 x float> @__vec4_add_float(<4 x float> %v0,
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -239,10 +239,11 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
|
||||
|
||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
||||
define i64 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
||||
%floatmask = bitcast <4 x i32> %0 to <4 x float>
|
||||
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
|
||||
ret i32 %v
|
||||
%v64 = zext i32 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
define float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -237,7 +237,7 @@ define <8 x i32> @__max_varying_uint32(<8 x i32>,
|
||||
|
||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
; first do two 4-wide movmsk calls
|
||||
%floatmask = bitcast <8 x i32> %0 to <8 x float>
|
||||
%m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
|
||||
@@ -251,7 +251,8 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
|
||||
; of the second one
|
||||
%v1s = shl i32 %v1, 4
|
||||
%v = or i32 %v0, %v1s
|
||||
ret i32 %v
|
||||
%v64 = zext i32 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
define float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -271,10 +271,11 @@ define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alway
|
||||
|
||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
define i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
||||
define i64 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
|
||||
%floatmask = bitcast <4 x i32> %0 to <4 x float>
|
||||
%v = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %floatmask) nounwind readnone
|
||||
ret i32 %v
|
||||
%v64 = zext i32 %v to i64
|
||||
ret i64 %v64
|
||||
}
|
||||
|
||||
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
357
builtins/util.m4
357
builtins/util.m4
@@ -1,4 +1,4 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; Copyright (c) 2010-2012, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
@@ -38,6 +38,18 @@ declare i1 @__is_compile_time_constant_uniform_int32(i32)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; It is a bit of a pain to compute this in m4 for 32 and 64-wide targets...
|
||||
;; Expands to the integer literal that has one bit set for every program
;; instance of the target, i.e. the value an i64 lane mask from __movmsk is
;; compared against to test for all lanes on.  The 32-wide and 64-wide
;; values are written out as literals; narrower widths are computed.
define(`ALL_ON_MASK',
`ifelse(WIDTH, `64', `-1',
        WIDTH, `32', `4294967295',
        `eval((1<<WIDTH)-1)')')
|
||||
|
||||
;; Expands to the integer literal with only the bit of the last program
;; instance set (bit number vector-width minus one).  As with the all-on
;; value, the 32-wide and 64-wide cases are written out as literals and the
;; narrower widths are computed.
define(`MASK_HIGH_BIT_ON',
`ifelse(WIDTH, `64', `-9223372036854775808',
        WIDTH, `32', `2147483648',
        `eval(1<<(WIDTH-1))')')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
;; Helper macro for calling various SSE instructions for scalar values
|
||||
;; but where the instruction takes a vector parameter.
|
||||
@@ -1529,7 +1541,7 @@ declare i32 @__fast_masked_vload()
|
||||
declare i8* @ISPCAlloc(i8**, i64, i32) nounwind
|
||||
declare void @ISPCLaunch(i8**, i8*, i8*, i32) nounwind
|
||||
declare void @ISPCSync(i8*) nounwind
|
||||
declare void @ISPCInstrument(i8*, i8*, i32, i32) nounwind
|
||||
declare void @ISPCInstrument(i8*, i8*, i32, i64) nounwind
|
||||
|
||||
declare i1 @__is_compile_time_constant_mask(<WIDTH x MASK> %mask)
|
||||
declare i1 @__is_compile_time_constant_varying_int32(<WIDTH x i32>)
|
||||
@@ -1654,6 +1666,265 @@ declare void @__pseudo_scatter_base_offsets64_32(i8 * nocapture, <WIDTH x i64>,
|
||||
declare void @__pseudo_scatter_base_offsets64_64(i8 * nocapture, <WIDTH x i64>, i32, <WIDTH x i64>,
|
||||
<WIDTH x i64>, <WIDTH x MASK>) nounwind
|
||||
|
||||
declare float @__log_uniform_float(float) nounwind readnone
|
||||
declare <WIDTH x float> @__log_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__exp_uniform_float(float) nounwind readnone
|
||||
declare <WIDTH x float> @__exp_varying_float(<WIDTH x float>) nounwind readnone
|
||||
declare float @__pow_uniform_float(float, float) nounwind readnone
|
||||
declare <WIDTH x float> @__pow_varying_float(<WIDTH x float>, <WIDTH x float>) nounwind readnone
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
declare void @__use8(<WIDTH x i8>)
|
||||
declare void @__use16(<WIDTH x i16>)
|
||||
declare void @__use32(<WIDTH x i32>)
|
||||
declare void @__use64(<WIDTH x i64>)
|
||||
|
||||
;; This is a temporary function that will be removed at the end of
|
||||
;; compilation--the idea is that it calls out to all of the various
|
||||
;; functions / pseudo-function declarations that we need to keep around
|
||||
;; so that they are available to the various optimization passes. This
|
||||
;; then prevents those functions from being removed as dead code when
|
||||
;; we do early DCE...
|
||||
|
||||
define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
                               <WIDTH x i32> %v32, <WIDTH x i64> %v64,
                               <WIDTH x MASK> %mask) {
  ;; Every call below exists only to create a reference to the callee.
  ;; Results of value-returning calls are handed to the matching __use
  ;; declaration so that each of those calls has a user as well.

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; loads
  %ml8  = call <WIDTH x i8>  @__masked_load_8(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %ml8)
  %ml16 = call <WIDTH x i16> @__masked_load_16(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %ml16)
  %ml32 = call <WIDTH x i32> @__masked_load_32(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %ml32)
  %ml64 = call <WIDTH x i64> @__masked_load_64(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %ml64)

  %lb8  = call <WIDTH x i8>  @__load_and_broadcast_8(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %lb8)
  %lb16 = call <WIDTH x i16> @__load_and_broadcast_16(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %lb16)
  %lb32 = call <WIDTH x i32> @__load_and_broadcast_32(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %lb32)
  %lb64 = call <WIDTH x i64> @__load_and_broadcast_64(i8 * %ptr, <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %lb64)

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; stores
  ;; The same i8 pointer is reinterpreted as a pointer to each vector type.
  %pv8 = bitcast i8 * %ptr to <WIDTH x i8> *
  call void @__pseudo_masked_store_8(<WIDTH x i8> * %pv8, <WIDTH x i8> %v8,
                                     <WIDTH x MASK> %mask)
  %pv16 = bitcast i8 * %ptr to <WIDTH x i16> *
  call void @__pseudo_masked_store_16(<WIDTH x i16> * %pv16, <WIDTH x i16> %v16,
                                      <WIDTH x MASK> %mask)
  %pv32 = bitcast i8 * %ptr to <WIDTH x i32> *
  call void @__pseudo_masked_store_32(<WIDTH x i32> * %pv32, <WIDTH x i32> %v32,
                                      <WIDTH x MASK> %mask)
  %pv64 = bitcast i8 * %ptr to <WIDTH x i64> *
  call void @__pseudo_masked_store_64(<WIDTH x i64> * %pv64, <WIDTH x i64> %v64,
                                      <WIDTH x MASK> %mask)

  call void @__masked_store_8(<WIDTH x i8> * %pv8, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__masked_store_16(<WIDTH x i16> * %pv16, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__masked_store_32(<WIDTH x i32> * %pv32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__masked_store_64(<WIDTH x i64> * %pv64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__masked_store_blend_8(<WIDTH x i8> * %pv8, <WIDTH x i8> %v8,
                                    <WIDTH x MASK> %mask)
  call void @__masked_store_blend_16(<WIDTH x i16> * %pv16, <WIDTH x i16> %v16,
                                     <WIDTH x MASK> %mask)
  call void @__masked_store_blend_32(<WIDTH x i32> * %pv32, <WIDTH x i32> %v32,
                                     <WIDTH x MASK> %mask)
  call void @__masked_store_blend_64(<WIDTH x i64> * %pv64, <WIDTH x i64> %v64,
                                     <WIDTH x MASK> %mask)

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; gathers

  %pg32_8 = call <WIDTH x i8>  @__pseudo_gather32_8(<WIDTH x i32> %v32,
                                                    <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %pg32_8)
  %pg32_16 = call <WIDTH x i16>  @__pseudo_gather32_16(<WIDTH x i32> %v32,
                                                       <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %pg32_16)
  %pg32_32 = call <WIDTH x i32>  @__pseudo_gather32_32(<WIDTH x i32> %v32,
                                                       <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %pg32_32)
  %pg32_64 = call <WIDTH x i64>  @__pseudo_gather32_64(<WIDTH x i32> %v32,
                                                       <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %pg32_64)

  %pg64_8 = call <WIDTH x i8>  @__pseudo_gather64_8(<WIDTH x i64> %v64,
                                                    <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %pg64_8)
  %pg64_16 = call <WIDTH x i16>  @__pseudo_gather64_16(<WIDTH x i64> %v64,
                                                       <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %pg64_16)
  %pg64_32 = call <WIDTH x i32>  @__pseudo_gather64_32(<WIDTH x i64> %v64,
                                                       <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %pg64_32)
  %pg64_64 = call <WIDTH x i64>  @__pseudo_gather64_64(<WIDTH x i64> %v64,
                                                       <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %pg64_64)

  %g32_8 = call <WIDTH x i8>  @__gather32_i8(<WIDTH x i32> %v32,
                                             <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %g32_8)
  %g32_16 = call <WIDTH x i16>  @__gather32_i16(<WIDTH x i32> %v32,
                                                <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %g32_16)
  %g32_32 = call <WIDTH x i32>  @__gather32_i32(<WIDTH x i32> %v32,
                                                <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %g32_32)
  %g32_64 = call <WIDTH x i64>  @__gather32_i64(<WIDTH x i32> %v32,
                                                <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %g32_64)

  %g64_8 = call <WIDTH x i8>  @__gather64_i8(<WIDTH x i64> %v64,
                                             <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %g64_8)
  %g64_16 = call <WIDTH x i16>  @__gather64_i16(<WIDTH x i64> %v64,
                                                <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %g64_16)
  %g64_32 = call <WIDTH x i32>  @__gather64_i32(<WIDTH x i64> %v64,
                                                <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %g64_32)
  %g64_64 = call <WIDTH x i64>  @__gather64_i64(<WIDTH x i64> %v64,
                                                <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %g64_64)

  %pgbo32_8 = call <WIDTH x i8>
       @__pseudo_gather_base_offsets32_8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                         <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %pgbo32_8)
  %pgbo32_16 = call <WIDTH x i16>
       @__pseudo_gather_base_offsets32_16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                          <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %pgbo32_16)
  %pgbo32_32 = call <WIDTH x i32>
       @__pseudo_gather_base_offsets32_32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                          <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %pgbo32_32)
  %pgbo32_64 = call <WIDTH x i64>
       @__pseudo_gather_base_offsets32_64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                          <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %pgbo32_64)

  %gbo32_8 = call <WIDTH x i8>
       @__gather_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                   <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %gbo32_8)
  %gbo32_16 = call <WIDTH x i16>
       @__gather_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                    <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %gbo32_16)
  %gbo32_32 = call <WIDTH x i32>
       @__gather_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                    <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %gbo32_32)
  %gbo32_64 = call <WIDTH x i64>
       @__gather_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0,
                                    <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %gbo32_64)


  %pgbo64_8 = call <WIDTH x i8>
       @__pseudo_gather_base_offsets64_8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                         <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %pgbo64_8)
  %pgbo64_16 = call <WIDTH x i16>
       @__pseudo_gather_base_offsets64_16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                          <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %pgbo64_16)
  %pgbo64_32 = call <WIDTH x i32>
       @__pseudo_gather_base_offsets64_32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                          <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %pgbo64_32)
  %pgbo64_64 = call <WIDTH x i64>
       @__pseudo_gather_base_offsets64_64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                          <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %pgbo64_64)

  %gbo64_8 = call <WIDTH x i8>
       @__gather_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                   <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use8(<WIDTH x i8> %gbo64_8)
  %gbo64_16 = call <WIDTH x i16>
       @__gather_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                    <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use16(<WIDTH x i16> %gbo64_16)
  %gbo64_32 = call <WIDTH x i32>
       @__gather_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                    <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use32(<WIDTH x i32> %gbo64_32)
  %gbo64_64 = call <WIDTH x i64>
       @__gather_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0,
                                    <WIDTH x i64> %v64, <WIDTH x MASK> %mask)
  call void @__use64(<WIDTH x i64> %gbo64_64)

  ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
  ;; scatters

  call void @__pseudo_scatter32_8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter32_16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter32_32(<WIDTH x i32> %v32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter32_64(<WIDTH x i32> %v32, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__pseudo_scatter64_8(<WIDTH x i64> %v64, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter64_16(<WIDTH x i64> %v64, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter64_32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter64_64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__scatter32_i8(<WIDTH x i32> %v32, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__scatter32_i16(<WIDTH x i32> %v32, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__scatter32_i32(<WIDTH x i32> %v32, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__scatter32_i64(<WIDTH x i32> %v32, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__scatter64_i8(<WIDTH x i64> %v64, <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__scatter64_i16(<WIDTH x i64> %v64, <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__scatter64_i32(<WIDTH x i64> %v64, <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__scatter64_i64(<WIDTH x i64> %v64, <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__pseudo_scatter_base_offsets32_8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                               <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter_base_offsets32_16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter_base_offsets32_32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter_base_offsets32_64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                                <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__pseudo_scatter_base_offsets64_8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                               <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter_base_offsets64_16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter_base_offsets64_32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__pseudo_scatter_base_offsets64_64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                                <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__scatter_base_offsets32_i8(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                         <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__scatter_base_offsets32_i16(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                          <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__scatter_base_offsets32_i32(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                          <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__scatter_base_offsets32_i64(i8 * %ptr, <WIDTH x i32> %v32, i32 0, <WIDTH x i32> %v32,
                                          <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  call void @__scatter_base_offsets64_i8(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                         <WIDTH x i8> %v8, <WIDTH x MASK> %mask)
  call void @__scatter_base_offsets64_i16(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                          <WIDTH x i16> %v16, <WIDTH x MASK> %mask)
  call void @__scatter_base_offsets64_i32(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                          <WIDTH x i32> %v32, <WIDTH x MASK> %mask)
  call void @__scatter_base_offsets64_i64(i8 * %ptr, <WIDTH x i64> %v64, i32 0, <WIDTH x i64> %v64,
                                          <WIDTH x i64> %v64, <WIDTH x MASK> %mask)

  ret void
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; vector ops
|
||||
|
||||
@@ -1837,12 +2108,12 @@ ok:
|
||||
|
||||
|
||||
define void @__do_assert_varying(i8 *%str, <WIDTH x MASK> %test,
|
||||
<WIDTH x MASK> %mask) {
|
||||
<WIDTH x MASK> %mask) {
|
||||
%nottest = xor <WIDTH x MASK> %test,
|
||||
< forloop(i, 1, eval(WIDTH-1), `MASK -1, ') MASK -1 >
|
||||
%nottest_and_mask = and <WIDTH x MASK> %nottest, %mask
|
||||
%mm = call i32 @__movmsk(<WIDTH x MASK> %nottest_and_mask)
|
||||
%all_ok = icmp eq i32 %mm, 0
|
||||
%mm = call i64 @__movmsk(<WIDTH x MASK> %nottest_and_mask)
|
||||
%all_ok = icmp eq i64 %mm, 0
|
||||
br i1 %all_ok, label %ok, label %fail
|
||||
|
||||
fail:
|
||||
@@ -2244,14 +2515,18 @@ define <$1 x $2> @__load_and_broadcast_$3(i8 *, <$1 x MASK> %mask) nounwind alwa
|
||||
;; $4: alignment for elements of type $2 (4, 8, ...)
|
||||
|
||||
define(`masked_load', `
|
||||
define <$1 x $2> @__masked_load_$3(i8 *, <$1 x i32> %mask) nounwind alwaysinline {
|
||||
define <$1 x $2> @__masked_load_$3(i8 *, <$1 x MASK> %mask) nounwind alwaysinline {
|
||||
entry:
|
||||
%mm = call i32 @__movmsk(<$1 x i32> %mask)
|
||||
%mm = call i64 @__movmsk(<$1 x MASK> %mask)
|
||||
|
||||
; if the first lane and the last lane are on, then it is safe to do a vector load
|
||||
; of the whole thing--what the lanes in the middle want turns out to not matter...
|
||||
%mm_and = and i32 %mm, eval(1 | (1<<($1-1)))
|
||||
%can_vload = icmp eq i32 %mm_and, eval(1 | (1<<($1-1)))
|
||||
%mm_and_low = and i64 %mm, 1
|
||||
%mm_and_high = and i64 %mm, MASK_HIGH_BIT_ON
|
||||
%mm_and_high_shift = lshr i64 %mm_and_high, eval(WIDTH-1)
|
||||
%mm_and_low_i1 = trunc i64 %mm_and_low to i1
|
||||
%mm_and_high_shift_i1 = trunc i64 %mm_and_high_shift to i1
|
||||
%can_vload = and i1 %mm_and_low_i1, %mm_and_high_shift_i1
|
||||
|
||||
%fast32 = call i32 @__fast_masked_vload()
|
||||
%fast_i1 = trunc i32 %fast32 to i1
|
||||
@@ -2270,9 +2545,10 @@ load:
|
||||
loop:
|
||||
; loop over the lanes and see if each one is on...
|
||||
%lane = phi i32 [ 0, %entry ], [ %next_lane, %lane_done ]
|
||||
%lanemask = shl i32 1, %lane
|
||||
%mask_and = and i32 %mm, %lanemask
|
||||
%do_lane = icmp ne i32 %mask_and, 0
|
||||
%lane64 = zext i32 %lane to i64
|
||||
%lanemask = shl i64 1, %lane64
|
||||
%mask_and = and i64 %mm, %lanemask
|
||||
%do_lane = icmp ne i64 %mask_and, 0
|
||||
br i1 %do_lane, label %load_lane, label %lane_done
|
||||
|
||||
load_lane:
|
||||
@@ -2484,12 +2760,12 @@ define(`packed_load_and_store', `
|
||||
define i32 @__packed_load_active(i32 * %startptr, <WIDTH x i32> * %val_ptr,
|
||||
<WIDTH x i32> %full_mask) nounwind alwaysinline {
|
||||
entry:
|
||||
%mask = call i32 @__movmsk(<WIDTH x i32> %full_mask)
|
||||
%mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
|
||||
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
|
||||
br i1 %mask_known, label %known_mask, label %unknown_mask
|
||||
|
||||
known_mask:
|
||||
%allon = icmp eq i32 %mask, eval((1 << WIDTH) -1)
|
||||
%allon = icmp eq i64 %mask, ALL_ON_MASK
|
||||
br i1 %allon, label %all_on, label %unknown_mask
|
||||
|
||||
all_on:
|
||||
@@ -2505,12 +2781,12 @@ unknown_mask:
|
||||
|
||||
loop:
|
||||
%lane = phi i32 [ 0, %unknown_mask ], [ %nextlane, %loopend ]
|
||||
%lanemask = phi i32 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
|
||||
%lanemask = phi i64 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
|
||||
%offset = phi i32 [ 0, %unknown_mask ], [ %nextoffset, %loopend ]
|
||||
|
||||
; is the current lane on?
|
||||
%and = and i32 %mask, %lanemask
|
||||
%do_load = icmp eq i32 %and, %lanemask
|
||||
%and = and i64 %mask, %lanemask
|
||||
%do_load = icmp eq i64 %and, %lanemask
|
||||
br i1 %do_load, label %load, label %loopend
|
||||
|
||||
load:
|
||||
@@ -2525,7 +2801,7 @@ load:
|
||||
loopend:
|
||||
%nextoffset = phi i32 [ %offset1, %load ], [ %offset, %loop ]
|
||||
%nextlane = add i32 %lane, 1
|
||||
%nextlanemask = mul i32 %lanemask, 2
|
||||
%nextlanemask = mul i64 %lanemask, 2
|
||||
|
||||
; are we done yet?
|
||||
%test = icmp ne i32 %nextlane, WIDTH
|
||||
@@ -2536,14 +2812,14 @@ done:
|
||||
}
|
||||
|
||||
define i32 @__packed_store_active(i32 * %startptr, <WIDTH x i32> %vals,
|
||||
<WIDTH x i32> %full_mask) nounwind alwaysinline {
|
||||
<WIDTH x i32> %full_mask) nounwind alwaysinline {
|
||||
entry:
|
||||
%mask = call i32 @__movmsk(<WIDTH x i32> %full_mask)
|
||||
%mask = call i64 @__movmsk(<WIDTH x i32> %full_mask)
|
||||
%mask_known = call i1 @__is_compile_time_constant_mask(<WIDTH x i32> %full_mask)
|
||||
br i1 %mask_known, label %known_mask, label %unknown_mask
|
||||
|
||||
known_mask:
|
||||
%allon = icmp eq i32 %mask, eval((1 << WIDTH) -1)
|
||||
%allon = icmp eq i64 %mask, ALL_ON_MASK
|
||||
br i1 %allon, label %all_on, label %unknown_mask
|
||||
|
||||
all_on:
|
||||
@@ -2556,12 +2832,12 @@ unknown_mask:
|
||||
|
||||
loop:
|
||||
%lane = phi i32 [ 0, %unknown_mask ], [ %nextlane, %loopend ]
|
||||
%lanemask = phi i32 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
|
||||
%lanemask = phi i64 [ 1, %unknown_mask ], [ %nextlanemask, %loopend ]
|
||||
%offset = phi i32 [ 0, %unknown_mask ], [ %nextoffset, %loopend ]
|
||||
|
||||
; is the current lane on?
|
||||
%and = and i32 %mask, %lanemask
|
||||
%do_store = icmp eq i32 %and, %lanemask
|
||||
%and = and i64 %mask, %lanemask
|
||||
%do_store = icmp eq i64 %and, %lanemask
|
||||
br i1 %do_store, label %store, label %loopend
|
||||
|
||||
store:
|
||||
@@ -2574,7 +2850,7 @@ store:
|
||||
loopend:
|
||||
%nextoffset = phi i32 [ %offset1, %store ], [ %offset, %loop ]
|
||||
%nextlane = add i32 %lane, 1
|
||||
%nextlanemask = mul i32 %lanemask, 2
|
||||
%nextlanemask = mul i64 %lanemask, 2
|
||||
|
||||
; are we done yet?
|
||||
%test = icmp ne i32 %nextlane, WIDTH
|
||||
@@ -2598,14 +2874,15 @@ define(`reduce_equal_aux', `
|
||||
define i1 @__reduce_equal_$3(<$1 x $2> %v, $2 * %samevalue,
|
||||
<$1 x MASK> %mask) nounwind alwaysinline {
|
||||
entry:
|
||||
%mm = call i32 @__movmsk(<$1 x MASK> %mask)
|
||||
%allon = icmp eq i32 %mm, eval((1<<$1)-1)
|
||||
%mm = call i64 @__movmsk(<$1 x MASK> %mask)
|
||||
%allon = icmp eq i64 %mm, ALL_ON_MASK
|
||||
br i1 %allon, label %check_neighbors, label %domixed
|
||||
|
||||
domixed:
|
||||
; First, figure out which lane is the first active one
|
||||
%first = call i32 @llvm.cttz.i32(i32 %mm)
|
||||
%baseval = extractelement <$1 x $2> %v, i32 %first
|
||||
%first = call i64 @llvm.cttz.i64(i64 %mm)
|
||||
%first32 = trunc i64 %first to i32
|
||||
%baseval = extractelement <$1 x $2> %v, i32 %first32
|
||||
%basev1 = bitcast $2 %baseval to <1 x $2>
|
||||
; get a vector that is that value smeared across all elements
|
||||
%basesmear = shufflevector <1 x $2> %basev1, <1 x $2> undef,
|
||||
@@ -2636,9 +2913,9 @@ check_neighbors:
|
||||
%eq = $5 eq <$1 x $2> %vec, %vr
|
||||
ifelse(MASK,i32, `
|
||||
%eq32 = sext <$1 x i1> %eq to <$1 x i32>
|
||||
%eqmm = call i32 @__movmsk(<$1 x i32> %eq32)', `
|
||||
%eqmm = call i32 @__movmsk(<$1 x MASK> %eq)')
|
||||
%alleq = icmp eq i32 %eqmm, eval((1<<$1)-1)
|
||||
%eqmm = call i64 @__movmsk(<$1 x i32> %eq32)', `
|
||||
%eqmm = call i64 @__movmsk(<$1 x MASK> %eq)')
|
||||
%alleq = icmp eq i64 %eqmm, ALL_ON_MASK
|
||||
br i1 %alleq, label %all_equal, label %not_all_equal
|
||||
', `
|
||||
; But for 64-bit elements, it turns out to be more efficient to just
|
||||
@@ -2751,14 +3028,14 @@ define(`per_lane', `
|
||||
br label %pl_entry
|
||||
|
||||
pl_entry:
|
||||
%pl_mask = call i32 @__movmsk($2)
|
||||
%pl_mask = call i64 @__movmsk($2)
|
||||
%pl_mask_known = call i1 @__is_compile_time_constant_mask($2)
|
||||
br i1 %pl_mask_known, label %pl_known_mask, label %pl_unknown_mask
|
||||
|
||||
pl_known_mask:
|
||||
;; the mask is known at compile time; see if it is something we can
|
||||
;; handle more efficiently
|
||||
%pl_is_allon = icmp eq i32 %pl_mask, eval((1<<$1)-1)
|
||||
%pl_is_allon = icmp eq i64 %pl_mask, ALL_ON_MASK
|
||||
br i1 %pl_is_allon, label %pl_all_on, label %pl_unknown_mask
|
||||
|
||||
pl_all_on:
|
||||
@@ -2780,11 +3057,11 @@ pl_unknown_mask:
|
||||
pl_loop:
|
||||
;; Loop over each lane and see if we want to do the work for this lane
|
||||
%pl_lane = phi i32 [ 0, %pl_unknown_mask ], [ %pl_nextlane, %pl_loopend ]
|
||||
%pl_lanemask = phi i32 [ 1, %pl_unknown_mask ], [ %pl_nextlanemask, %pl_loopend ]
|
||||
%pl_lanemask = phi i64 [ 1, %pl_unknown_mask ], [ %pl_nextlanemask, %pl_loopend ]
|
||||
|
||||
; is the current lane on? if so, goto do work, otherwise to end of loop
|
||||
%pl_and = and i32 %pl_mask, %pl_lanemask
|
||||
%pl_doit = icmp eq i32 %pl_and, %pl_lanemask
|
||||
%pl_and = and i64 %pl_mask, %pl_lanemask
|
||||
%pl_doit = icmp eq i64 %pl_and, %pl_lanemask
|
||||
br i1 %pl_doit, label %pl_dolane, label %pl_loopend
|
||||
|
||||
pl_dolane:
|
||||
@@ -2795,7 +3072,7 @@ pl_dolane:
|
||||
|
||||
pl_loopend:
|
||||
%pl_nextlane = add i32 %pl_lane, 1
|
||||
%pl_nextlanemask = mul i32 %pl_lanemask, 2
|
||||
%pl_nextlanemask = mul i64 %pl_lanemask, 2
|
||||
|
||||
; are we done yet?
|
||||
%pl_test = icmp ne i32 %pl_nextlane, $1
|
||||
@@ -2880,11 +3157,11 @@ define <$1 x $2> @__gather_base_offsets32_$2(i8 * %ptr, <$1 x i32> %offsets, i32
|
||||
%newDelta = load <$1 x i32> * %deltaPtr
|
||||
|
||||
%ret0 = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr, <$1 x i32> %newOffsets,
|
||||
i32 %offset_scale, <$1 x i32> %offset_delta,
|
||||
i32 %offset_scale, <$1 x i32> %newDelta,
|
||||
<$1 x $2> undef, i32 0)
|
||||
forloop(lane, 1, eval($1-1),
|
||||
`patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt32_$2(i8 * %ptr,
|
||||
<$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %offset_delta,
|
||||
<$1 x i32> %newOffsets, i32 %offset_scale, <$1 x i32> %newDelta,
|
||||
<$1 x $2> %retPREV, i32 LANE)
|
||||
', `LANE', lane), `PREV', eval(lane-1))')
|
||||
ret <$1 x $2> %ret`'eval($1-1)
|
||||
|
||||
712
cbackend.cpp
712
cbackend.cpp
@@ -12,9 +12,7 @@
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifdef LLVM_2_9
|
||||
#warning "The C++ backend isn't supported when building with LLVM 2.9"
|
||||
#else
|
||||
#include <stdio.h>
|
||||
|
||||
#ifndef _MSC_VER
|
||||
#include <inttypes.h>
|
||||
@@ -339,8 +337,6 @@ namespace {
|
||||
bool IsVolatile, unsigned Alignment);
|
||||
|
||||
private :
|
||||
std::string InterpretASMConstraint(InlineAsm::ConstraintInfo& c);
|
||||
|
||||
void lowerIntrinsics(Function &F);
|
||||
/// Prints the definition of the intrinsic function F. Supports the
|
||||
/// intrinsics which need to be explicitly defined in the CBackend.
|
||||
@@ -363,7 +359,7 @@ namespace {
|
||||
bool printConstExprCast(const ConstantExpr *CE, bool Static);
|
||||
void printConstantArray(ConstantArray *CPA, bool Static);
|
||||
void printConstantVector(ConstantVector *CV, bool Static);
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
void printConstantDataSequential(ConstantDataSequential *CDS, bool Static);
|
||||
#endif
|
||||
|
||||
@@ -440,11 +436,11 @@ namespace {
|
||||
void visitInvokeInst(InvokeInst &I) {
|
||||
llvm_unreachable("Lowerinvoke pass didn't work!");
|
||||
}
|
||||
#if !defined(LLVM_3_1) && !defined(LLVM_3_1svn)
|
||||
#ifdef LLVM_3_0
|
||||
void visitUnwindInst(UnwindInst &I) {
|
||||
llvm_unreachable("Lowerinvoke pass didn't work!");
|
||||
}
|
||||
#endif // !LLVM_3_1svn
|
||||
#endif // LLVM_3_0
|
||||
void visitResumeInst(ResumeInst &I) {
|
||||
llvm_unreachable("DwarfEHPrepare pass didn't work!");
|
||||
}
|
||||
@@ -804,7 +800,7 @@ raw_ostream &CWriter::printType(raw_ostream &Out, Type *Ty,
|
||||
}
|
||||
|
||||
void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
|
||||
#ifndef LLVM_3_1svn
|
||||
#ifdef LLVM_3_0
|
||||
Type *ETy = CPA->getType()->getElementType();
|
||||
// MMP: this looks like a bug: both sides of the || are the same
|
||||
bool isString = ETy == Type::getInt8Ty(CPA->getContext());
|
||||
@@ -857,7 +853,7 @@ void CWriter::printConstantArray(ConstantArray *CPA, bool Static) {
|
||||
Out << "\"";
|
||||
return;
|
||||
}
|
||||
#endif // !LLVM_3_1
|
||||
#endif // LLVM_3_0
|
||||
|
||||
printConstant(cast<Constant>(CPA->getOperand(0)), Static);
|
||||
for (unsigned i = 1, e = CPA->getNumOperands(); i != e; ++i) {
|
||||
@@ -874,7 +870,7 @@ void CWriter::printConstantVector(ConstantVector *CP, bool Static) {
|
||||
}
|
||||
}
|
||||
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
void CWriter::printConstantDataSequential(ConstantDataSequential *CDS,
|
||||
bool Static) {
|
||||
// As a special case, print the array as a string if it is an array of
|
||||
@@ -931,7 +927,21 @@ void CWriter::printConstantDataSequential(ConstantDataSequential *CDS,
|
||||
}
|
||||
}
|
||||
}
|
||||
#endif // LLVM_3_1svn
|
||||
#endif // !LLVM_3_0
|
||||
|
||||
#ifndef LLVM_3_0
|
||||
static inline std::string ftostr(const APFloat& V) {
|
||||
std::string Buf;
|
||||
if (&V.getSemantics() == &APFloat::IEEEdouble) {
|
||||
raw_string_ostream(Buf) << V.convertToDouble();
|
||||
return Buf;
|
||||
} else if (&V.getSemantics() == &APFloat::IEEEsingle) {
|
||||
raw_string_ostream(Buf) << (double)V.convertToFloat();
|
||||
return Buf;
|
||||
}
|
||||
return "<unknown format in ftostr>"; // error
|
||||
}
|
||||
#endif // !LLVM_3_0
|
||||
|
||||
// isFPCSafeToPrint - Returns true if we may assume that CFP may be written out
|
||||
// textually as a double (rather than as a reference to a stack-allocated
|
||||
@@ -1084,6 +1094,26 @@ bool CWriter::printCast(unsigned opc, Type *SrcTy, Type *DstTy) {
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
// FIXME: generalize this/make it not so hard-coded?
|
||||
static const char *lGetSmearFunc(Type *matchType) {
|
||||
switch (matchType->getTypeID()) {
|
||||
case Type::FloatTyID: return "__smear_float";
|
||||
case Type::DoubleTyID: return "__smear_double";
|
||||
case Type::IntegerTyID: {
|
||||
switch (cast<IntegerType>(matchType)->getBitWidth()) {
|
||||
case 1: return "__smear_i1";
|
||||
case 8: return "__smear_i8";
|
||||
case 16: return "__smear_i16";
|
||||
case 32: return "__smear_i32";
|
||||
case 64: return "__smear_i64";
|
||||
}
|
||||
}
|
||||
default: return NULL;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// printConstant - The LLVM Constant to C Constant converter.
|
||||
void CWriter::printConstant(Constant *CPV, bool Static) {
|
||||
if (const ConstantExpr *CE = dyn_cast<ConstantExpr>(CPV)) {
|
||||
@@ -1400,11 +1430,11 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
|
||||
}
|
||||
if (ConstantArray *CA = dyn_cast<ConstantArray>(CPV)) {
|
||||
printConstantArray(CA, Static);
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
} else if (ConstantDataSequential *CDS =
|
||||
dyn_cast<ConstantDataSequential>(CPV)) {
|
||||
printConstantDataSequential(CDS, Static);
|
||||
#endif // LLVM_3_1svn
|
||||
#endif // !LLVM_3_0
|
||||
} else {
|
||||
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
|
||||
if (AT->getNumElements()) {
|
||||
@@ -1423,30 +1453,68 @@ void CWriter::printConstant(Constant *CPV, bool Static) {
|
||||
Out << ")";
|
||||
break;
|
||||
}
|
||||
case Type::VectorTyID:
|
||||
printType(Out, CPV->getType());
|
||||
Out << "(";
|
||||
case Type::VectorTyID: {
|
||||
VectorType *VT = dyn_cast<VectorType>(CPV->getType());
|
||||
const char *smearFunc = lGetSmearFunc(VT->getElementType());
|
||||
|
||||
if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
|
||||
printConstantVector(CV, Static);
|
||||
#ifdef LLVM_3_1svn
|
||||
} else if (ConstantDataSequential *CDS =
|
||||
dyn_cast<ConstantDataSequential>(CPV)) {
|
||||
printConstantDataSequential(CDS, Static);
|
||||
#endif
|
||||
} else {
|
||||
assert(isa<ConstantAggregateZero>(CPV) || isa<UndefValue>(CPV));
|
||||
VectorType *VT = cast<VectorType>(CPV->getType());
|
||||
if (isa<ConstantAggregateZero>(CPV)) {
|
||||
assert(smearFunc != NULL);
|
||||
|
||||
Constant *CZ = Constant::getNullValue(VT->getElementType());
|
||||
Out << smearFunc << "(";
|
||||
printType(Out, VT);
|
||||
Out << "(), ";
|
||||
printConstant(CZ, Static);
|
||||
Out << ")";
|
||||
}
|
||||
else if (ConstantVector *CV = dyn_cast<ConstantVector>(CPV)) {
|
||||
llvm::Constant *splatValue = CV->getSplatValue();
|
||||
if (splatValue != NULL && smearFunc != NULL) {
|
||||
Out << smearFunc << "(";
|
||||
printType(Out, VT);
|
||||
Out << "(), ";
|
||||
printConstant(splatValue, Static);
|
||||
Out << ")";
|
||||
}
|
||||
else {
|
||||
printType(Out, CPV->getType());
|
||||
Out << "(";
|
||||
printConstantVector(CV, Static);
|
||||
Out << ")";
|
||||
}
|
||||
}
|
||||
#ifndef LLVM_3_0
|
||||
else if (ConstantDataVector *CDV = dyn_cast<ConstantDataVector>(CPV)) {
|
||||
llvm::Constant *splatValue = CDV->getSplatValue();
|
||||
if (splatValue != NULL && smearFunc != NULL) {
|
||||
Out << smearFunc << "(";
|
||||
printType(Out, VT);
|
||||
Out << "(), ";
|
||||
printConstant(splatValue, Static);
|
||||
Out << ")";
|
||||
}
|
||||
else {
|
||||
printType(Out, CPV->getType());
|
||||
Out << "(";
|
||||
printConstantDataSequential(CDV, Static);
|
||||
Out << ")";
|
||||
}
|
||||
}
|
||||
#endif // !LLVM_3_0
|
||||
else {
|
||||
assert(isa<UndefValue>(CPV));
|
||||
Constant *CZ = Constant::getNullValue(VT->getElementType());
|
||||
printType(Out, CPV->getType());
|
||||
Out << "(";
|
||||
printConstant(CZ, Static);
|
||||
for (unsigned i = 1, e = VT->getNumElements(); i != e; ++i) {
|
||||
Out << ", ";
|
||||
printConstant(CZ, Static);
|
||||
}
|
||||
Out << ")";
|
||||
}
|
||||
Out << ")";
|
||||
break;
|
||||
|
||||
}
|
||||
case Type::StructTyID:
|
||||
if (!Static) {
|
||||
// call init func...
|
||||
@@ -1639,7 +1707,12 @@ std::string CWriter::GetValueName(const Value *Operand) {
|
||||
VarName += ch;
|
||||
}
|
||||
|
||||
return VarName + "_llvm_cbe";
|
||||
if (isa<BasicBlock>(Operand))
|
||||
VarName += "_label";
|
||||
else
|
||||
VarName += "_";
|
||||
|
||||
return VarName;
|
||||
}
|
||||
|
||||
/// writeInstComputationInline - Emit the computation for the specified
|
||||
@@ -2071,69 +2144,18 @@ bool CWriter::doInitialization(Module &M) {
|
||||
|
||||
Out << "#include \"" << includeName << "\"\n";
|
||||
|
||||
generateCompilerSpecificCode(Out, TD);
|
||||
|
||||
// Function declarations
|
||||
Out << "\n/* Function Declarations */\n";
|
||||
Out << "\n/* Basic Library Function Declarations */\n";
|
||||
Out << "extern \"C\" {\n";
|
||||
Out << "int puts(unsigned char *);\n";
|
||||
Out << "unsigned int putchar(unsigned int);\n";
|
||||
Out << "int fflush(void *);\n";
|
||||
Out << "int printf(const unsigned char *, ...);\n";
|
||||
Out << "uint8_t *memcpy(uint8_t *, uint8_t *, uint64_t );\n";
|
||||
Out << "uint8_t *memset(uint8_t *, uint8_t, uint64_t );\n";
|
||||
Out << "void memset_pattern16(void *, const void *, uint64_t );\n";
|
||||
Out << "}\n\n";
|
||||
|
||||
// Store the intrinsics which will be declared/defined below.
|
||||
SmallVector<const Function*, 8> intrinsicsToDefine;
|
||||
|
||||
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
|
||||
// Don't print declarations for intrinsic functions.
|
||||
// Store the used intrinsics, which need to be explicitly defined.
|
||||
if (I->isIntrinsic()) {
|
||||
switch (I->getIntrinsicID()) {
|
||||
default:
|
||||
break;
|
||||
case Intrinsic::uadd_with_overflow:
|
||||
case Intrinsic::sadd_with_overflow:
|
||||
intrinsicsToDefine.push_back(I);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (I->getName() == "setjmp" || I->getName() == "abort" ||
|
||||
I->getName() == "longjmp" || I->getName() == "_setjmp" ||
|
||||
I->getName() == "memset" || I->getName() == "memset_pattern16" ||
|
||||
I->getName() == "puts" ||
|
||||
I->getName() == "printf" || I->getName() == "putchar" ||
|
||||
I->getName() == "fflush" || I->getName() == "malloc" ||
|
||||
I->getName() == "free")
|
||||
continue;
|
||||
|
||||
// Don't redeclare ispc's own intrinsics
|
||||
std::string name = I->getName();
|
||||
if (name.size() > 2 && name[0] == '_' && name[1] == '_')
|
||||
continue;
|
||||
|
||||
if (I->hasExternalWeakLinkage())
|
||||
Out << "extern ";
|
||||
printFunctionSignature(I, true);
|
||||
if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
|
||||
Out << " __ATTRIBUTE_WEAK__";
|
||||
if (I->hasExternalWeakLinkage())
|
||||
Out << " __EXTERNAL_WEAK__";
|
||||
if (StaticCtors.count(I))
|
||||
Out << " __ATTRIBUTE_CTOR__";
|
||||
if (StaticDtors.count(I))
|
||||
Out << " __ATTRIBUTE_DTOR__";
|
||||
if (I->hasHiddenVisibility())
|
||||
Out << " __HIDDEN__";
|
||||
|
||||
if (I->hasName() && I->getName()[0] == 1)
|
||||
Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
|
||||
|
||||
Out << ";\n";
|
||||
}
|
||||
Out << "}\n";
|
||||
generateCompilerSpecificCode(Out, TD);
|
||||
|
||||
// Provide a definition for `bool' if not compiling with a C++ compiler.
|
||||
Out << "\n"
|
||||
@@ -2240,6 +2262,106 @@ bool CWriter::doInitialization(Module &M) {
|
||||
}
|
||||
}
|
||||
|
||||
// Function declarations
|
||||
Out << "\n/* Function Declarations */\n";
|
||||
Out << "extern \"C\" {\n";
|
||||
|
||||
// Store the intrinsics which will be declared/defined below.
|
||||
SmallVector<const Function*, 8> intrinsicsToDefine;
|
||||
|
||||
for (Module::iterator I = M.begin(), E = M.end(); I != E; ++I) {
|
||||
// Don't print declarations for intrinsic functions.
|
||||
// Store the used intrinsics, which need to be explicitly defined.
|
||||
if (I->isIntrinsic()) {
|
||||
switch (I->getIntrinsicID()) {
|
||||
default:
|
||||
break;
|
||||
case Intrinsic::uadd_with_overflow:
|
||||
case Intrinsic::sadd_with_overflow:
|
||||
intrinsicsToDefine.push_back(I);
|
||||
break;
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
if (I->getName() == "setjmp" || I->getName() == "abort" ||
|
||||
I->getName() == "longjmp" || I->getName() == "_setjmp" ||
|
||||
I->getName() == "memset" || I->getName() == "memset_pattern16" ||
|
||||
I->getName() == "puts" ||
|
||||
I->getName() == "printf" || I->getName() == "putchar" ||
|
||||
I->getName() == "fflush" || I->getName() == "malloc" ||
|
||||
I->getName() == "free")
|
||||
continue;
|
||||
|
||||
// Don't redeclare ispc's own intrinsics
|
||||
std::string name = I->getName();
|
||||
if (name.size() > 2 && name[0] == '_' && name[1] == '_')
|
||||
continue;
|
||||
|
||||
if (I->hasExternalWeakLinkage())
|
||||
Out << "extern ";
|
||||
printFunctionSignature(I, true);
|
||||
if (I->hasWeakLinkage() || I->hasLinkOnceLinkage())
|
||||
Out << " __ATTRIBUTE_WEAK__";
|
||||
if (I->hasExternalWeakLinkage())
|
||||
Out << " __EXTERNAL_WEAK__";
|
||||
if (StaticCtors.count(I))
|
||||
Out << " __ATTRIBUTE_CTOR__";
|
||||
if (StaticDtors.count(I))
|
||||
Out << " __ATTRIBUTE_DTOR__";
|
||||
if (I->hasHiddenVisibility())
|
||||
Out << " __HIDDEN__";
|
||||
|
||||
if (I->hasName() && I->getName()[0] == 1)
|
||||
Out << " LLVM_ASM(\"" << I->getName().substr(1) << "\")";
|
||||
|
||||
Out << ";\n";
|
||||
}
|
||||
Out << "}\n\n";
|
||||
|
||||
if (!M.empty())
|
||||
Out << "\n\n/* Function Bodies */\n";
|
||||
|
||||
// Emit some helper functions for dealing with FCMP instruction's
|
||||
// predicates
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ord(A X, B Y) { ";
|
||||
Out << "return X == X && Y == Y; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_uno(A X, B Y) { ";
|
||||
Out << "return X != X || Y != Y; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ueq(A X, B Y) { ";
|
||||
Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_une(A X, B Y) { ";
|
||||
Out << "return X != Y; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ult(A X, B Y) { ";
|
||||
Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ugt(A X, B Y) { ";
|
||||
Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ule(A X, B Y) { ";
|
||||
Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_uge(A X, B Y) { ";
|
||||
Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_oeq(A X, B Y) { ";
|
||||
Out << "return X == Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_one(A X, B Y) { ";
|
||||
Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_olt(A X, B Y) { ";
|
||||
Out << "return X < Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ogt(A X, B Y) { ";
|
||||
Out << "return X > Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ole(A X, B Y) { ";
|
||||
Out << "return X <= Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_oge(A X, B Y) { ";
|
||||
Out << "return X >= Y ; }\n";
|
||||
Out << "template <typename A> A *Memset(A *ptr, int count, size_t len) { ";
|
||||
Out << "return (A *)memset(ptr, count, len); }\n";
|
||||
|
||||
// Emit definitions of the intrinsics.
|
||||
for (SmallVector<const Function*, 8>::const_iterator
|
||||
I = intrinsicsToDefine.begin(),
|
||||
E = intrinsicsToDefine.end(); I != E; ++I) {
|
||||
printIntrinsicDefinition(**I, Out);
|
||||
}
|
||||
|
||||
// Output the global variable definitions and contents...
|
||||
if (!M.global_empty()) {
|
||||
Out << "\n\n/* Global Variable Definitions and Initialization */\n";
|
||||
@@ -2303,49 +2425,6 @@ bool CWriter::doInitialization(Module &M) {
|
||||
}
|
||||
}
|
||||
|
||||
if (!M.empty())
|
||||
Out << "\n\n/* Function Bodies */\n";
|
||||
|
||||
// Emit some helper functions for dealing with FCMP instruction's
|
||||
// predicates
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ord(A X, B Y) { ";
|
||||
Out << "return X == X && Y == Y; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_uno(A X, B Y) { ";
|
||||
Out << "return X != X || Y != Y; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ueq(A X, B Y) { ";
|
||||
Out << "return X == Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_une(A X, B Y) { ";
|
||||
Out << "return X != Y; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ult(A X, B Y) { ";
|
||||
Out << "return X < Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ugt(A X, B Y) { ";
|
||||
Out << "return X > Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ule(A X, B Y) { ";
|
||||
Out << "return X <= Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_uge(A X, B Y) { ";
|
||||
Out << "return X >= Y || llvm_fcmp_uno(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_oeq(A X, B Y) { ";
|
||||
Out << "return X == Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_one(A X, B Y) { ";
|
||||
Out << "return X != Y && llvm_fcmp_ord(X, Y); }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_olt(A X, B Y) { ";
|
||||
Out << "return X < Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ogt(A X, B Y) { ";
|
||||
Out << "return X > Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_ole(A X, B Y) { ";
|
||||
Out << "return X <= Y ; }\n";
|
||||
Out << "template <typename A, typename B> static inline int llvm_fcmp_oge(A X, B Y) { ";
|
||||
Out << "return X >= Y ; }\n";
|
||||
Out << "template <typename A> A *Memset(A *ptr, int count, size_t len) { ";
|
||||
Out << "return (A *)memset(ptr, count, len); }\n";
|
||||
|
||||
// Emit definitions of the intrinsics.
|
||||
for (SmallVector<const Function*, 8>::const_iterator
|
||||
I = intrinsicsToDefine.begin(),
|
||||
E = intrinsicsToDefine.end(); I != E; ++I) {
|
||||
printIntrinsicDefinition(**I, Out);
|
||||
}
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
@@ -2823,17 +2902,17 @@ void CWriter::visitSwitchInst(SwitchInst &SI) {
|
||||
printBranchToBlock(SI.getParent(), SI.getDefaultDest(), 2);
|
||||
Out << ";\n";
|
||||
|
||||
#ifdef LLVM_3_1svn
|
||||
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
|
||||
ConstantInt* CaseVal = i.getCaseValue();
|
||||
BasicBlock* Succ = i.getCaseSuccessor();
|
||||
#else
|
||||
#ifdef LLVM_3_0
|
||||
// Skip the first item since that's the default case.
|
||||
unsigned NumCases = SI.getNumCases();
|
||||
for (unsigned i = 1; i < NumCases; ++i) {
|
||||
ConstantInt* CaseVal = SI.getCaseValue(i);
|
||||
BasicBlock* Succ = SI.getSuccessor(i);
|
||||
#endif // LLVM_3_1svn
|
||||
#else
|
||||
for (SwitchInst::CaseIt i = SI.case_begin(), e = SI.case_end(); i != e; ++i) {
|
||||
ConstantInt* CaseVal = i.getCaseValue();
|
||||
BasicBlock* Succ = i.getCaseSuccessor();
|
||||
#endif // !LLVM_3_0
|
||||
Out << " case ";
|
||||
writeOperand(CaseVal);
|
||||
Out << ":\n";
|
||||
@@ -3401,6 +3480,7 @@ void CWriter::lowerIntrinsics(Function &F) {
|
||||
case Intrinsic::ppc_altivec_lvsl:
|
||||
case Intrinsic::uadd_with_overflow:
|
||||
case Intrinsic::sadd_with_overflow:
|
||||
case Intrinsic::trap:
|
||||
// We directly implement these intrinsics
|
||||
break;
|
||||
default:
|
||||
@@ -3568,7 +3648,9 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
|
||||
// If this is an intrinsic that directly corresponds to a GCC
|
||||
// builtin, we emit it here.
|
||||
const char *BuiltinName = "";
|
||||
#ifdef LLVM_3_0
|
||||
Function *F = I.getCalledFunction();
|
||||
#endif // LLVM_3_0
|
||||
#define GET_GCC_BUILTIN_NAME
|
||||
#include "llvm/Intrinsics.gen"
|
||||
#undef GET_GCC_BUILTIN_NAME
|
||||
@@ -3711,184 +3793,17 @@ bool CWriter::visitBuiltinCall(CallInst &I, Intrinsic::ID ID,
|
||||
writeOperand(I.getArgOperand(1));
|
||||
Out << ")";
|
||||
return true;
|
||||
case Intrinsic::trap:
|
||||
Out << "abort()";
|
||||
return true;
|
||||
}
|
||||
}
|
||||
|
||||
//This converts the llvm constraint string to something gcc is expecting.
|
||||
//TODO: work out platform independent constraints and factor those out
|
||||
// of the per target tables
|
||||
// handle multiple constraint codes
|
||||
std::string CWriter::InterpretASMConstraint(InlineAsm::ConstraintInfo& c) {
|
||||
assert(c.Codes.size() == 1 && "Too many asm constraint codes to handle");
|
||||
|
||||
// Grab the translation table from MCAsmInfo if it exists.
|
||||
const MCAsmInfo *TargetAsm;
|
||||
std::string Triple = TheModule->getTargetTriple();
|
||||
if (Triple.empty())
|
||||
#if defined(LLVM_3_1) || defined(LLVM_3_1svn)
|
||||
Triple = llvm::sys::getDefaultTargetTriple();
|
||||
#else
|
||||
Triple = llvm::sys::getHostTriple();
|
||||
#endif
|
||||
|
||||
std::string E;
|
||||
if (const llvm::Target *Match = TargetRegistry::lookupTarget(Triple, E))
|
||||
TargetAsm = Match->createMCAsmInfo(Triple);
|
||||
else
|
||||
return c.Codes[0];
|
||||
|
||||
const char *const *table = TargetAsm->getAsmCBE();
|
||||
|
||||
// Search the translation table if it exists.
|
||||
for (int i = 0; table && table[i]; i += 2)
|
||||
if (c.Codes[0] == table[i]) {
|
||||
delete TargetAsm;
|
||||
return table[i+1];
|
||||
}
|
||||
|
||||
// Default is identity.
|
||||
delete TargetAsm;
|
||||
return c.Codes[0];
|
||||
}
|
||||
|
||||
//TODO: import logic from AsmPrinter.cpp
/** Rewrites an LLVM inline-asm template string into the spelling used inside
    a gcc asm("...") string literal and returns the result.

    - newline and tab characters become the two-character escapes \n and \t
    - a literal '%' is doubled to "%%"
    - "$N" becomes "%N"
    - "${N:mod}" becomes "%modN"; "${N}" (no modifier) becomes "%N"

    A "${" with no closing '}' is not treated as a braced operand: only the
    '$' is turned into '%' and the remaining text is kept as it is.
 */
static std::string gccifyAsm(std::string asmstr) {
  for (std::string::size_type i = 0; i != asmstr.size(); ++i) {
    const char ch = asmstr[i];

    if (ch == '\n') {
      asmstr.replace(i, 1, "\\n");
    } else if (ch == '\t') {
      asmstr.replace(i, 1, "\\t");
    } else if (ch == '%') { //grr
      asmstr.replace(i, 1, "%%");
      ++i;  // step over the '%' that was just inserted
    } else if (ch == '$') {
      // Find the end of a braced operand, if this is one.
      std::string::size_type close = std::string::npos;
      if (i + 1 < asmstr.size() && asmstr[i + 1] == '{')
        close = asmstr.find('}', i + 2);

      if (close == std::string::npos) {
        asmstr.replace(i, 1, "%");
      } else {
        // Only look for the ':' inside the braces, so that a colon later in
        // the string can never be mistaken for this operand's modifier.
        const std::string inner = asmstr.substr(i + 2, close - i - 2);
        const std::string::size_type colon = inner.find(':');

        std::string n = "%";
        if (colon == std::string::npos)
          n += inner;
        else
          n += inner.substr(colon + 1) + inner.substr(0, colon);

        asmstr.replace(i, close - i + 1, n);
        i += n.size() - 1;  // resume right after the rewritten operand
      }
    }
  }

  return asmstr;
}
|
||||
|
||||
//TODO: assumptions about what consume arguments from the call are likely wrong
|
||||
// handle communitivity
|
||||
void CWriter::visitInlineAsm(CallInst &CI) {
|
||||
InlineAsm* as = cast<InlineAsm>(CI.getCalledValue());
|
||||
InlineAsm::ConstraintInfoVector Constraints = as->ParseConstraints();
|
||||
|
||||
std::vector<std::pair<Value*, int> > ResultVals;
|
||||
if (CI.getType() == Type::getVoidTy(CI.getContext()))
|
||||
;
|
||||
else if (StructType *ST = dyn_cast<StructType>(CI.getType())) {
|
||||
for (unsigned i = 0, e = ST->getNumElements(); i != e; ++i)
|
||||
ResultVals.push_back(std::make_pair(&CI, (int)i));
|
||||
} else {
|
||||
ResultVals.push_back(std::make_pair(&CI, -1));
|
||||
}
|
||||
|
||||
// Fix up the asm string for gcc and emit it.
|
||||
Out << "__asm__ volatile (\"" << gccifyAsm(as->getAsmString()) << "\"\n";
|
||||
Out << " :";
|
||||
|
||||
unsigned ValueCount = 0;
|
||||
bool IsFirst = true;
|
||||
|
||||
// Convert over all the output constraints.
|
||||
for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
|
||||
E = Constraints.end(); I != E; ++I) {
|
||||
|
||||
if (I->Type != InlineAsm::isOutput) {
|
||||
++ValueCount;
|
||||
continue; // Ignore non-output constraints.
|
||||
}
|
||||
|
||||
assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
|
||||
std::string C = InterpretASMConstraint(*I);
|
||||
if (C.empty()) continue;
|
||||
|
||||
if (!IsFirst) {
|
||||
Out << ", ";
|
||||
IsFirst = false;
|
||||
}
|
||||
|
||||
// Unpack the dest.
|
||||
Value *DestVal;
|
||||
int DestValNo = -1;
|
||||
|
||||
if (ValueCount < ResultVals.size()) {
|
||||
DestVal = ResultVals[ValueCount].first;
|
||||
DestValNo = ResultVals[ValueCount].second;
|
||||
} else
|
||||
DestVal = CI.getArgOperand(ValueCount-ResultVals.size());
|
||||
|
||||
if (I->isEarlyClobber)
|
||||
C = "&"+C;
|
||||
|
||||
Out << "\"=" << C << "\"(" << GetValueName(DestVal);
|
||||
if (DestValNo != -1)
|
||||
Out << ".field" << DestValNo; // Multiple retvals.
|
||||
Out << ")";
|
||||
++ValueCount;
|
||||
}
|
||||
|
||||
|
||||
// Convert over all the input constraints.
|
||||
Out << "\n :";
|
||||
IsFirst = true;
|
||||
ValueCount = 0;
|
||||
for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
|
||||
E = Constraints.end(); I != E; ++I) {
|
||||
if (I->Type != InlineAsm::isInput) {
|
||||
++ValueCount;
|
||||
continue; // Ignore non-input constraints.
|
||||
}
|
||||
|
||||
assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
|
||||
std::string C = InterpretASMConstraint(*I);
|
||||
if (C.empty()) continue;
|
||||
|
||||
if (!IsFirst) {
|
||||
Out << ", ";
|
||||
IsFirst = false;
|
||||
}
|
||||
|
||||
assert(ValueCount >= ResultVals.size() && "Input can't refer to result");
|
||||
Value *SrcVal = CI.getArgOperand(ValueCount-ResultVals.size());
|
||||
|
||||
Out << "\"" << C << "\"(";
|
||||
if (!I->isIndirect)
|
||||
writeOperand(SrcVal);
|
||||
else
|
||||
writeOperandDeref(SrcVal);
|
||||
Out << ")";
|
||||
}
|
||||
|
||||
// Convert over the clobber constraints.
|
||||
IsFirst = true;
|
||||
for (InlineAsm::ConstraintInfoVector::iterator I = Constraints.begin(),
|
||||
E = Constraints.end(); I != E; ++I) {
|
||||
if (I->Type != InlineAsm::isClobber)
|
||||
continue; // Ignore non-input constraints.
|
||||
|
||||
assert(I->Codes.size() == 1 && "Too many asm constraint codes to handle");
|
||||
std::string C = InterpretASMConstraint(*I);
|
||||
if (C.empty()) continue;
|
||||
|
||||
if (!IsFirst) {
|
||||
Out << ", ";
|
||||
IsFirst = false;
|
||||
}
|
||||
|
||||
Out << '\"' << C << '"';
|
||||
}
|
||||
|
||||
Out << ")";
|
||||
assert(!"Inline assembly not supported");
|
||||
}
|
||||
|
||||
void CWriter::visitAllocaInst(AllocaInst &I) {
|
||||
@@ -4240,14 +4155,14 @@ void CWriter::visitAtomicCmpXchgInst(AtomicCmpXchgInst &ACXI) {
|
||||
|
||||
class SmearCleanupPass : public llvm::BasicBlockPass {
|
||||
public:
|
||||
SmearCleanupPass(llvm::Module *m, int width)
|
||||
SmearCleanupPass(Module *m, int width)
|
||||
: BasicBlockPass(ID) { module = m; vectorWidth = width; }
|
||||
|
||||
const char *getPassName() const { return "Smear Cleanup Pass"; }
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
static char ID;
|
||||
llvm::Module *module;
|
||||
Module *module;
|
||||
int vectorWidth;
|
||||
};
|
||||
|
||||
@@ -4303,41 +4218,28 @@ SmearCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
assert(toMatch != NULL);
|
||||
|
||||
{
|
||||
// FIXME: generalize this/make it not so hard-coded?
|
||||
Type *matchType = toMatch->getType();
|
||||
const char *smearFuncName = NULL;
|
||||
|
||||
switch (matchType->getTypeID()) {
|
||||
case Type::FloatTyID: smearFuncName = "__smear_float"; break;
|
||||
case Type::DoubleTyID: smearFuncName = "__smear_double"; break;
|
||||
case Type::IntegerTyID: {
|
||||
switch (cast<IntegerType>(matchType)->getBitWidth()) {
|
||||
case 8: smearFuncName = "__smear_i8"; break;
|
||||
case 16: smearFuncName = "__smear_i16"; break;
|
||||
case 32: smearFuncName = "__smear_i32"; break;
|
||||
case 64: smearFuncName = "__smear_i64"; break;
|
||||
}
|
||||
}
|
||||
default: break;
|
||||
}
|
||||
const char *smearFuncName = lGetSmearFunc(matchType);
|
||||
|
||||
if (smearFuncName != NULL) {
|
||||
Function *smearFunc = module->getFunction(smearFuncName);
|
||||
if (smearFunc == NULL) {
|
||||
Constant *sf =
|
||||
module->getOrInsertFunction(smearFuncName, iter->getType(),
|
||||
matchType, NULL);
|
||||
iter->getType(), matchType, NULL);
|
||||
smearFunc = dyn_cast<Function>(sf);
|
||||
assert(smearFunc != NULL);
|
||||
smearFunc->setDoesNotThrow(true);
|
||||
smearFunc->setDoesNotAccessMemory(true);
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *undefResult = llvm::UndefValue::get(vt);
|
||||
assert(smearFunc != NULL);
|
||||
Value *args[1] = { toMatch };
|
||||
ArrayRef<llvm::Value *> argArray(&args[0], &args[1]);
|
||||
Value *args[2] = { undefResult, toMatch };
|
||||
ArrayRef<llvm::Value *> argArray(&args[0], &args[2]);
|
||||
Instruction *smearCall =
|
||||
CallInst::Create(smearFunc, argArray, "smear", (Instruction *)NULL);
|
||||
CallInst::Create(smearFunc, argArray, LLVMGetName(toMatch, "_smear"),
|
||||
(Instruction *)NULL);
|
||||
|
||||
ReplaceInstWithInst(iter, smearCall);
|
||||
|
||||
@@ -4401,6 +4303,155 @@ BitcastCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
return modifiedAny;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// MaskOpsCleanupPass
|
||||
|
||||
/** This pass does various peephole improvements to mask modification
|
||||
operations. In particular, it converts mask XORs with "all true" to
|
||||
calls to __not() and replaces operations like and(not(a), b) to
|
||||
__and_not1(a, b) (and similarly if the second operand has not applied
|
||||
to it...)
|
||||
*/
|
||||
class MaskOpsCleanupPass : public llvm::BasicBlockPass {
|
||||
public:
|
||||
MaskOpsCleanupPass(Module *m)
|
||||
: BasicBlockPass(ID) {
|
||||
Type *mt = LLVMTypes::MaskType;
|
||||
|
||||
// Declare the __not, __and_not1, and __and_not2 functions that we
|
||||
// expect the target to end up providing.
|
||||
notFunc =
|
||||
dyn_cast<Function>(m->getOrInsertFunction("__not", mt, mt, NULL));
|
||||
assert(notFunc != NULL);
|
||||
notFunc->addFnAttr(Attribute::NoUnwind);
|
||||
notFunc->addFnAttr(Attribute::ReadNone);
|
||||
|
||||
andNotFuncs[0] =
|
||||
dyn_cast<Function>(m->getOrInsertFunction("__and_not1", mt, mt, mt,
|
||||
NULL));
|
||||
assert(andNotFuncs[0] != NULL);
|
||||
andNotFuncs[0]->addFnAttr(Attribute::NoUnwind);
|
||||
andNotFuncs[0]->addFnAttr(Attribute::ReadNone);
|
||||
|
||||
andNotFuncs[1] =
|
||||
dyn_cast<Function>(m->getOrInsertFunction("__and_not2", mt, mt, mt,
|
||||
NULL));
|
||||
assert(andNotFuncs[1] != NULL);
|
||||
andNotFuncs[1]->addFnAttr(Attribute::NoUnwind);
|
||||
andNotFuncs[1]->addFnAttr(Attribute::ReadNone);
|
||||
}
|
||||
|
||||
const char *getPassName() const { return "MaskOps Cleanup Pass"; }
|
||||
bool runOnBasicBlock(llvm::BasicBlock &BB);
|
||||
|
||||
private:
|
||||
Value *lGetNotOperand(Value *v) const;
|
||||
|
||||
Function *notFunc, *andNotFuncs[2];
|
||||
|
||||
static char ID;
|
||||
};
|
||||
|
||||
char MaskOpsCleanupPass::ID = 0;
|
||||
|
||||
|
||||
/** Returns true if the given value is a compile-time constant vector of
|
||||
i1s with all elements 'true'.
|
||||
*/
|
||||
static bool
|
||||
lIsAllTrue(Value *v) {
|
||||
if (ConstantVector *cv = dyn_cast<ConstantVector>(v)) {
|
||||
ConstantInt *ci;
|
||||
return (cv->getSplatValue() != NULL &&
|
||||
(ci = dyn_cast<ConstantInt>(cv->getSplatValue())) != NULL &&
|
||||
ci->isOne());
|
||||
}
|
||||
|
||||
#ifndef LLVM_3_0
|
||||
if (ConstantDataVector *cdv = dyn_cast<ConstantDataVector>(v)) {
|
||||
ConstantInt *ci;
|
||||
return (cdv->getSplatValue() != NULL &&
|
||||
(ci = dyn_cast<ConstantInt>(cdv->getSplatValue())) != NULL &&
|
||||
ci->isOne());
|
||||
}
|
||||
#endif
|
||||
|
||||
return false;
|
||||
}
|
||||
|
||||
|
||||
/** Checks to see if the given value is the NOT of some other value. If
|
||||
so, it returns the operand of the NOT; otherwise returns NULL.
|
||||
*/
|
||||
Value *
|
||||
MaskOpsCleanupPass::lGetNotOperand(Value *v) const {
|
||||
if (CallInst *ci = dyn_cast<CallInst>(v))
|
||||
if (ci->getCalledFunction() == notFunc)
|
||||
// Direct call to __not()
|
||||
return ci->getArgOperand(0);
|
||||
|
||||
if (BinaryOperator *bop = dyn_cast<BinaryOperator>(v))
|
||||
if (bop->getOpcode() == Instruction::Xor &&
|
||||
lIsAllTrue(bop->getOperand(1)))
|
||||
// XOR of all-true vector.
|
||||
return bop->getOperand(0);
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
MaskOpsCleanupPass::runOnBasicBlock(llvm::BasicBlock &bb) {
|
||||
bool modifiedAny = false;
|
||||
|
||||
restart:
|
||||
for (BasicBlock::iterator iter = bb.begin(), e = bb.end(); iter != e; ++iter) {
|
||||
BinaryOperator *bop = dyn_cast<BinaryOperator>(&*iter);
|
||||
if (bop == NULL)
|
||||
continue;
|
||||
|
||||
if (bop->getType() != LLVMTypes::MaskType)
|
||||
continue;
|
||||
|
||||
if (bop->getOpcode() == Instruction::Xor) {
|
||||
// Check for XOR with all-true values
|
||||
if (lIsAllTrue(bop->getOperand(1))) {
|
||||
ArrayRef<Value *> arg(bop->getOperand(0));
|
||||
CallInst *notCall = CallInst::Create(notFunc, arg,
|
||||
bop->getName());
|
||||
ReplaceInstWithInst(iter, notCall);
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
else if (bop->getOpcode() == Instruction::And) {
|
||||
// Check each of the operands to see if they have NOT applied
|
||||
// to them.
|
||||
for (int i = 0; i < 2; ++i) {
|
||||
if (Value *notOp = lGetNotOperand(bop->getOperand(i))) {
|
||||
// In notOp we have the target of the NOT operation;
|
||||
// put it in its appropriate spot in the operand array.
|
||||
// Copy in the other operand directly.
|
||||
Value *args[2];
|
||||
args[i] = notOp;
|
||||
args[i ^ 1] = bop->getOperand(i ^ 1);
|
||||
ArrayRef<Value *> argsRef(&args[0], 2);
|
||||
|
||||
// Call the appropriate __and_not* function.
|
||||
CallInst *andNotCall =
|
||||
CallInst::Create(andNotFuncs[i], argsRef, bop->getName());
|
||||
|
||||
ReplaceInstWithInst(iter, andNotCall);
|
||||
modifiedAny = true;
|
||||
goto restart;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return modifiedAny;
|
||||
}
|
||||
|
||||
|
||||
//===----------------------------------------------------------------------===//
|
||||
// External Interface declaration
|
||||
@@ -4432,6 +4483,7 @@ WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth,
|
||||
pm.add(createCFGSimplificationPass()); // clean up after lower invoke.
|
||||
pm.add(new SmearCleanupPass(module, vectorWidth));
|
||||
pm.add(new BitcastCleanupPass);
|
||||
pm.add(new MaskOpsCleanupPass(module));
|
||||
pm.add(createDeadCodeEliminationPass()); // clean up after smear pass
|
||||
//CO pm.add(createPrintModulePass(&fos));
|
||||
pm.add(new CWriter(fos, includeName, vectorWidth));
|
||||
@@ -4442,5 +4494,3 @@ WriteCXXFile(llvm::Module *module, const char *fn, int vectorWidth,
|
||||
|
||||
return true;
|
||||
}
|
||||
|
||||
#endif // LLVM_2_9
|
||||
|
||||
@@ -17,7 +17,7 @@ syn keyword ispcStatement cbreak ccontinue creturn launch print reference soa sy
|
||||
syn keyword ispcConditional cif
|
||||
syn keyword ispcRepeat cdo cfor cwhile
|
||||
syn keyword ispcBuiltin programCount programIndex
|
||||
syn keyword ispcType export int8 int16 int32 int64
|
||||
syn keyword ispcType export uniform varying int8 int16 int32 int64
|
||||
|
||||
" Default highlighting
|
||||
command -nargs=+ HiLink hi def link <args>
|
||||
|
||||
8
contrib/ispc.vim.README
Normal file
8
contrib/ispc.vim.README
Normal file
@@ -0,0 +1,8 @@
|
||||
To install vim syntax highlighting for ispc files:
|
||||
|
||||
1) Copy ispc.vim into ~/.vim/syntax/ispc.vim (create if necessary)
|
||||
2) Create a filetype for ispc files to correspond to that syntax file
|
||||
To do this, create ~/.vim/ftdetect/ispc.vim if it does not already exist and append the following line to it:
|
||||
|
||||
au BufRead,BufNewFile *.ispc set filetype=ispc
|
||||
|
||||
32
ctx.h
32
ctx.h
@@ -248,6 +248,10 @@ public:
|
||||
new basic block that it starts. */
|
||||
llvm::BasicBlock *GetLabeledBasicBlock(const std::string &label);
|
||||
|
||||
/** Returns a vector of all labels in the context. This is
|
||||
simply the key set of the labelMap */
|
||||
std::vector<std::string> GetLabels();
|
||||
|
||||
/** Called to generate code for 'return' statement; value is the
|
||||
expression in the return statement (if non-NULL), and
|
||||
doCoherenceCheck indicates whether instructions should be generated
|
||||
@@ -272,7 +276,7 @@ public:
|
||||
llvm::Value *None(llvm::Value *mask);
|
||||
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i32 value wherein the i'th bit is on if and only if the i'th lane
|
||||
i64 value wherein the i'th bit is on if and only if the i'th lane
|
||||
of the mask is on. */
|
||||
llvm::Value *LaneMask(llvm::Value *mask);
|
||||
|
||||
@@ -338,7 +342,7 @@ public:
|
||||
|
||||
/** Emits debugging information for the function parameter represented
|
||||
by sym. */
|
||||
void EmitFunctionParameterDebugInfo(Symbol *sym);
|
||||
void EmitFunctionParameterDebugInfo(Symbol *sym, int parameterNum);
|
||||
/** @} */
|
||||
|
||||
/** @name IR instruction emission
|
||||
@@ -380,23 +384,23 @@ public:
|
||||
array, for pointer types). */
|
||||
llvm::Value *SmearUniform(llvm::Value *value, const char *name = NULL);
|
||||
|
||||
llvm::Value *BitCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *BitCastInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Value *PtrToIntInst(llvm::Value *value, const char *name = NULL);
|
||||
llvm::Value *PtrToIntInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *PtrToIntInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Value *IntToPtrInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *IntToPtrInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
|
||||
llvm::Instruction *TruncInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Instruction *TruncInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
|
||||
LLVM_TYPE_CONST llvm::Type *type, const char *name = NULL);
|
||||
llvm::Instruction *FPCastInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Type *type, const char *name = NULL);
|
||||
llvm::Instruction *FPCastInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *SExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Instruction *SExtInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *ZExtInst(llvm::Value *value, LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Instruction *ZExtInst(llvm::Value *value, llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
|
||||
/** Given two integer-typed values (but possibly one vector and the
|
||||
@@ -448,7 +452,7 @@ public:
|
||||
instruction is added at the start of the function in the entry
|
||||
basic block; if it should be added to the current basic block, then
|
||||
the atEntryBlock parameter should be false. */
|
||||
llvm::Value *AllocaInst(LLVM_TYPE_CONST llvm::Type *llvmType,
|
||||
llvm::Value *AllocaInst(llvm::Type *llvmType,
|
||||
const char *name = NULL, int align = 0,
|
||||
bool atEntryBlock = true);
|
||||
|
||||
@@ -485,7 +489,7 @@ public:
|
||||
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
||||
const char *name = NULL);
|
||||
|
||||
llvm::PHINode *PhiNode(LLVM_TYPE_CONST llvm::Type *type, int count,
|
||||
llvm::PHINode *PhiNode(llvm::Type *type, int count,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
|
||||
llvm::Value *val1, const char *name = NULL);
|
||||
@@ -632,12 +636,12 @@ private:
|
||||
std::vector<CFInfo *> controlFlowInfo;
|
||||
|
||||
/** DIFile object corresponding to the source file where the current
|
||||
function was defined (used for debugging info0. */
|
||||
function was defined (used for debugging info). */
|
||||
llvm::DIFile diFile;
|
||||
|
||||
/** DISubprogram corresponding to this function (used for debugging
|
||||
info). */
|
||||
llvm::DISubprogram diFunction;
|
||||
llvm::DISubprogram diSubprogram;
|
||||
|
||||
/** These correspond to the current set of nested scopes in the
|
||||
function. */
|
||||
|
||||
484
decl.cpp
484
decl.cpp
@@ -33,7 +33,7 @@
|
||||
|
||||
/** @file decl.cpp
|
||||
@brief Implementations of classes related to turning declarations into
|
||||
symbols and types.
|
||||
symbol names and types.
|
||||
*/
|
||||
|
||||
#include "decl.h"
|
||||
@@ -44,6 +44,7 @@
|
||||
#include "stmt.h"
|
||||
#include "expr.h"
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <set>
|
||||
|
||||
static void
|
||||
@@ -55,6 +56,7 @@ lPrintTypeQualifiers(int typeQualifiers) {
|
||||
if (typeQualifiers & TYPEQUAL_TASK) printf("task ");
|
||||
if (typeQualifiers & TYPEQUAL_SIGNED) printf("signed ");
|
||||
if (typeQualifiers & TYPEQUAL_UNSIGNED) printf("unsigned ");
|
||||
if (typeQualifiers & TYPEQUAL_EXPORT) printf("export ");
|
||||
}
|
||||
|
||||
|
||||
@@ -134,7 +136,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
|
||||
}
|
||||
|
||||
if (vectorSize > 0) {
|
||||
const AtomicType *atomicType = dynamic_cast<const AtomicType *>(retType);
|
||||
const AtomicType *atomicType = CastType<AtomicType>(retType);
|
||||
if (atomicType == NULL) {
|
||||
Error(pos, "Only atomic types (int, float, ...) are legal for vector "
|
||||
"types.");
|
||||
@@ -146,7 +148,7 @@ DeclSpecs::GetBaseType(SourcePos pos) const {
|
||||
retType = lApplyTypeQualifiers(typeQualifiers, retType, pos);
|
||||
|
||||
if (soaWidth > 0) {
|
||||
const StructType *st = dynamic_cast<const StructType *>(retType);
|
||||
const StructType *st = CastType<StructType>(retType);
|
||||
|
||||
if (st == NULL) {
|
||||
Error(pos, "Illegal to provide soa<%d> qualifier with non-struct "
|
||||
@@ -188,7 +190,6 @@ lGetStorageClassName(StorageClass storageClass) {
|
||||
case SC_NONE: return "";
|
||||
case SC_EXTERN: return "extern";
|
||||
case SC_EXTERN_C: return "extern \"C\"";
|
||||
case SC_EXPORT: return "export";
|
||||
case SC_STATIC: return "static";
|
||||
case SC_TYPEDEF: return "typedef";
|
||||
default: FATAL("Unhandled storage class in lGetStorageClassName");
|
||||
@@ -217,50 +218,44 @@ Declarator::Declarator(DeclaratorKind dk, SourcePos p)
|
||||
: pos(p), kind(dk) {
|
||||
child = NULL;
|
||||
typeQualifiers = 0;
|
||||
storageClass = SC_NONE;
|
||||
arraySize = -1;
|
||||
sym = NULL;
|
||||
type = NULL;
|
||||
initExpr = NULL;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
|
||||
const Type *t = GetType(ds);
|
||||
if (t == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
const Type *baseType = ds->GetBaseType(pos);
|
||||
InitFromType(baseType, ds);
|
||||
|
||||
if (type == NULL) {
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
Symbol *sym = GetSymbol();
|
||||
if (sym != NULL) {
|
||||
sym->type = t;
|
||||
sym->storageClass = ds->storageClass;
|
||||
storageClass = ds->storageClass;
|
||||
|
||||
if (ds->declSpecList.size() > 0 &&
|
||||
CastType<FunctionType>(type) == NULL) {
|
||||
Error(pos, "__declspec specifiers for non-function type \"%s\" are "
|
||||
"not used.", type->GetString().c_str());
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
Symbol *
|
||||
Declarator::GetSymbol() const {
|
||||
// The symbol lives at the last child in the chain, so walk down there
|
||||
// and return the one there.
|
||||
const Declarator *d = this;
|
||||
while (d->child != NULL)
|
||||
d = d->child;
|
||||
return d->sym;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declarator::Print(int indent) const {
|
||||
printf("%*cdeclarator: [", indent, ' ');
|
||||
pos.Print();
|
||||
|
||||
lPrintTypeQualifiers(typeQualifiers);
|
||||
Symbol *sym = GetSymbol();
|
||||
if (sym != NULL)
|
||||
printf("%s", sym->name.c_str());
|
||||
printf("%s ", lGetStorageClassName(storageClass));
|
||||
if (name.size() > 0)
|
||||
printf("%s", name.c_str());
|
||||
else
|
||||
printf("(null symbol)");
|
||||
printf("(unnamed)");
|
||||
|
||||
printf(", array size = %d", arraySize);
|
||||
|
||||
@@ -294,66 +289,26 @@ Declarator::Print(int indent) const {
|
||||
}
|
||||
|
||||
|
||||
Symbol *
|
||||
Declarator::GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *funArgs) {
|
||||
const FunctionType *type =
|
||||
dynamic_cast<const FunctionType *>(GetType(ds));
|
||||
if (type == NULL)
|
||||
return NULL;
|
||||
|
||||
Symbol *declSym = GetSymbol();
|
||||
Assert(declSym != NULL);
|
||||
|
||||
// Get the symbol for the function from the symbol table. (It should
|
||||
// already have been added to the symbol table by AddGlobal() by the
|
||||
// time we get here.)
|
||||
Symbol *funSym = m->symbolTable->LookupFunction(declSym->name.c_str(), type);
|
||||
if (funSym == NULL)
|
||||
// May be NULL due to error earlier in compilation
|
||||
Assert(m->errorCount > 0);
|
||||
else
|
||||
funSym->pos = pos;
|
||||
|
||||
// Walk down to the declarator for the function. (We have to get past
|
||||
// the stuff that specifies the function's return type before we get to
|
||||
// the function's declarator.)
|
||||
Declarator *d = this;
|
||||
while (d != NULL && d->kind != DK_FUNCTION)
|
||||
d = d->child;
|
||||
Assert(d != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < d->functionParams.size(); ++i) {
|
||||
Symbol *sym = d->GetSymbolForFunctionParameter(i);
|
||||
if (sym->type == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
else
|
||||
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
funArgs->push_back(sym);
|
||||
}
|
||||
|
||||
if (funSym != NULL)
|
||||
funSym->type = funSym->type->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
return funSym;
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
void
|
||||
Declarator::InitFromType(const Type *baseType, DeclSpecs *ds) {
|
||||
bool hasUniformQual = ((typeQualifiers & TYPEQUAL_UNIFORM) != 0);
|
||||
bool hasVaryingQual = ((typeQualifiers & TYPEQUAL_VARYING) != 0);
|
||||
bool isTask = ((typeQualifiers & TYPEQUAL_TASK) != 0);
|
||||
bool isExported = ((typeQualifiers & TYPEQUAL_EXPORT) != 0);
|
||||
bool isConst = ((typeQualifiers & TYPEQUAL_CONST) != 0);
|
||||
|
||||
if (hasUniformQual && hasVaryingQual) {
|
||||
Error(pos, "Can't provide both \"uniform\" and \"varying\" qualifiers.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (kind != DK_FUNCTION && isTask)
|
||||
if (kind != DK_FUNCTION && isTask) {
|
||||
Error(pos, "\"task\" qualifier illegal in variable declaration.");
|
||||
return;
|
||||
}
|
||||
if (kind != DK_FUNCTION && isExported) {
|
||||
Error(pos, "\"export\" qualifier illegal in variable declaration.");
|
||||
return;
|
||||
}
|
||||
|
||||
Variability variability(Variability::Unbound);
|
||||
if (hasUniformQual)
|
||||
@@ -361,91 +316,125 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
else if (hasVaryingQual)
|
||||
variability = Variability::Varying;
|
||||
|
||||
const Type *type = base;
|
||||
switch (kind) {
|
||||
case DK_BASE:
|
||||
if (kind == DK_BASE) {
|
||||
// All of the type qualifiers should be in the DeclSpecs for the
|
||||
// base declarator
|
||||
Assert(typeQualifiers == 0);
|
||||
Assert(child == NULL);
|
||||
return type;
|
||||
|
||||
case DK_POINTER:
|
||||
AssertPos(pos, typeQualifiers == 0);
|
||||
AssertPos(pos, child == NULL);
|
||||
type = baseType;
|
||||
}
|
||||
else if (kind == DK_POINTER) {
|
||||
/* For now, any pointer to an SOA type gets the slice property; if
|
||||
we add the capability to declare pointers as slices or not,
|
||||
we'll want to set this based on a type qualifier here. */
|
||||
type = new PointerType(type, variability, isConst, type->IsSOAType());
|
||||
if (child != NULL)
|
||||
return child->GetType(type, ds);
|
||||
const Type *ptrType = new PointerType(baseType, variability, isConst,
|
||||
baseType->IsSOAType());
|
||||
if (child != NULL) {
|
||||
child->InitFromType(ptrType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
else
|
||||
return type;
|
||||
break;
|
||||
|
||||
case DK_REFERENCE:
|
||||
if (hasUniformQual)
|
||||
type = ptrType;
|
||||
}
|
||||
else if (kind == DK_REFERENCE) {
|
||||
if (hasUniformQual) {
|
||||
Error(pos, "\"uniform\" qualifier is illegal to apply to references.");
|
||||
if (hasVaryingQual)
|
||||
return;
|
||||
}
|
||||
if (hasVaryingQual) {
|
||||
Error(pos, "\"varying\" qualifier is illegal to apply to references.");
|
||||
if (isConst)
|
||||
return;
|
||||
}
|
||||
if (isConst) {
|
||||
Error(pos, "\"const\" qualifier is to illegal apply to references.");
|
||||
|
||||
return;
|
||||
}
|
||||
// The parser should disallow this already, but double check.
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL) {
|
||||
if (CastType<ReferenceType>(baseType) != NULL) {
|
||||
Error(pos, "References to references are illegal.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
type = new ReferenceType(type);
|
||||
if (child != NULL)
|
||||
return child->GetType(type, ds);
|
||||
const Type *refType = new ReferenceType(baseType);
|
||||
if (child != NULL) {
|
||||
child->InitFromType(refType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
else
|
||||
return type;
|
||||
break;
|
||||
|
||||
case DK_ARRAY:
|
||||
if (Type::Equal(type, AtomicType::Void)) {
|
||||
type = refType;
|
||||
}
|
||||
else if (kind == DK_ARRAY) {
|
||||
if (Type::Equal(baseType, AtomicType::Void)) {
|
||||
Error(pos, "Arrays of \"void\" type are illegal.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (dynamic_cast<const ReferenceType *>(type)) {
|
||||
if (CastType<ReferenceType>(baseType)) {
|
||||
Error(pos, "Arrays of references (type \"%s\") are illegal.",
|
||||
type->GetString().c_str());
|
||||
return NULL;
|
||||
baseType->GetString().c_str());
|
||||
return;
|
||||
}
|
||||
|
||||
type = new ArrayType(type, arraySize);
|
||||
if (child)
|
||||
return child->GetType(type, ds);
|
||||
const Type *arrayType = new ArrayType(baseType, arraySize);
|
||||
if (child != NULL) {
|
||||
child->InitFromType(arrayType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
else
|
||||
return type;
|
||||
break;
|
||||
|
||||
case DK_FUNCTION: {
|
||||
std::vector<const Type *> args;
|
||||
std::vector<std::string> argNames;
|
||||
std::vector<ConstExpr *> argDefaults;
|
||||
std::vector<SourcePos> argPos;
|
||||
|
||||
type = arrayType;
|
||||
}
|
||||
else if (kind == DK_FUNCTION) {
|
||||
llvm::SmallVector<const Type *, 8> args;
|
||||
llvm::SmallVector<std::string, 8> argNames;
|
||||
llvm::SmallVector<Expr *, 8> argDefaults;
|
||||
llvm::SmallVector<SourcePos, 8> argPos;
|
||||
|
||||
// Loop over the function arguments and store the names, types,
|
||||
// default values (if any), and source file positions each one in
|
||||
// the corresponding vector.
|
||||
for (unsigned int i = 0; i < functionParams.size(); ++i) {
|
||||
Declaration *d = functionParams[i];
|
||||
|
||||
Symbol *sym = GetSymbolForFunctionParameter(i);
|
||||
|
||||
if (d->declSpecs->storageClass != SC_NONE)
|
||||
Error(sym->pos, "Storage class \"%s\" is illegal in "
|
||||
"function parameter declaration for parameter \"%s\".",
|
||||
lGetStorageClassName(d->declSpecs->storageClass),
|
||||
sym->name.c_str());
|
||||
if (Type::Equal(sym->type, AtomicType::Void)) {
|
||||
Error(sym->pos, "Parameter with type \"void\" illegal in function "
|
||||
"parameter list.");
|
||||
sym->type = NULL;
|
||||
if (d == NULL) {
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
if (d->declarators.size() == 0) {
|
||||
// function declaration like foo(float), w/o a name for the
|
||||
// parameter; wire up a placeholder Declarator for it
|
||||
d->declarators.push_back(new Declarator(DK_BASE, pos));
|
||||
d->declarators[0]->InitFromDeclSpecs(d->declSpecs);
|
||||
}
|
||||
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(sym->type);
|
||||
AssertPos(pos, d->declarators.size() == 1);
|
||||
Declarator *decl = d->declarators[0];
|
||||
if (decl == NULL || decl->type == NULL) {
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
if (decl->name == "") {
|
||||
// Give a name to any anonymous parameter declarations
|
||||
char buf[32];
|
||||
sprintf(buf, "__anon_parameter_%d", i);
|
||||
decl->name = buf;
|
||||
}
|
||||
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
if (d->declSpecs->storageClass != SC_NONE)
|
||||
Error(decl->pos, "Storage class \"%s\" is illegal in "
|
||||
"function parameter declaration for parameter \"%s\".",
|
||||
lGetStorageClassName(d->declSpecs->storageClass),
|
||||
decl->name.c_str());
|
||||
if (Type::Equal(decl->type, AtomicType::Void)) {
|
||||
Error(decl->pos, "Parameter with type \"void\" illegal in function "
|
||||
"parameter list.");
|
||||
decl->type = NULL;
|
||||
}
|
||||
|
||||
const ArrayType *at = CastType<ArrayType>(decl->type);
|
||||
if (at != NULL) {
|
||||
// As in C, arrays are passed to functions as pointers to
|
||||
// their element type. We'll just immediately make this
|
||||
@@ -455,93 +444,94 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
// report this differently than it was originally declared
|
||||
// in the function, but it's not clear that this is a
|
||||
// significant problem.)
|
||||
if (at->GetElementType() == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
const Type *targetType = at->GetElementType();
|
||||
if (targetType == NULL) {
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const Type *targetType = at->GetElementType();
|
||||
targetType =
|
||||
targetType->ResolveUnboundVariability(Variability::Varying);
|
||||
sym->type = PointerType::GetUniform(targetType);
|
||||
decl->type = PointerType::GetUniform(targetType);
|
||||
|
||||
// Make sure there are no unsized arrays (other than the
|
||||
// first dimension) in function parameter lists.
|
||||
at = dynamic_cast<const ArrayType *>(at->GetElementType());
|
||||
at = CastType<ArrayType>(targetType);
|
||||
while (at != NULL) {
|
||||
if (at->GetElementCount() == 0)
|
||||
Error(sym->pos, "Arrays with unsized dimensions in "
|
||||
Error(decl->pos, "Arrays with unsized dimensions in "
|
||||
"dimensions after the first one are illegal in "
|
||||
"function parameter lists.");
|
||||
at = dynamic_cast<const ArrayType *>(at->GetElementType());
|
||||
at = CastType<ArrayType>(at->GetElementType());
|
||||
}
|
||||
}
|
||||
|
||||
args.push_back(sym->type);
|
||||
argNames.push_back(sym->name);
|
||||
argPos.push_back(sym->pos);
|
||||
args.push_back(decl->type);
|
||||
argNames.push_back(decl->name);
|
||||
argPos.push_back(decl->pos);
|
||||
|
||||
ConstExpr *init = NULL;
|
||||
if (d->declarators.size()) {
|
||||
// Try to find an initializer expression; if there is one,
|
||||
// it lives down to the base declarator.
|
||||
Declarator *decl = d->declarators[0];
|
||||
while (decl->child != NULL) {
|
||||
Assert(decl->initExpr == NULL);
|
||||
Expr *init = NULL;
|
||||
// Try to find an initializer expression.
|
||||
while (decl != NULL) {
|
||||
if (decl->initExpr != NULL) {
|
||||
decl->initExpr = TypeCheck(decl->initExpr);
|
||||
decl->initExpr = Optimize(decl->initExpr);
|
||||
if (decl->initExpr != NULL) {
|
||||
init = dynamic_cast<ConstExpr *>(decl->initExpr);
|
||||
if (init == NULL)
|
||||
init = dynamic_cast<NullPointerExpr *>(decl->initExpr);
|
||||
if (init == NULL)
|
||||
Error(decl->initExpr->pos, "Default value for parameter "
|
||||
"\"%s\" must be a compile-time constant.",
|
||||
decl->name.c_str());
|
||||
}
|
||||
break;
|
||||
}
|
||||
else
|
||||
decl = decl->child;
|
||||
}
|
||||
|
||||
if (decl->initExpr != NULL &&
|
||||
(decl->initExpr = TypeCheck(decl->initExpr)) != NULL &&
|
||||
(decl->initExpr = Optimize(decl->initExpr)) != NULL &&
|
||||
(init = dynamic_cast<ConstExpr *>(decl->initExpr)) == NULL) {
|
||||
Error(decl->initExpr->pos, "Default value for parameter "
|
||||
"\"%s\" must be a compile-time constant.",
|
||||
sym->name.c_str());
|
||||
}
|
||||
}
|
||||
argDefaults.push_back(init);
|
||||
}
|
||||
|
||||
const Type *returnType = type;
|
||||
const Type *returnType = baseType;
|
||||
if (returnType == NULL) {
|
||||
Error(pos, "No return type provided in function declaration.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (dynamic_cast<const FunctionType *>(returnType) != NULL) {
|
||||
|
||||
if (CastType<FunctionType>(returnType) != NULL) {
|
||||
Error(pos, "Illegal to return function type from function.");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
bool isExported = ds && (ds->storageClass == SC_EXPORT);
|
||||
returnType = returnType->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
bool isExternC = ds && (ds->storageClass == SC_EXTERN_C);
|
||||
bool isExported = ds && ((ds->typeQualifiers & TYPEQUAL_EXPORT) != 0);
|
||||
bool isTask = ds && ((ds->typeQualifiers & TYPEQUAL_TASK) != 0);
|
||||
|
||||
if (isExported && isTask) {
|
||||
Error(pos, "Function can't have both \"task\" and \"export\" "
|
||||
"qualifiers");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (isExternC && isTask) {
|
||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"task\" "
|
||||
"qualifiers");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
if (isExternC && isExported) {
|
||||
Error(pos, "Function can't have both \"extern \"C\"\" and \"export\" "
|
||||
"qualifiers");
|
||||
return NULL;
|
||||
return;
|
||||
}
|
||||
|
||||
if (child == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const FunctionType *functionType =
|
||||
new FunctionType(returnType, args, argNames, argDefaults,
|
||||
argPos, isTask, isExported, isExternC);
|
||||
functionType = functionType->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
// handle any explicit __declspecs on the function
|
||||
if (ds != NULL) {
|
||||
@@ -563,60 +553,12 @@ Declarator::GetType(const Type *base, DeclSpecs *ds) const {
|
||||
}
|
||||
}
|
||||
|
||||
return child->GetType(functionType, ds);
|
||||
}
|
||||
default:
|
||||
FATAL("Unexpected decl kind");
|
||||
return NULL;
|
||||
child->InitFromType(functionType, ds);
|
||||
type = child->type;
|
||||
name = child->name;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
const Type *
|
||||
Declarator::GetType(DeclSpecs *ds) const {
|
||||
const Type *baseType = ds->GetBaseType(pos);
|
||||
const Type *type = GetType(baseType, ds);
|
||||
|
||||
if (ds->declSpecList.size() > 0 &&
|
||||
type != NULL &&
|
||||
dynamic_cast<const FunctionType *>(type) == NULL) {
|
||||
Error(pos, "__declspec specifiers for non-function type \"%s\" are "
|
||||
"not used.", type->GetString().c_str());
|
||||
}
|
||||
|
||||
return type;
|
||||
}
|
||||
|
||||
|
||||
Symbol *
|
||||
Declarator::GetSymbolForFunctionParameter(int paramNum) const {
|
||||
Assert(paramNum < (int)functionParams.size());
|
||||
Declaration *d = functionParams[paramNum];
|
||||
|
||||
char buf[32];
|
||||
Symbol *sym;
|
||||
if (d->declarators.size() == 0) {
|
||||
// function declaration like foo(float), w/o a name for
|
||||
// the parameter
|
||||
sprintf(buf, "__anon_parameter_%d", paramNum);
|
||||
sym = new Symbol(buf, pos);
|
||||
sym->type = d->declSpecs->GetBaseType(pos);
|
||||
}
|
||||
else {
|
||||
Assert(d->declarators.size() == 1);
|
||||
sym = d->declarators[0]->GetSymbol();
|
||||
if (sym == NULL) {
|
||||
// Handle more complex anonymous declarations like
|
||||
// float (float **).
|
||||
sprintf(buf, "__anon_parameter_%d", paramNum);
|
||||
sym = new Symbol(buf, d->declarators[0]->pos);
|
||||
sym->type = d->declarators[0]->GetType(d->declSpecs);
|
||||
}
|
||||
}
|
||||
return sym;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Declaration
|
||||
|
||||
@@ -646,27 +588,23 @@ Declaration::GetVariableDeclarations() const {
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL) {
|
||||
if (decl == NULL || decl->type == NULL) {
|
||||
// Ignore earlier errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
if (sym == NULL || sym->type == NULL) {
|
||||
// Ignore errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
if (Type::Equal(sym->type, AtomicType::Void))
|
||||
Error(sym->pos, "\"void\" type variable illegal in declaration.");
|
||||
else if (dynamic_cast<const FunctionType *>(sym->type) == NULL) {
|
||||
if (Type::Equal(decl->type, AtomicType::Void))
|
||||
Error(decl->pos, "\"void\" type variable illegal in declaration.");
|
||||
else if (CastType<FunctionType>(decl->type) == NULL) {
|
||||
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
|
||||
Symbol *sym = new Symbol(decl->name, decl->pos, decl->type,
|
||||
decl->storageClass);
|
||||
m->symbolTable->AddVariable(sym);
|
||||
vars.push_back(VariableDeclaration(sym, decl->initExpr));
|
||||
}
|
||||
}
|
||||
|
||||
return vars;
|
||||
}
|
||||
|
||||
@@ -677,25 +615,19 @@ Declaration::DeclareFunctions() {
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i) {
|
||||
Declarator *decl = declarators[i];
|
||||
if (decl == NULL) {
|
||||
if (decl == NULL || decl->type == NULL) {
|
||||
// Ignore earlier errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
if (sym == NULL || sym->type == NULL) {
|
||||
// Ignore errors
|
||||
Assert(m->errorCount > 0);
|
||||
continue;
|
||||
}
|
||||
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
if (dynamic_cast<const FunctionType *>(sym->type) == NULL)
|
||||
const FunctionType *ftype = CastType<FunctionType>(decl->type);
|
||||
if (ftype == NULL)
|
||||
continue;
|
||||
|
||||
bool isInline = (declSpecs->typeQualifiers & TYPEQUAL_INLINE);
|
||||
m->AddFunctionDeclaration(sym, isInline);
|
||||
m->AddFunctionDeclaration(decl->name, ftype, decl->storageClass,
|
||||
isInline, decl->pos);
|
||||
}
|
||||
}
|
||||
|
||||
@@ -709,13 +641,14 @@ Declaration::Print(int indent) const {
|
||||
declarators[i]->Print(indent+4);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames,
|
||||
std::vector<SourcePos> *elementPositions) {
|
||||
llvm::SmallVector<const Type *, 8> *elementTypes,
|
||||
llvm::SmallVector<std::string, 8> *elementNames,
|
||||
llvm::SmallVector<SourcePos, 8> *elementPositions) {
|
||||
std::set<std::string> seenNames;
|
||||
for (unsigned int i = 0; i < sd.size(); ++i) {
|
||||
const Type *type = sd[i]->type;
|
||||
@@ -725,38 +658,41 @@ GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
// FIXME: making this fake little DeclSpecs here is really
|
||||
// disgusting
|
||||
DeclSpecs ds(type);
|
||||
if (type->IsUniformType())
|
||||
ds.typeQualifiers |= TYPEQUAL_UNIFORM;
|
||||
else if (type->IsVaryingType())
|
||||
ds.typeQualifiers |= TYPEQUAL_VARYING;
|
||||
if (Type::Equal(type, AtomicType::Void) == false) {
|
||||
if (type->IsUniformType())
|
||||
ds.typeQualifiers |= TYPEQUAL_UNIFORM;
|
||||
else if (type->IsVaryingType())
|
||||
ds.typeQualifiers |= TYPEQUAL_VARYING;
|
||||
else if (type->GetSOAWidth() != 0)
|
||||
ds.soaWidth = type->GetSOAWidth();
|
||||
// FIXME: ds.vectorSize?
|
||||
}
|
||||
|
||||
for (unsigned int j = 0; j < sd[i]->declarators->size(); ++j) {
|
||||
Declarator *d = (*sd[i]->declarators)[j];
|
||||
d->InitFromDeclSpecs(&ds);
|
||||
|
||||
Symbol *sym = d->GetSymbol();
|
||||
|
||||
if (Type::Equal(sym->type, AtomicType::Void))
|
||||
if (Type::Equal(d->type, AtomicType::Void))
|
||||
Error(d->pos, "\"void\" type illegal for struct member.");
|
||||
|
||||
const ArrayType *arrayType =
|
||||
dynamic_cast<const ArrayType *>(sym->type);
|
||||
if (arrayType != NULL && arrayType->GetElementCount() == 0) {
|
||||
Error(d->pos, "Unsized arrays aren't allowed in struct "
|
||||
"definitions.");
|
||||
elementTypes->push_back(NULL);
|
||||
}
|
||||
else
|
||||
elementTypes->push_back(sym->type);
|
||||
elementTypes->push_back(d->type);
|
||||
|
||||
if (seenNames.find(sym->name) != seenNames.end())
|
||||
if (seenNames.find(d->name) != seenNames.end())
|
||||
Error(d->pos, "Struct member \"%s\" has same name as a "
|
||||
"previously-declared member.", sym->name.c_str());
|
||||
"previously-declared member.", d->name.c_str());
|
||||
else
|
||||
seenNames.insert(sym->name);
|
||||
seenNames.insert(d->name);
|
||||
|
||||
elementNames->push_back(sym->name);
|
||||
elementPositions->push_back(sym->pos);
|
||||
elementNames->push_back(d->name);
|
||||
elementPositions->push_back(d->pos);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i < (int)elementTypes->size() - 1; ++i) {
|
||||
const ArrayType *arrayType = CastType<ArrayType>((*elementTypes)[i]);
|
||||
|
||||
if (arrayType != NULL && arrayType->GetElementCount() == 0)
|
||||
Error((*elementPositions)[i], "Unsized arrays aren't allowed except "
|
||||
"for the last member in a struct definition.");
|
||||
}
|
||||
}
|
||||
|
||||
54
decl.h
54
decl.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -47,30 +47,21 @@
|
||||
variables--here, that the declaration has the 'static' and 'uniform'
|
||||
qualifiers, and that its basic type is 'int'. Then for each variable
|
||||
declaration, the Declaration class holds an instance of a Declarator,
|
||||
which in turn records the per-variable information like the symbol
|
||||
name, array size (if any), initializer expression, etc.
|
||||
which in turn records the per-variable information like the name, array
|
||||
size (if any), initializer expression, etc.
|
||||
*/
|
||||
|
||||
#ifndef ISPC_DECL_H
|
||||
#define ISPC_DECL_H
|
||||
|
||||
#include "ispc.h"
|
||||
#include <llvm/ADT/SmallVector.h>
|
||||
|
||||
struct VariableDeclaration;
|
||||
|
||||
class Declaration;
|
||||
class Declarator;
|
||||
|
||||
enum StorageClass {
|
||||
SC_NONE,
|
||||
SC_EXTERN,
|
||||
SC_EXPORT,
|
||||
SC_STATIC,
|
||||
SC_TYPEDEF,
|
||||
SC_EXTERN_C
|
||||
};
|
||||
|
||||
|
||||
/* Multiple qualifiers can be provided with types in declarations;
|
||||
therefore, they are set up so that they can be ORed together into an
|
||||
int. */
|
||||
@@ -82,6 +73,7 @@ enum StorageClass {
|
||||
#define TYPEQUAL_SIGNED (1<<4)
|
||||
#define TYPEQUAL_UNSIGNED (1<<5)
|
||||
#define TYPEQUAL_INLINE (1<<6)
|
||||
#define TYPEQUAL_EXPORT (1<<7)
|
||||
|
||||
/** @brief Representation of the declaration specifiers in a declaration.
|
||||
|
||||
@@ -141,25 +133,11 @@ public:
|
||||
Declarator(DeclaratorKind dk, SourcePos p);
|
||||
|
||||
/** Once a DeclSpecs instance is available, this method completes the
|
||||
initialization of the Symbol, setting its Type accordingly.
|
||||
initialization of the type member.
|
||||
*/
|
||||
void InitFromDeclSpecs(DeclSpecs *ds);
|
||||
|
||||
/** Get the actual type of the combination of Declarator and the given
|
||||
DeclSpecs. If an explicit base type is provided, the declarator is
|
||||
applied to that type; otherwise the base type from the DeclSpecs is
|
||||
used. */
|
||||
const Type *GetType(DeclSpecs *ds) const;
|
||||
const Type *GetType(const Type *base, DeclSpecs *ds) const;
|
||||
|
||||
/** Returns the symbol corresponding to the function declared by this
|
||||
declarator and symbols for its arguments in *args. */
|
||||
Symbol *GetFunctionInfo(DeclSpecs *ds, std::vector<Symbol *> *args);
|
||||
|
||||
Symbol *GetSymbolForFunctionParameter(int paramNum) const;
|
||||
|
||||
/** Returns the symbol associated with the declarator. */
|
||||
Symbol *GetSymbol() const;
|
||||
void InitFromType(const Type *base, DeclSpecs *ds);
|
||||
|
||||
void Print(int indent) const;
|
||||
|
||||
@@ -180,18 +158,24 @@ public:
|
||||
/** Type qualifiers provided with the declarator. */
|
||||
int typeQualifiers;
|
||||
|
||||
StorageClass storageClass;
|
||||
|
||||
/** For array declarators, this gives the declared size of the array.
|
||||
Unsized arrays have arraySize == 0. */
|
||||
int arraySize;
|
||||
|
||||
/** Symbol associated with the declarator. */
|
||||
Symbol *sym;
|
||||
/** Name associated with the declarator. */
|
||||
std::string name;
|
||||
|
||||
/** Initialization expression for the variable. May be NULL. */
|
||||
Expr *initExpr;
|
||||
|
||||
/** Type of the declarator. This is NULL until InitFromDeclSpecs() or
|
||||
InitFromType() is called. */
|
||||
const Type *type;
|
||||
|
||||
/** For function declarations, this holds the Declaration *s for the
|
||||
funciton's parameters. */
|
||||
function's parameters. */
|
||||
std::vector<Declaration *> functionParams;
|
||||
};
|
||||
|
||||
@@ -236,8 +220,8 @@ struct StructDeclaration {
|
||||
/** Given a set of StructDeclaration instances, this returns the types of
|
||||
the elements of the corresponding struct and their names. */
|
||||
extern void GetStructTypesNamesPositions(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames,
|
||||
std::vector<SourcePos> *elementPositions);
|
||||
llvm::SmallVector<const Type *, 8> *elementTypes,
|
||||
llvm::SmallVector<std::string, 8> *elementNames,
|
||||
llvm::SmallVector<SourcePos, 8> *elementPositions);
|
||||
|
||||
#endif // ISPC_DECL_H
|
||||
|
||||
@@ -1,3 +1,81 @@
|
||||
=== v1.2.2 === (20 April 2012)
|
||||
|
||||
This release includes a number of small additions to functionality and a
|
||||
number of bugfixes. New functionality includes:
|
||||
|
||||
* It's now possible to forward declare structures as in C/C++: "struct
|
||||
Foo;". After such a declaration, structs with pointers to "Foo" and
|
||||
functions that take pointers or references to Foo structs can be declared
|
||||
without the entire definition of Foo being available.
|
||||
|
||||
* New built-in types size_t, ptrdiff_t, and [u]intptr_t are now available,
|
||||
corresponding to the equivalent types in C.
|
||||
|
||||
* The standard library now provides atomic_swap*() and
|
||||
atomic_compare_exchange*() functions for void * types.
|
||||
|
||||
* The C++ backend has seen a number of improvements to the quality and
|
||||
readability of generated code.
|
||||
|
||||
A number of bugs have been fixed in this release as well. The most
|
||||
significant are:
|
||||
|
||||
* Fixed a bug where nested loops could cause a compiler crash in some
|
||||
circumstances (issues #240, and #229)
|
||||
|
||||
* Gathers could access invalid memory (and cause the program to crash) in
|
||||
some circumstances (#235)
|
||||
|
||||
* References to temporary values are now handled properly when passed to a
|
||||
function that takes a reference typed parameter.
|
||||
|
||||
* A case where incorrect code could be generated for compile-time-constant
|
||||
initializers has been fixed (#234).
|
||||
|
||||
=== v1.2.1 === (6 April 2012)
|
||||
|
||||
This release contains only minor new functionality and is mostly for many
|
||||
small bugfixes and improvements to error handling and error reporting.
|
||||
The new functionality that is present is:
|
||||
|
||||
* Significantly more efficient versions of the float / half conversion
|
||||
routines are now available in the standard library, thanks to Fabian
|
||||
Giesen.
|
||||
|
||||
* The last member of a struct can now be a zero-length array; this allows
|
||||
the trick of dynamically allocating enough storage for the struct and
|
||||
some number of array elements at the end of it.
|
||||
|
||||
Significant bugs fixed include:
|
||||
|
||||
* Issue #205: When a target ISA isn't specified, use the host system's
|
||||
capabilities to choose a target for which it will be able to run the
|
||||
generated code.
|
||||
|
||||
* Issues #215 and #217: Don't allocate storage for global variables that
|
||||
are declared "extern".
|
||||
|
||||
* Issue #197: Allow NULL as a default argument value in a function
|
||||
declaration.
|
||||
|
||||
* Issue #223: Fix bugs where taking the address of a function wouldn't work
|
||||
as expected.
|
||||
|
||||
* Issue #224: When there are overloaded variants of a function that take
|
||||
both reference and const reference parameters, give the non-const
|
||||
reference preference when matching values of that underlying type.
|
||||
|
||||
* Issue #225: An error is issued when a varying lvalue is assigned to a
|
||||
reference type (rather than crashing).
|
||||
|
||||
* Issue #193: Permit conversions from array types to void *, not just the
|
||||
pointer type of the underlying array element.
|
||||
|
||||
* Issue #199: Still evaluate expressions that are cast to (void).
|
||||
|
||||
The documentation has also been improved, with FAQs added to clarify some
|
||||
aspects of the ispc pointer model.
|
||||
|
||||
=== v1.2.0 === (20 March 2012)
|
||||
|
||||
This is a major new release of ispc, with a number of significant
|
||||
|
||||
245
docs/faq.rst
245
docs/faq.rst
@@ -14,12 +14,19 @@ distribution.
|
||||
+ `Why are there multiple versions of exported ispc functions in the assembly output?`_
|
||||
+ `How can I more easily see gathers and scatters in generated assembly?`_
|
||||
|
||||
* Language Details
|
||||
|
||||
+ `What is the difference between "int *foo" and "int foo[]"?`_
|
||||
+ `Why are pointed-to types "uniform" by default?`_
|
||||
+ `What am I getting an error about assigning a varying lvalue to a reference type?`_
|
||||
|
||||
* Interoperability
|
||||
|
||||
+ `How can I supply an initial execution mask in the call from the application?`_
|
||||
+ `How can I generate a single binary executable with support for multiple instruction sets?`_
|
||||
+ `How can I determine at run-time which vector instruction set's instructions were selected to execute?`_
|
||||
+ `Is it possible to inline ispc functions in C/C++ code?`_
|
||||
+ `Why is it illegal to pass "varying" values from C/C++ to ispc functions?`_
|
||||
|
||||
* Programming Techniques
|
||||
|
||||
@@ -27,6 +34,7 @@ distribution.
|
||||
+ `How can a gang of program instances generate variable amounts of output efficiently?`_
|
||||
+ `Is it possible to use ispc for explicit vector programming?`_
|
||||
+ `How can I debug my ispc programs using Valgrind?`_
|
||||
+ `foreach statements generate more complex assembly than I'd expect; what's going on?`_
|
||||
|
||||
Understanding ispc's Output
|
||||
===========================
|
||||
@@ -213,6 +221,125 @@ easier to understand:
|
||||
jmp ___pseudo_scatter_base_offsets32_32 ## TAILCALL
|
||||
|
||||
|
||||
Language Details
|
||||
================
|
||||
|
||||
What is the difference between "int \*foo" and "int foo[]"?
|
||||
-----------------------------------------------------------
|
||||
|
||||
In C and C++, declaring a function to take a parameter ``int *foo`` and
|
||||
``int foo[]`` results in the same type for the parameter. Both are
|
||||
pointers to integers. In ``ispc``, these are different types. The first
|
||||
one is a varying pointer to a uniform integer value in memory, while the
|
||||
second results in a uniform pointer to the start of an array of varying
|
||||
integer values in memory.
|
||||
|
||||
To understand why the first is a varying pointer to a uniform integer,
|
||||
first recall that types without explicit rate qualifiers (``uniform``,
|
||||
``varying``, or ``soa<>``) are ``varying`` by default. Second, recall from
|
||||
the `discussion of pointer types in the ispc User's Guide`_ that pointed-to
|
||||
types without rate qualifiers are ``uniform`` by default. (This second
|
||||
rule is discussed further below, in `Why are pointed-to types "uniform" by
|
||||
default?`_.) The type of ``int *foo`` follows from these.
|
||||
|
||||
.. _discussion of pointer types in the ispc User's Guide: ispc.html#pointer-types
|
||||
|
||||
Conversely, in a function body, ``int foo[10]`` represents a declaration of
|
||||
a 10-element array of varying ``int`` values. In that we'd certainly like
|
||||
to be able to pass such an array to a function that takes a ``int []``
|
||||
parameter, the natural type for an ``int []`` parameter is a uniform
|
||||
pointer to varying integer values.
|
||||
|
||||
In terms of compatibility with C/C++, it's unfortunate that this
|
||||
distinction exists, though any other set of rules seems to introduce more
|
||||
awkwardness than this one. (Though we're interested to hear ideas to
|
||||
improve these rules!).
|
||||
|
||||
Why are pointed-to types "uniform" by default?
|
||||
----------------------------------------------
|
||||
|
||||
In ``ispc``, types without rate qualifiers are "varying" by default, but
|
||||
types pointed to by pointers without rate qualifiers are "uniform" by
|
||||
default. Why this difference?
|
||||
|
||||
::
|
||||
|
||||
int foo; // no rate qualifier, "varying int".
|
||||
uniform int *foo; // pointer type has no rate qualifier, pointed-to does.
|
||||
// "varying pointer to uniform int".
|
||||
int *foo; // neither pointer type nor pointed-to type ("int") have
|
||||
// rate qualifiers. Pointer type is varying by default,
|
||||
// pointed-to is uniform. "varying pointer to uniform int".
|
||||
varying int *foo; // varying pointer to varying int
|
||||
|
||||
The first rule, having types without rate qualifiers be varying by default,
|
||||
is a default that keeps the number of "uniform" or "varying" qualifiers in
|
||||
``ispc`` programs low. Most ``ispc`` programs use mostly "varying"
|
||||
variables, so this rule allows most variables to be declared without also
|
||||
requiring rate qualifiers.
|
||||
|
||||
On a related note, this rule allows many C/C++ functions to be used to
|
||||
define equivalent functions in the SPMD execution model that ``ispc``
|
||||
provides with little or no modification:
|
||||
|
||||
::
|
||||
|
||||
// scalar add in C/C++, SPMD/vector add in ispc
|
||||
int add(int a, int b) { return a + b; }
|
||||
|
||||
This motivation also explains why ``uniform int *foo`` represents a varying
|
||||
pointer; having pointers be varying by default if they don't have rate
|
||||
qualifiers similarly helps with porting code from C/C++ to ``ispc``.
|
||||
|
||||
The trickier issue is why pointed-to types are "uniform" by default. In our
|
||||
experience, data in memory that is accessed via pointers is most often
|
||||
uniform; this generally includes all data that has been allocated and
|
||||
initialized by the C/C++ application code. In practice, "varying" types are
|
||||
more generally (but not exclusively) used for local data in ``ispc``
|
||||
functions. Thus, making the pointed-to type uniform by default leads to
|
||||
more concise code for the most common cases.
|
||||
|
||||
|
||||
What am I getting an error about assigning a varying lvalue to a reference type?
|
||||
--------------------------------------------------------------------------------
|
||||
|
||||
Given code like the following:
|
||||
|
||||
::
|
||||
|
||||
uniform float a[...];
|
||||
int index = ...;
|
||||
float &r = a[index];
|
||||
|
||||
``ispc`` issues the error "Initializer for reference-type variable "r" must
|
||||
have a uniform lvalue type.". The underlying issue stems from how
|
||||
references are represented in the code generated by ``ispc``. Recall that
|
||||
``ispc`` supports both uniform and varying pointer types--a uniform pointer
|
||||
points to the same location in memory for all program instances in the
|
||||
gang, while a varying pointer allows each program instance to have its own
|
||||
pointer value.
|
||||
|
||||
References are represented as a pointer in the code generated by ``ispc``,
|
||||
though this is generally opaque to the user; in ``ispc``, they are
|
||||
specifically uniform pointers. This design decision was made so that given
|
||||
code like this:
|
||||
|
||||
::
|
||||
|
||||
extern void func(float &val);
|
||||
float foo = ...;
|
||||
func(foo);
|
||||
|
||||
Then the reference would be handled efficiently as a single pointer, rather
|
||||
than unnecessarily being turned into a gang-size of pointers.
|
||||
|
||||
However, an implication of this decision is that it's not possible for
|
||||
references to refer to completely different things for each of the program
|
||||
instances. (And hence the error that is issued). In cases where a unique
|
||||
per-program-instance pointer is needed, a varying pointer should be used
|
||||
instead of a reference.
|
||||
|
||||
|
||||
Interoperability
|
||||
================
|
||||
|
||||
@@ -391,6 +518,48 @@ linking your applicaiton.
|
||||
``-mattr=+avx`` flag to ``llc``.)
|
||||
|
||||
|
||||
Why is it illegal to pass "varying" values from C/C++ to ispc functions?
|
||||
------------------------------------------------------------------------
|
||||
|
||||
If any of the types in the parameter list to an exported function is
|
||||
"varying" (including recursively, and members of structure types, etc.),
|
||||
then ``ispc`` will issue an error and refuse to compile the function:
|
||||
|
||||
::
|
||||
|
||||
% echo "export int add(int x) { return ++x; }" | ispc
|
||||
<stdin>:1:12: Error: Illegal to return a "varying" type from exported function "add"
|
||||
<stdin>:1:20: Error: Varying parameter "x" is illegal in an exported function.
|
||||
|
||||
While there's no fundamental reason why this isn't possible, recall the
|
||||
definition of "varying" variables: they have one value for each program
|
||||
instance in the gang. As such, the number of values and amount of storage
|
||||
required to represent a varying variable depends on the gang size
|
||||
(i.e. ``programCount``), which can have different values depending on the
|
||||
compilation target.
|
||||
|
||||
``ispc`` therefore prohibits passing "varying" values between the
|
||||
application and the ``ispc`` program in order to prevent the
|
||||
application-side code from depending on a particular gang size, in order to
|
||||
encourage portability to different gang sizes. (A generally desirable
|
||||
programming practice.)
|
||||
|
||||
For cases where the size of data is actually fixed from the application
|
||||
side, the value can be passed via a pointer to a short ``uniform`` array,
|
||||
as follows:
|
||||
|
||||
::
|
||||
|
||||
export void add4(uniform int ptr[4]) {
|
||||
foreach (i = 0 ... 4)
|
||||
ptr[i]++;
|
||||
}
|
||||
|
||||
On the 4-wide SSE instruction set, this compiles to a single vector add
|
||||
instruction (and associated move instructions), while it still also
|
||||
efficiently computes the correct result on 8-wide AVX targets.
|
||||
|
||||
|
||||
Programming Techniques
|
||||
======================
|
||||
|
||||
@@ -525,3 +694,79 @@ you can use ``--target=sse4`` when compiling to run with ``valgrind``.
|
||||
Note that ``valgrind`` does not yet support programs that use the AVX
|
||||
instruction set.
|
||||
|
||||
foreach statements generate more complex assembly than I'd expect; what's going on?
|
||||
-----------------------------------------------------------------------------------
|
||||
|
||||
Given a simple ``foreach`` loop like the following:
|
||||
|
||||
::
|
||||
|
||||
void foo(uniform float a[], uniform int count) {
|
||||
foreach (i = 0 ... count)
|
||||
a[i] *= 2;
|
||||
}
|
||||
|
||||
|
||||
the ``ispc`` compiler generates approximately 40 instructions--why isn't
|
||||
the generated code simpler?
|
||||
|
||||
There are two main components to the code: one handles
|
||||
``programCount``-sized chunks of elements of the array, and the other
|
||||
handles any excess elements at the end of the array that don't completely
|
||||
fill a gang. The code for the main loop is essentially what one would
|
||||
expect: a vector of values is loaded from the array, the multiply is done,
|
||||
and the result is stored.
|
||||
|
||||
::
|
||||
|
||||
LBB0_2: ## %foreach_full_body
|
||||
movslq %edx, %rdx
|
||||
vmovups (%rdi,%rdx), %ymm1
|
||||
vmulps %ymm0, %ymm1, %ymm1
|
||||
vmovups %ymm1, (%rdi,%rdx)
|
||||
addl $32, %edx
|
||||
addl $8, %eax
|
||||
cmpl %ecx, %eax
|
||||
jl LBB0_2
|
||||
|
||||
|
||||
Then, there is a sequence of instructions that handles any additional
|
||||
elements at the end of the array. (These instructions don't execute if
|
||||
there aren't any left-over values to process, but they do lengthen the
|
||||
amount of generated code.)
|
||||
|
||||
::
|
||||
|
||||
## BB#4: ## %partial_inner_only
|
||||
vmovd %eax, %xmm0
|
||||
vinsertf128 $1, %xmm0, %ymm0, %ymm0
|
||||
vpermilps $0, %ymm0, %ymm0 ## ymm0 = ymm0[0,0,0,0,4,4,4,4]
|
||||
vextractf128 $1, %ymm0, %xmm3
|
||||
vmovd %esi, %xmm2
|
||||
vmovaps LCPI0_1(%rip), %ymm1
|
||||
vextractf128 $1, %ymm1, %xmm4
|
||||
vpaddd %xmm4, %xmm3, %xmm3
|
||||
# ....
|
||||
vmulps LCPI0_0(%rip), %ymm1, %ymm1
|
||||
vmaskmovps %ymm1, %ymm0, (%rdi,%rax)
|
||||
|
||||
|
||||
If you know that the number of elements to be processed will always be an
|
||||
exact multiple of 8, 16, etc., then adding a simple assignment to
|
||||
``count`` like the one below gives the compiler enough information to be
|
||||
able to eliminate the code for the additional array elements.
|
||||
|
||||
::
|
||||
|
||||
void foo(uniform float a[], uniform int count) {
|
||||
// This assignment doesn't change the value of count
|
||||
// if it's a multiple of 16, but it gives the compiler
|
||||
// insight into this fact, allowing for simpler code to
|
||||
// be generated for the foreach loop.
|
||||
count = (count & ~(16-1));
|
||||
foreach (i = 0 ... count)
|
||||
a[i] *= 2;
|
||||
}
|
||||
|
||||
With this new version of ``foo()``, only the code for the first loop above
|
||||
is generated.
|
||||
|
||||
279
docs/ispc.rst
279
docs/ispc.rst
@@ -121,10 +121,14 @@ Contents:
|
||||
|
||||
* `The ISPC Standard Library`_
|
||||
|
||||
+ `Basic Operations On Data`_
|
||||
|
||||
* `Logical and Selection Operations`_
|
||||
* `Bit Operations`_
|
||||
|
||||
+ `Math Functions`_
|
||||
|
||||
* `Basic Math Functions`_
|
||||
* `Bit-Level Operations`_
|
||||
* `Transcendental Functions`_
|
||||
* `Pseudo-Random Numbers`_
|
||||
|
||||
@@ -143,6 +147,7 @@ Contents:
|
||||
|
||||
* `Converting Between Array-of-Structures and Structure-of-Arrays Layout`_
|
||||
* `Conversions To and From Half-Precision Floats`_
|
||||
* `Converting to sRGB8`_
|
||||
|
||||
+ `Systems Programming Support`_
|
||||
|
||||
@@ -538,7 +543,7 @@ preprocessor runs:
|
||||
* - ISPC
|
||||
- 1
|
||||
- Detecting that the ``ispc`` compiler is processing the file
|
||||
* - ISPC_TARGET_{SSE2,SSE4,AVX}
|
||||
* - ISPC_TARGET_{SSE2,SSE4,AVX,AVX2}
|
||||
- 1
|
||||
- One of these will be set, depending on the compilation target.
|
||||
* - ISPC_POINTER_SIZE
|
||||
@@ -1390,8 +1395,8 @@ Types
|
||||
Basic Types and Type Qualifiers
|
||||
-------------------------------
|
||||
|
||||
``ispc`` is a statically-typed language. It supports a variety of basic
|
||||
types.
|
||||
``ispc`` is a statically-typed language. It supports a variety of core
|
||||
basic types:
|
||||
|
||||
* ``void``: "empty" type representing no value.
|
||||
* ``bool``: boolean value; may be assigned ``true``, ``false``, or the
|
||||
@@ -1408,6 +1413,15 @@ types.
|
||||
* ``unsigned int64``: 64-bit unsigned integer.
|
||||
* ``double``: 64-bit double-precision floating point value.
|
||||
|
||||
There are also a few built-in types related to pointers and memory:
|
||||
|
||||
* ``size_t``: an integer type large enough to represent the maximum size of any object (structure or array)
|
||||
* ``ptrdiff_t``: an integer type large enough to represent the difference
|
||||
between two pointers
|
||||
* ``intptr_t``: signed integer type that is large enough to represent
|
||||
a pointer value
|
||||
* ``uintptr_t``: unsigned integer type large enough to represent a pointer
|
||||
|
||||
Implicit type conversion between values of different types is done
|
||||
automatically by the ``ispc`` compiler. Thus, a value of ``float`` type
|
||||
can be assigned to a variable of ``int`` type directly. In binary
|
||||
@@ -1492,13 +1506,17 @@ Defining New Names For Types
|
||||
The ``typedef`` keyword can be used to name types:
|
||||
|
||||
::
|
||||
|
||||
typedef int64 BigInt;
|
||||
typedef float Float3[3];
|
||||
|
||||
typedef Float3 float[3];
|
||||
Following C's syntax, the code above defines ``BigInt`` to have ``int64``
|
||||
type and ``Float3`` to have ``float[3]`` type.
|
||||
|
||||
``typedef`` doesn't create a new type: it just provides an alternative name
|
||||
for an existing type. Thus, in the above example, it is legal to pass a
|
||||
value with ``float[3]`` type to a function that has been declared to take a
|
||||
``Float3`` parameter.
|
||||
Also as in C, ``typedef`` doesn't create a new type: it just provides an
|
||||
alternative name for an existing type. Thus, in the above example, it is
|
||||
legal to pass a value with ``float[3]`` type to a function that has been
|
||||
declared to take a ``Float3`` parameter.
|
||||
|
||||
|
||||
Pointer Types
|
||||
@@ -2150,6 +2168,12 @@ greater than or equal to ``NUM_ITEMS``.
|
||||
// ...
|
||||
}
|
||||
|
||||
Short-circuiting may impose some overhead in the generated code; for cases
|
||||
where short-circuiting is undesirable due to performance impact, see
|
||||
the section `Logical and Selection Operations`_, which introduces helper
|
||||
functions in the standard library that provide these operations without
|
||||
short-circuiting.
|
||||
|
||||
|
||||
Dynamic Memory Allocation
|
||||
-------------------------
|
||||
@@ -2827,6 +2851,123 @@ The ISPC Standard Library
|
||||
compiling ``ispc`` programs. (To disable the standard library, pass the
|
||||
``--nostdlib`` command-line flag to the compiler.)
|
||||
|
||||
Basic Operations On Data
|
||||
------------------------
|
||||
|
||||
Logical and Selection Operations
|
||||
--------------------------------
|
||||
|
||||
Recall from `Expressions`_ that ``ispc`` short-circuits the evaluation of
|
||||
logical and selection operators: given an expression like ``(index < count
|
||||
&& array[index] == 0)``, then ``array[index] == 0`` is only evaluated if
|
||||
``index < count`` is true. This property is useful for writing expressions
|
||||
like the preceding one, where the second expression may not be safe to
|
||||
evaluate in some cases.
|
||||
|
||||
This short-circuiting can impose overhead in the generated code; additional
|
||||
operations are required to test the first value and to conditionally jump
|
||||
over the code that evaluates the second value. The ``ispc`` compiler does
|
||||
try to mitigate this cost by detecting cases where it is both safe and
|
||||
inexpensive to evaluate both expressions, and skips short-circuiting in the
|
||||
generated code in this case (without there being any programmer-visible
|
||||
change in program behavior.)
|
||||
|
||||
For cases where the compiler can't detect this case but the programmer
|
||||
wants to avoid short-circuiting behavior, the standard library provides a
|
||||
few helper functions. First, ``and()`` and ``or()`` provide
|
||||
non-short-circuiting logical AND and OR operations.
|
||||
|
||||
::
|
||||
|
||||
bool and(bool a, bool b)
|
||||
bool or(bool a, bool b)
|
||||
uniform bool and(uniform bool a, uniform bool b)
|
||||
uniform bool or(uniform bool a, uniform bool b)
|
||||
|
||||
And there are three variants of ``select()`` that select between two values
|
||||
based on a boolean condition. These are the variants of ``select()`` for
|
||||
the ``int8`` type:
|
||||
|
||||
::
|
||||
|
||||
int8 select(bool v, int8 a, int8 b)
|
||||
int8 select(uniform bool v, int8 a, int8 b)
|
||||
uniform int8 select(uniform bool v, uniform int8 a, uniform int8 b)
|
||||
|
||||
There are also variants for ``int16``, ``int32``, ``int64``, ``float``, and
|
||||
``double`` types.
|
||||
|
||||
Bit Operations
|
||||
--------------
|
||||
|
||||
The various variants of ``popcnt()`` return the population count--the
|
||||
number of bits set in the given value.
|
||||
|
||||
::
|
||||
|
||||
uniform int popcnt(uniform int v)
|
||||
int popcnt(int v)
|
||||
uniform int popcnt(bool v)
|
||||
|
||||
|
||||
A few functions determine how many leading bits in the given value are zero
|
||||
and how many of the trailing bits are zero; there are also ``unsigned``
|
||||
variants of these functions and variants that take ``int64`` and ``unsigned
|
||||
int64`` types.
|
||||
|
||||
::
|
||||
|
||||
int32 count_leading_zeros(int32 v)
|
||||
uniform int32 count_leading_zeros(uniform int32 v)
|
||||
int32 count_trailing_zeros(int32 v)
|
||||
uniform int32 count_trailing_zeros(uniform int32 v)
|
||||
|
||||
Sometimes it's useful to convert a ``bool`` value to an integer using sign
|
||||
extension so that the integer's bits are all on if the ``bool`` has the
|
||||
value ``true`` (rather than just having the value one). The
|
||||
``sign_extend()`` functions provide this functionality:
|
||||
|
||||
::
|
||||
|
||||
int sign_extend(bool value)
|
||||
uniform int sign_extend(uniform bool value)
|
||||
|
||||
The ``intbits()`` and ``floatbits()`` functions can be used to implement
|
||||
low-level floating-point bit twiddling. For example, ``intbits()`` returns
|
||||
an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
|
||||
value. (Note: it is **not** the same as ``(int)a``, but corresponds to
|
||||
something like ``*((int *)&a)`` in C.)
|
||||
|
||||
::
|
||||
|
||||
float floatbits(unsigned int a);
|
||||
uniform float floatbits(uniform unsigned int a);
|
||||
unsigned int intbits(float a);
|
||||
uniform unsigned int intbits(uniform float a);
|
||||
|
||||
|
||||
The ``intbits()`` and ``floatbits()`` functions have no cost at runtime;
|
||||
they just let the compiler know how to interpret the bits of the given
|
||||
value. They make it possible to efficiently write functions that take
|
||||
advantage of the low-level bit representation of floating-point values.
|
||||
|
||||
For example, the ``abs()`` function in the standard library is implemented
|
||||
as follows:
|
||||
|
||||
::
|
||||
|
||||
float abs(float a) {
|
||||
unsigned int i = intbits(a);
|
||||
i &= 0x7fffffff;
|
||||
return floatbits(i);
|
||||
}
|
||||
|
||||
This code directly clears the high order bit to ensure that the given
|
||||
floating-point value is positive. This compiles down to a single ``andps``
|
||||
instruction when used with an Intel® SSE target, for example.
|
||||
|
||||
|
||||
|
||||
Math Functions
|
||||
--------------
|
||||
|
||||
@@ -2919,77 +3060,6 @@ quite efficient.)
|
||||
uniform unsigned int low,
|
||||
uniform unsigned int high)
|
||||
|
||||
Bit-Level Operations
|
||||
--------------------
|
||||
|
||||
|
||||
The various variants of ``popcnt()`` return the population count--the
|
||||
number of bits set in the given value.
|
||||
|
||||
::
|
||||
|
||||
uniform int popcnt(uniform int v)
|
||||
int popcnt(int v)
|
||||
uniform int popcnt(bool v)
|
||||
|
||||
|
||||
A few functions determine how many leading bits in the given value are zero
|
||||
and how many of the trailing bits are zero; there are also ``unsigned``
|
||||
variants of these functions and variants that take ``int64`` and ``unsigned
|
||||
int64`` types.
|
||||
|
||||
::
|
||||
|
||||
int32 count_leading_zeros(int32 v)
|
||||
uniform int32 count_leading_zeros(uniform int32 v)
|
||||
int32 count_trailing_zeros(int32 v)
|
||||
uniform int32 count_trailing_zeros(uniform int32 v)
|
||||
|
||||
Sometimes it's useful to convert a ``bool`` value to an integer using sign
|
||||
extension so that the integer's bits are all on if the ``bool`` has the
|
||||
value ``true`` (rather than just having the value one). The
|
||||
``sign_extend()`` functions provide this functionality:
|
||||
|
||||
::
|
||||
|
||||
int sign_extend(bool value)
|
||||
uniform int sign_extend(uniform bool value)
|
||||
|
||||
The ``intbits()`` and ``floatbits()`` functions can be used to implement
|
||||
low-level floating-point bit twiddling. For example, ``intbits()`` returns
|
||||
an ``unsigned int`` that is a bit-for-bit copy of the given ``float``
|
||||
value. (Note: it is **not** the same as ``(int)a``, but corresponds to
|
||||
something like ``*((int *)&a)`` in C.
|
||||
|
||||
::
|
||||
|
||||
float floatbits(unsigned int a);
|
||||
uniform float floatbits(uniform unsigned int a);
|
||||
unsigned int intbits(float a);
|
||||
uniform unsigned int intbits(uniform float a);
|
||||
|
||||
|
||||
The ``intbits()`` and ``floatbits()`` functions have no cost at runtime;
|
||||
they just let the compiler know how to interpret the bits of the given
|
||||
value. They make it possible to efficiently write functions that take
|
||||
advantage of the low-level bit representation of floating-point values.
|
||||
|
||||
For example, the ``abs()`` function in the standard library is implemented
|
||||
as follows:
|
||||
|
||||
::
|
||||
|
||||
float abs(float a) {
|
||||
unsigned int i = intbits(a);
|
||||
i &= 0x7fffffff;
|
||||
return floatbits(i);
|
||||
}
|
||||
|
||||
This code directly clears the high order bit to ensure that the given
|
||||
floating-point value is positive. This compiles down to a single ``andps``
|
||||
instruction when used with an Intel® SSE target, for example.
|
||||
|
||||
|
||||
Transcendental Functions
|
||||
------------------------
|
||||
|
||||
@@ -3027,8 +3097,8 @@ The corresponding inverse functions are also available:
|
||||
uniform float acos(uniform float x)
|
||||
float atan(float x)
|
||||
uniform float atan(uniform float x)
|
||||
float atan2(float x, float y)
|
||||
uniform float atan2(uniform float x, uniform float y)
|
||||
float atan2(float y, float x)
|
||||
uniform float atan2(uniform float y, uniform float x)
|
||||
|
||||
If both sine and cosine are needed, then the ``sincos()`` call computes
|
||||
both more efficiently than two calls to the respective individual
|
||||
@@ -3077,7 +3147,7 @@ library. State for the RNG is maintained in an instance of the
|
||||
::
|
||||
|
||||
struct RNGState;
|
||||
void seed_rng(varying RNGState * uniform state, uniform int seed)
|
||||
void seed_rng(varying RNGState * uniform state, int seed)
|
||||
void seed_rng(uniform RNGState * uniform state, uniform int seed)
|
||||
|
||||
After the RNG is seeded, the ``random()`` function can be used to get a
|
||||
@@ -3622,6 +3692,22 @@ precise.
|
||||
uniform int16 float_to_half_fast(uniform float f)
|
||||
|
||||
|
||||
Converting to sRGB8
|
||||
-------------------
|
||||
|
||||
The sRGB color space is used in many applications in graphics and imaging;
|
||||
see the `Wikipedia page on sRGB`_ for more information. The ``ispc``
|
||||
standard library provides two functions for converting floating-point color
|
||||
values to 8-bit values in the sRGB space.
|
||||
|
||||
.. _Wikipedia page on sRGB: http://en.wikipedia.org/wiki/SRGB
|
||||
|
||||
::
|
||||
|
||||
int float_to_srgb8(float v)
|
||||
uniform int float_to_srgb8(uniform float v)
|
||||
|
||||
|
||||
Systems Programming Support
|
||||
---------------------------
|
||||
|
||||
@@ -3732,6 +3818,13 @@ For global atomics, only atomic swap is available for these types:
|
||||
float atomic_swap_global(uniform float * uniform ptr, float value)
|
||||
double atomic_swap_global(uniform double * uniform ptr, double value)
|
||||
|
||||
Finally, "swap" (but none of these other atomics) is available for pointer
|
||||
types:
|
||||
|
||||
::
|
||||
|
||||
void *atomic_swap_{local,global}(void * * uniform ptr, void * value)
|
||||
|
||||
There are also variants of the atomic that take ``uniform`` values for the
|
||||
operand and return a ``uniform`` result. These correspond to a single
|
||||
atomic operation being performed for the entire gang of program instances,
|
||||
@@ -3756,6 +3849,13 @@ rather than one per program instance.
|
||||
uniform int32 atomic_swap_{local,global}(uniform int32 * uniform ptr,
|
||||
uniform int32 newval)
|
||||
|
||||
And similarly for pointers:
|
||||
|
||||
::
|
||||
|
||||
uniform void *atomic_swap_{local,global}(void * * uniform ptr,
|
||||
void *newval)
|
||||
|
||||
Be careful that you use the atomic function that you mean to; consider the
|
||||
following code:
|
||||
|
||||
@@ -3797,12 +3897,18 @@ the same location in memory!)
|
||||
int32 atomic_xor_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
int32 atomic_swap_{local,global}(uniform int32 * varying ptr, int32 value)
|
||||
|
||||
And:
|
||||
|
||||
::
|
||||
|
||||
void *atomic_swap_{local,global}(void * * ptr, void *value)
|
||||
|
||||
There are also atomic "compare and exchange" functions. Compare and
|
||||
exchange atomically compares the value in "val" to "compare"--if they
|
||||
match, it assigns "newval" to "val". In either case, the old value of
|
||||
"val" is returned. (As with the other atomic operations, there are also
|
||||
``unsigned`` and 64-bit variants of this function. Furthermore, there are
|
||||
``float`` and ``double`` variants as well.)
|
||||
``float``, ``double``, and ``void *`` variants as well.)
|
||||
|
||||
::
|
||||
|
||||
@@ -3824,6 +3930,11 @@ code.
|
||||
|
||||
void memory_barrier();
|
||||
|
||||
Note that this barrier is *not* needed for coordinating reads and writes
|
||||
among the program instances in a gang; it's only needed for coordinating
|
||||
between multiple hardware threads running on different cores. See the
|
||||
section `Data Races Within a Gang`_ for the guarantees provided about
|
||||
memory read/write ordering across a gang.
|
||||
|
||||
Prefetches
|
||||
----------
|
||||
|
||||
@@ -2,6 +2,24 @@
|
||||
ispc News
|
||||
=========
|
||||
|
||||
ispc 1.2.1 is Released
|
||||
----------------------
|
||||
|
||||
This is a bugfix release, fixing approximately 20 bugs in the system and
|
||||
improving error handling and error reporting. New functionality includes
|
||||
very efficient float/half conversion routines thanks to Fabian
|
||||
Giesen. See the `1.2.1 release notes`_ for details.
|
||||
|
||||
.. _1.2.1 release notes: https://github.com/ispc/ispc/tree/master/docs/ReleaseNotes.txt
|
||||
|
||||
ispc 1.2.0 is Released
|
||||
-----------------------
|
||||
|
||||
A new major release was posted on March 20, 2012. This release includes
|
||||
significant new functionality for cleanly handling "structure of arrays"
|
||||
(SoA) data layout and a new model for how uniform and varying are handled
|
||||
with structure types.
|
||||
|
||||
Paper on ispc To Appear in InPar 2012
|
||||
-------------------------------------
|
||||
|
||||
|
||||
@@ -624,7 +624,7 @@ gathers happen.)
|
||||
|
||||
extern "C" {
|
||||
void ISPCInstrument(const char *fn, const char *note,
|
||||
int line, int mask);
|
||||
int line, uint64_t mask);
|
||||
}
|
||||
|
||||
This function is passed the file name of the ``ispc`` file running, a short
|
||||
@@ -637,7 +637,7 @@ as follows:
|
||||
|
||||
::
|
||||
|
||||
ISPCInstrument("foo.ispc", "function entry", 55, 0xf);
|
||||
ISPCInstrument("foo.ispc", "function entry", 55, 0xfull);
|
||||
|
||||
This call indicates that the currently executing program has just
|
||||
entered the function defined at line 55 of the file ``foo.ispc``, with a
|
||||
|
||||
@@ -31,7 +31,7 @@ PROJECT_NAME = "Intel SPMD Program Compiler"
|
||||
# This could be handy for archiving the generated documentation or
|
||||
# if some version control system is used.
|
||||
|
||||
PROJECT_NUMBER = 1.2.0
|
||||
PROJECT_NUMBER = 1.2.2
|
||||
|
||||
# The OUTPUT_DIRECTORY tag is used to specify the (relative or absolute)
|
||||
# base path where the generated documentation will be put.
|
||||
|
||||
@@ -210,7 +210,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
|
||||
RNGState rngstate;
|
||||
|
||||
seed_rng(&rngstate, y0);
|
||||
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));
|
||||
float invSamples = 1.f / nsubsamples;
|
||||
|
||||
foreach_tiled(y = y0 ... y1, x = 0 ... w,
|
||||
|
||||
@@ -211,7 +211,7 @@ static void ao_scanlines(uniform int y0, uniform int y1, uniform int w,
|
||||
{ { 1.0f, 0.0f, -2.2f }, 0.5f } };
|
||||
RNGState rngstate;
|
||||
|
||||
seed_rng(&rngstate, y0);
|
||||
seed_rng(&rngstate, programIndex + (y0 << (programIndex & 15)));
|
||||
|
||||
// Compute the mapping between the 'programCount'-wide program
|
||||
// instances running in parallel and samples in the image.
|
||||
|
||||
@@ -87,7 +87,7 @@ int main(int argc, char** argv) {
|
||||
framebuffer.clear();
|
||||
reset_and_start_timer();
|
||||
for (int j = 0; j < nframes; ++j)
|
||||
ispc::RenderStatic(&input->header, &input->arrays,
|
||||
ispc::RenderStatic(input->header, input->arrays,
|
||||
VISUALIZE_LIGHT_COUNT,
|
||||
framebuffer.r, framebuffer.g, framebuffer.b);
|
||||
double mcycles = get_elapsed_mcycles() / nframes;
|
||||
|
||||
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -259,13 +259,13 @@ static FORCEINLINE TYPE NAME(TYPE a, int32_t b) { \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#define SMEAR(VTYPE, NAME, STYPE) \
|
||||
static FORCEINLINE VTYPE __smear_##NAME(STYPE v) { \
|
||||
VTYPE ret; \
|
||||
for (int i = 0; i < 16; ++i) \
|
||||
ret.v[i] = v; \
|
||||
return ret; \
|
||||
} \
|
||||
#define SMEAR(VTYPE, NAME, STYPE) \
|
||||
static FORCEINLINE VTYPE __smear_##NAME(VTYPE retType, STYPE v) { \
|
||||
VTYPE ret; \
|
||||
for (int i = 0; i < 16; ++i) \
|
||||
ret.v[i] = v; \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
#define BROADCAST(VTYPE, NAME, STYPE) \
|
||||
static FORCEINLINE VTYPE __broadcast_##NAME(VTYPE v, int index) { \
|
||||
@@ -311,8 +311,8 @@ INSERT_EXTRACT(__vec1_d, double)
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// mask ops
|
||||
|
||||
static FORCEINLINE uint32_t __movmsk(__vec16_i1 mask) {
|
||||
return mask.v;
|
||||
static FORCEINLINE uint64_t __movmsk(__vec16_i1 mask) {
|
||||
return (uint64_t)mask.v;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __equal(__vec16_i1 a, __vec16_i1 b) {
|
||||
@@ -339,6 +339,24 @@ static FORCEINLINE __vec16_i1 __or(__vec16_i1 a, __vec16_i1 b) {
|
||||
return r;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __not(__vec16_i1 v) {
|
||||
__vec16_i1 r;
|
||||
r.v = ~v.v;
|
||||
return r;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __and_not1(__vec16_i1 a, __vec16_i1 b) {
|
||||
__vec16_i1 r;
|
||||
r.v = ~a.v & b.v;
|
||||
return r;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __and_not2(__vec16_i1 a, __vec16_i1 b) {
|
||||
__vec16_i1 r;
|
||||
r.v = a.v & ~b.v;
|
||||
return r;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __select(__vec16_i1 mask, __vec16_i1 a,
|
||||
__vec16_i1 b) {
|
||||
__vec16_i1 r;
|
||||
@@ -374,6 +392,12 @@ static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v, int align) {
|
||||
*ptr = v.v;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_i1 __smear_i1(__vec16_i1, int v) {
|
||||
return __vec16_i1(v, v, v, v, v, v, v, v,
|
||||
v, v, v, v, v, v, v, v);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// int8
|
||||
|
||||
@@ -581,6 +605,121 @@ ROTATE(__vec16_f, float, float)
|
||||
SHUFFLES(__vec16_f, float, float)
|
||||
LOAD_STORE(__vec16_f, float)
|
||||
|
||||
static FORCEINLINE float __exp_uniform_float(float v) {
|
||||
return expf(v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_f __exp_varying_float(__vec16_f v) {
|
||||
__vec16_f ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = expf(v.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static FORCEINLINE float __log_uniform_float(float v) {
|
||||
return logf(v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_f __log_varying_float(__vec16_f v) {
|
||||
__vec16_f ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = logf(v.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static FORCEINLINE float __pow_uniform_float(float a, float b) {
|
||||
return powf(a, b);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec16_f __pow_varying_float(__vec16_f a, __vec16_f b) {
|
||||
__vec16_f ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = powf(a.v[i], b.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
static FORCEINLINE int __intbits(float v) {
|
||||
union {
|
||||
float f;
|
||||
int i;
|
||||
} u;
|
||||
u.f = v;
|
||||
return u.i;
|
||||
}
|
||||
|
||||
static FORCEINLINE float __floatbits(int v) {
|
||||
union {
|
||||
float f;
|
||||
int i;
|
||||
} u;
|
||||
u.i = v;
|
||||
return u.f;
|
||||
}
|
||||
|
||||
static FORCEINLINE float __half_to_float_uniform(int16_t h) {
|
||||
static const uint32_t shifted_exp = 0x7c00 << 13; // exponent mask after shift
|
||||
|
||||
int32_t o = ((int32_t)(h & 0x7fff)) << 13; // exponent/mantissa bits
|
||||
uint32_t exp = shifted_exp & o; // just the exponent
|
||||
o += (127 - 15) << 23; // exponent adjust
|
||||
|
||||
// handle exponent special cases
|
||||
if (exp == shifted_exp) // Inf/NaN?
|
||||
o += (128 - 16) << 23; // extra exp adjust
|
||||
else if (exp == 0) { // Zero/Denormal?
|
||||
o += 1 << 23; // extra exp adjust
|
||||
o = __intbits(__floatbits(o) - __floatbits(113 << 23)); // renormalize
|
||||
}
|
||||
|
||||
o |= ((int32_t)(h & 0x8000)) << 16; // sign bit
|
||||
return __floatbits(o);
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE __vec16_f __half_to_float_varying(__vec16_i16 v) {
|
||||
__vec16_f ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = __half_to_float_uniform(v.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE int16_t __float_to_half_uniform(float f) {
|
||||
uint32_t sign_mask = 0x80000000u;
|
||||
int32_t o;
|
||||
|
||||
int32_t fint = __intbits(f);
|
||||
int32_t sign = fint & sign_mask;
|
||||
fint ^= sign;
|
||||
|
||||
int32_t f32infty = 255 << 23;
|
||||
o = (fint > f32infty) ? 0x7e00 : 0x7c00;
|
||||
|
||||
// (De)normalized number or zero
|
||||
// update fint unconditionally to save the blending; we don't need it
|
||||
// anymore for the Inf/NaN case anyway.
|
||||
const uint32_t round_mask = ~0xfffu;
|
||||
const int32_t magic = 15 << 23;
|
||||
const int32_t f16infty = 31 << 23;
|
||||
|
||||
int32_t fint2 = __intbits(__floatbits(fint & round_mask) * __floatbits(magic)) - round_mask;
|
||||
fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
|
||||
|
||||
if (fint < f32infty)
|
||||
o = fint2 >> 13; // Take the bits!
|
||||
|
||||
return (o | (sign >> 16));
|
||||
}
|
||||
|
||||
|
||||
static FORCEINLINE __vec16_i16 __float_to_half_varying(__vec16_f v) {
|
||||
__vec16_i16 ret;
|
||||
for (int i = 0; i < 16; ++i)
|
||||
ret.v[i] = __float_to_half_uniform(v.v[i]);
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// double
|
||||
|
||||
|
||||
1688
examples/intrinsics/generic-32.h
Normal file
1688
examples/intrinsics/generic-32.h
Normal file
File diff suppressed because it is too large
Load Diff
1817
examples/intrinsics/generic-64.h
Normal file
1817
examples/intrinsics/generic-64.h
Normal file
File diff suppressed because it is too large
Load Diff
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -224,8 +224,8 @@ CAST_BITS_SCALAR(double, int64_t)
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// mask ops
|
||||
|
||||
static FORCEINLINE uint32_t __movmsk(__vec4_i1 mask) {
|
||||
return _mm_movemask_ps(mask.v);
|
||||
static FORCEINLINE uint64_t __movmsk(__vec4_i1 mask) {
|
||||
return (uint64_t)_mm_movemask_ps(mask.v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i1 __equal(__vec4_i1 a, __vec4_i1 b) {
|
||||
@@ -266,6 +266,10 @@ static FORCEINLINE void __store(__vec4_i1 *p, __vec4_i1 value, int align) {
|
||||
_mm_storeu_ps((float *)(&p->v), value.v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i1 __smear_i1(__vec4_i1, int v) {
|
||||
return __vec4_i1(v, v, v, v);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// int8
|
||||
|
||||
@@ -489,7 +493,7 @@ static FORCEINLINE void __insert_element(__vec4_i8 *v, int index, int8_t val) {
|
||||
((int8_t *)v)[index] = val;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i8 __smear_i8(int8_t v) {
|
||||
static FORCEINLINE __vec4_i8 __smear_i8(__vec4_i8, int8_t v) {
|
||||
return _mm_set1_epi8(v);
|
||||
}
|
||||
|
||||
@@ -748,7 +752,7 @@ static FORCEINLINE void __insert_element(__vec4_i16 *v, int index, int16_t val)
|
||||
((int16_t *)v)[index] = val;
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i16 __smear_i16(int16_t v) {
|
||||
static FORCEINLINE __vec4_i16 __smear_i16(__vec4_i16, int16_t v) {
|
||||
return _mm_set1_epi16(v);
|
||||
}
|
||||
|
||||
@@ -985,7 +989,7 @@ static FORCEINLINE __vec4_i32 __select(__vec4_i1 mask, __vec4_i32 a, __vec4_i32
|
||||
_mm_castsi128_ps(a.v), mask.v));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i32 __smear_i32(int32_t v) {
|
||||
static FORCEINLINE __vec4_i32 __smear_i32(__vec4_i32, int32_t v) {
|
||||
return _mm_set1_epi32(v);
|
||||
}
|
||||
|
||||
@@ -1246,7 +1250,7 @@ static FORCEINLINE __vec4_i64 __select(__vec4_i1 mask, __vec4_i64 a, __vec4_i64
|
||||
return __vec4_i64(_mm_castpd_si128(r0), _mm_castpd_si128(r1));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i64 __smear_i64(int64_t v) {
|
||||
static FORCEINLINE __vec4_i64 __smear_i64(__vec4_i64, int64_t v) {
|
||||
return __vec4_i64(v, v, v, v);
|
||||
}
|
||||
|
||||
@@ -1350,7 +1354,7 @@ static FORCEINLINE __vec4_f __select(__vec4_i1 mask, __vec4_f a, __vec4_f b) {
|
||||
return _mm_blendv_ps(b.v, a.v, mask.v);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_f __smear_float(float v) {
|
||||
static FORCEINLINE __vec4_f __smear_float(__vec4_f, float v) {
|
||||
return _mm_set1_ps(v);
|
||||
}
|
||||
|
||||
@@ -1482,7 +1486,7 @@ static FORCEINLINE __vec4_d __select(__vec4_i1 mask, __vec4_d a, __vec4_d b) {
|
||||
return __vec4_d(r0, r1);
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_d __smear_double(double v) {
|
||||
static FORCEINLINE __vec4_d __smear_double(__vec4_d, double v) {
|
||||
return __vec4_d(_mm_set1_pd(v), _mm_set1_pd(v));
|
||||
}
|
||||
|
||||
@@ -1582,11 +1586,13 @@ static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i8 val) {
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i8 __cast_sext(__vec4_i8, __vec4_i1 v) {
|
||||
return __select(v, __smear_i8(0xff), __smear_i8(0));
|
||||
return __select(v, __smear_i8(__vec4_i8(), 0xff),
|
||||
__smear_i8(__vec4_i8(), 0));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i16 __cast_sext(__vec4_i16, __vec4_i1 v) {
|
||||
return __select(v, __smear_i16(0xffff), __smear_i16(0));
|
||||
return __select(v, __smear_i16(__vec4_i16(), 0xffff),
|
||||
__smear_i16(__vec4_i16(), 0));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i32 __cast_sext(__vec4_i32, __vec4_i1 v) {
|
||||
@@ -1646,11 +1652,12 @@ static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i8 val) {
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i8 __cast_zext(__vec4_i8, __vec4_i1 v) {
|
||||
return __select(v, __smear_i8(1), __smear_i8(0));
|
||||
return __select(v, __smear_i8(__vec4_i8(), 1), __smear_i8(__vec4_i8(), 0));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i16 __cast_zext(__vec4_i16, __vec4_i1 v) {
|
||||
return __select(v, __smear_i16(1), __smear_i16(0));
|
||||
return __select(v, __smear_i16(__vec4_i16(), 1),
|
||||
__smear_i16(__vec4_i16(), 0));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) {
|
||||
@@ -1658,7 +1665,7 @@ static FORCEINLINE __vec4_i32 __cast_zext(__vec4_i32, __vec4_i1 v) {
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_i64 __cast_zext(__vec4_i64, __vec4_i1 v) {
|
||||
return __select(v, __smear_i64(1), __smear_i64(0));
|
||||
return __select(v, __smear_i64(__vec4_i64(), 1), __smear_i64(__vec4_i64(), 0));
|
||||
}
|
||||
|
||||
// truncations
|
||||
@@ -1818,11 +1825,11 @@ static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i64 val) {
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_f __cast_uitofp(__vec4_f, __vec4_i1 v) {
|
||||
return __select(v, __smear_float(1.), __smear_float(0.));
|
||||
return __select(v, __smear_float(__vec4_f(), 1.), __smear_float(__vec4_f(), 0.));
|
||||
}
|
||||
|
||||
static FORCEINLINE __vec4_d __cast_uitofp(__vec4_d, __vec4_i1 v) {
|
||||
return __select(v, __smear_double(1.), __smear_double(0.));
|
||||
return __select(v, __smear_double(__vec4_d(), 1.), __smear_double(__vec4_d(), 0.));
|
||||
}
|
||||
|
||||
// float/double to signed int
|
||||
@@ -2613,8 +2620,8 @@ lGatherBaseOffsets32(RetVec, RetScalar, unsigned char *p, __vec4_i32 offsets,
|
||||
RetScalar r[4];
|
||||
#if 1
|
||||
// "Fast gather" trick...
|
||||
offsets = __select(mask, offsets, __smear_i32(0));
|
||||
constOffset = __select(mask, constOffset, __smear_i32(0));
|
||||
offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0));
|
||||
constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0));
|
||||
|
||||
int offset = scale * _mm_extract_epi32(offsets.v, 0) + _mm_extract_epi32(constOffset.v, 0);
|
||||
RetScalar *ptr = (RetScalar *)(p + offset);
|
||||
@@ -2671,8 +2678,8 @@ lGatherBaseOffsets64(RetVec, RetScalar, unsigned char *p, __vec4_i64 offsets,
|
||||
RetScalar r[4];
|
||||
#if 1
|
||||
// "Fast gather" trick...
|
||||
offsets = __select(mask, offsets, __smear_i64(0));
|
||||
constOffset = __select(mask, constOffset, __smear_i64(0));
|
||||
offsets = __select(mask, offsets, __smear_i64(__vec4_i64(), 0));
|
||||
constOffset = __select(mask, constOffset, __smear_i64(__vec4_i64(), 0));
|
||||
|
||||
int64_t offset = scale * _mm_extract_epi64(offsets.v[0], 0) + _mm_extract_epi64(constOffset.v[0], 0);
|
||||
RetScalar *ptr = (RetScalar *)(p + offset);
|
||||
@@ -2756,8 +2763,8 @@ __gather_base_offsets32_i32(uint8_t *p, __vec4_i32 offsets, uint32_t scale,
|
||||
__m128i r = _mm_set_epi32(0, 0, 0, 0);
|
||||
#if 1
|
||||
// "Fast gather"...
|
||||
offsets = __select(mask, offsets, __smear_i32(0));
|
||||
constOffset = __select(mask, constOffset, __smear_i32(0));
|
||||
offsets = __select(mask, offsets, __smear_i32(__vec4_i32(), 0));
|
||||
constOffset = __select(mask, constOffset, __smear_i32(__vec4_i32(), 0));
|
||||
|
||||
int offset = scale * _mm_extract_epi32(offsets.v, 0) +
|
||||
_mm_extract_epi32(constOffset.v, 0);
|
||||
|
||||
@@ -43,9 +43,15 @@ extern "C" {
|
||||
#endif /* __cplusplus */
|
||||
__inline__ uint64_t rdtsc() {
|
||||
uint32_t low, high;
|
||||
#ifdef __x86_64
|
||||
__asm__ __volatile__ (
|
||||
"xorl %%eax,%%eax \n cpuid"
|
||||
::: "%rax", "%rbx", "%rcx", "%rdx" );
|
||||
#else
|
||||
__asm__ __volatile__ (
|
||||
"xorl %%eax,%%eax \n cpuid"
|
||||
::: "%eax", "%ebx", "%ecx", "%edx" );
|
||||
#endif
|
||||
__asm__ __volatile__ (
|
||||
"rdtsc" : "=a" (low), "=d" (high));
|
||||
return (uint64_t)high << 32 | low;
|
||||
|
||||
30
expr.h
30
expr.h
@@ -284,6 +284,10 @@ public:
|
||||
int EstimateCost() const;
|
||||
|
||||
Expr *baseExpr, *index;
|
||||
|
||||
private:
|
||||
mutable const Type *type;
|
||||
mutable const PointerType *lvalueType;
|
||||
};
|
||||
|
||||
|
||||
@@ -320,6 +324,9 @@ public:
|
||||
member is found. (i.e. this is true if the MemberExpr was a '->'
|
||||
operator, and is false if it was a '.' operator. */
|
||||
bool dereferenceExpr;
|
||||
|
||||
protected:
|
||||
mutable const Type *type, *lvalueType;
|
||||
};
|
||||
|
||||
|
||||
@@ -584,6 +591,7 @@ public:
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
int EstimateCost() const;
|
||||
llvm::Constant *GetConstant(const Type *type) const;
|
||||
|
||||
Expr *expr;
|
||||
};
|
||||
@@ -651,20 +659,26 @@ public:
|
||||
function overloading, this method resolves which actual function
|
||||
the arguments match best. If the argCouldBeNULL parameter is
|
||||
non-NULL, each element indicates whether the corresponding argument
|
||||
is the number zero, indicating that it could be a NULL pointer.
|
||||
This parameter may be NULL (for cases where overload resolution is
|
||||
being done just given type information without the parameter
|
||||
argument expressions being available. It returns true on success.
|
||||
is the number zero, indicating that it could be a NULL pointer, and
|
||||
if argIsConstant is non-NULL, each element indicates whether the
|
||||
corresponding argument is a compile-time constant value. Both of
|
||||
these parameters may be NULL (for cases where overload resolution
|
||||
is being done just given type information without the parameter
|
||||
argument expressions being available). This function returns true
|
||||
on success.
|
||||
*/
|
||||
bool ResolveOverloads(SourcePos argPos,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL = NULL);
|
||||
const std::vector<bool> *argCouldBeNULL = NULL,
|
||||
const std::vector<bool> *argIsConstant = NULL);
|
||||
Symbol *GetMatchingFunction();
|
||||
|
||||
private:
|
||||
bool tryResolve(int (*matchFunc)(const Type *, const Type *),
|
||||
SourcePos argPos, const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL);
|
||||
std::vector<Symbol *> getCandidateFunctions(int argCount) const;
|
||||
static int computeOverloadCost(const FunctionType *ftype,
|
||||
const std::vector<const Type *> &argTypes,
|
||||
const std::vector<bool> *argCouldBeNULL,
|
||||
const std::vector<bool> *argIsConstant);
|
||||
|
||||
/** Name of the function that is being called. */
|
||||
std::string name;
|
||||
|
||||
50
func.cpp
50
func.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -66,9 +66,8 @@
|
||||
#include <llvm/Support/ToolOutputFile.h>
|
||||
#include <llvm/Assembly/PrintModulePass.h>
|
||||
|
||||
Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
Function::Function(Symbol *s, Stmt *c) {
|
||||
sym = s;
|
||||
args = a;
|
||||
code = c;
|
||||
|
||||
maskSymbol = m->symbolTable->LookupVariable("__mask");
|
||||
@@ -101,12 +100,20 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
printf("\n\n\n");
|
||||
}
|
||||
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||
Assert(type != NULL);
|
||||
|
||||
for (unsigned int i = 0; i < args.size(); ++i)
|
||||
if (dynamic_cast<const ReferenceType *>(args[i]->type) == NULL)
|
||||
args[i]->parentFunction = this;
|
||||
for (int i = 0; i < type->GetNumParameters(); ++i) {
|
||||
const char *paramName = type->GetParameterName(i).c_str();
|
||||
Symbol *sym = m->symbolTable->LookupVariable(paramName);
|
||||
if (sym == NULL)
|
||||
Assert(strncmp(paramName, "__anon_parameter_", 17) == 0);
|
||||
args.push_back(sym);
|
||||
|
||||
const Type *t = type->GetParameterType(i);
|
||||
if (sym != NULL && CastType<ReferenceType>(t) == NULL)
|
||||
sym->parentFunction = this;
|
||||
}
|
||||
|
||||
if (type->isTask) {
|
||||
threadIndexSym = m->symbolTable->LookupVariable("threadIndex");
|
||||
@@ -125,7 +132,7 @@ Function::Function(Symbol *s, const std::vector<Symbol *> &a, Stmt *c) {
|
||||
|
||||
const Type *
|
||||
Function::GetReturnType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||
Assert(type != NULL);
|
||||
return type->GetReturnType();
|
||||
}
|
||||
@@ -133,7 +140,7 @@ Function::GetReturnType() const {
|
||||
|
||||
const FunctionType *
|
||||
Function::GetType() const {
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||
Assert(type != NULL);
|
||||
return type;
|
||||
}
|
||||
@@ -145,7 +152,8 @@ Function::GetType() const {
|
||||
'mem2reg' pass will in turn promote to SSA registers..
|
||||
*/
|
||||
static void
|
||||
lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol *> &args,
|
||||
lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const
|
||||
std::vector<Symbol *> &args,
|
||||
FunctionEmitContext *ctx) {
|
||||
// We expect the argument structure to come in as a pointer to a
|
||||
// structure. Confirm and figure out its type here.
|
||||
@@ -157,9 +165,13 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
|
||||
llvm::dyn_cast<const llvm::StructType>(pt->getElementType());
|
||||
|
||||
// Get the type of the argument we're copying in and its Symbol pointer
|
||||
LLVM_TYPE_CONST llvm::Type *argType = argStructType->getElementType(i);
|
||||
llvm::Type *argType = argStructType->getElementType(i);
|
||||
Symbol *sym = args[i];
|
||||
|
||||
if (sym == NULL)
|
||||
// anonymous parameter, so don't worry about it
|
||||
return;
|
||||
|
||||
// allocate space to copy the parameter in to
|
||||
sym->storagePtr = ctx->AllocaInst(argType, sym->name.c_str());
|
||||
|
||||
@@ -170,7 +182,7 @@ lCopyInTaskParameter(int i, llvm::Value *structArgPtr, const std::vector<Symbol
|
||||
// memory
|
||||
llvm::Value *ptrval = ctx->LoadInst(ptr, sym->name.c_str());
|
||||
ctx->StoreInst(ptrval, sym->storagePtr);
|
||||
ctx->EmitFunctionParameterDebugInfo(sym);
|
||||
ctx->EmitFunctionParameterDebugInfo(sym, i);
|
||||
}
|
||||
|
||||
|
||||
@@ -186,14 +198,14 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
// value
|
||||
maskSymbol->storagePtr = ctx->GetFullMaskPointer();
|
||||
|
||||
// add debugging info for __mask, programIndex, ...
|
||||
// add debugging info for __mask
|
||||
maskSymbol->pos = firstStmtPos;
|
||||
ctx->EmitVariableDebugInfo(maskSymbol);
|
||||
|
||||
#if 0
|
||||
llvm::BasicBlock *entryBBlock = ctx->GetCurrentBasicBlock();
|
||||
#endif
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||
Assert(type != NULL);
|
||||
if (type->isTask == true) {
|
||||
// For tasks, there should always be three parameters: the
|
||||
@@ -240,13 +252,17 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
||||
llvm::Function::arg_iterator argIter = function->arg_begin();
|
||||
for (unsigned int i = 0; i < args.size(); ++i, ++argIter) {
|
||||
Symbol *sym = args[i];
|
||||
if (sym == NULL)
|
||||
// anonymous function parameter
|
||||
continue;
|
||||
|
||||
argIter->setName(sym->name.c_str());
|
||||
|
||||
// Allocate stack storage for the parameter and emit code
|
||||
// to store its value there.
|
||||
sym->storagePtr = ctx->AllocaInst(argIter->getType(), sym->name.c_str());
|
||||
ctx->StoreInst(argIter, sym->storagePtr);
|
||||
ctx->EmitFunctionParameterDebugInfo(sym);
|
||||
ctx->EmitFunctionParameterDebugInfo(sym, i);
|
||||
}
|
||||
|
||||
// If the number of actual function arguments is equal to the
|
||||
@@ -415,11 +431,11 @@ Function::GenerateIR() {
|
||||
// If the function is 'export'-qualified, emit a second version of
|
||||
// it without a mask parameter and without name mangling so that
|
||||
// the application can call it
|
||||
const FunctionType *type = dynamic_cast<const FunctionType *>(sym->type);
|
||||
const FunctionType *type = CastType<FunctionType>(sym->type);
|
||||
Assert(type != NULL);
|
||||
if (type->isExported) {
|
||||
if (!type->isTask) {
|
||||
LLVM_TYPE_CONST llvm::FunctionType *ftype =
|
||||
llvm::FunctionType *ftype =
|
||||
type->LLVMFunctionType(g->ctx);
|
||||
llvm::GlobalValue::LinkageTypes linkage = llvm::GlobalValue::ExternalLinkage;
|
||||
std::string functionName = sym->name;
|
||||
|
||||
4
func.h
4
func.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2011, Intel Corporation
|
||||
Copyright (c) 2011-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -43,7 +43,7 @@
|
||||
|
||||
class Function {
|
||||
public:
|
||||
Function(Symbol *sym, const std::vector<Symbol *> &args, Stmt *code);
|
||||
Function(Symbol *sym, Stmt *code);
|
||||
|
||||
const Type *GetReturnType() const;
|
||||
const FunctionType *GetType() const;
|
||||
|
||||
278
ispc.cpp
278
ispc.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -54,14 +54,8 @@
|
||||
#include <llvm/Target/TargetMachine.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#else
|
||||
#include <llvm/Target/TargetRegistry.h>
|
||||
#include <llvm/Target/TargetSelect.h>
|
||||
#include <llvm/Target/SubtargetFeature.h>
|
||||
#endif
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#include <llvm/Support/Host.h>
|
||||
|
||||
Globals *g;
|
||||
@@ -70,9 +64,82 @@ Module *m;
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Target
|
||||
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
/** Executes the x86 CPUID instruction with EAX preloaded with infoType
    (the "0" input constraint ties it to output operand 0, i.e. EAX) and
    stores the resulting EAX, EBX, ECX and EDX values in info[0], info[1],
    info[2] and info[3] respectively.

    No input value is supplied for ECX here; __cpuidex is the variant
    that also passes a value in ECX.

    @param info      Receives the four result registers.
    @param infoType  Value placed in EAX before CPUID executes.
 */
static void __cpuid(int info[4], int infoType) {
|
||||
__asm__ __volatile__ ("cpuid"
|
||||
: "=a" (info[0]), "=b" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
: "0" (infoType));
|
||||
}
|
||||
|
||||
/** Executes the x86 CPUID instruction with EAX = level and ECX = count
    (the "0" and "2" input constraints tie them to the EAX and ECX output
    operands) and stores the resulting EAX, EBX, ECX and EDX values in
    info[0..3].

    Unlike __cpuid, EBX is not named as an output: operand 1 is an
    arbitrary general register ("=r").  EBX is exchanged with that
    register before CPUID and exchanged back afterwards, so EBX holds its
    original value when the asm statement ends and the EBX result is left
    in operand 1 (info[1]).

    @param info   Receives the four result registers.
    @param level  Value placed in EAX before CPUID executes.
    @param count  Value placed in ECX before CPUID executes.
 */
/* Save %ebx in case it's the PIC register */
|
||||
static void __cpuidex(int info[4], int level, int count) {
|
||||
// NOTE(review): the {l} / {%%} braces look like GCC assembler-dialect
// alternatives (text emitted for AT&T syntax only) -- confirm against
// the GCC extended-asm documentation before editing the template.
__asm__ __volatile__ ("xchg{l}\t{%%}ebx, %1\n\t"
|
||||
"cpuid\n\t"
|
||||
"xchg{l}\t{%%}ebx, %1\n\t"
|
||||
: "=a" (info[0]), "=r" (info[1]), "=c" (info[2]), "=d" (info[3])
|
||||
: "0" (level), "2" (count));
|
||||
}
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
|
||||
static const char *
|
||||
lGetSystemISA() {
|
||||
int info[4];
|
||||
__cpuid(info, 1);
|
||||
|
||||
if ((info[2] & (1 << 28)) != 0) {
|
||||
// AVX1 for sure. Do we have AVX2?
|
||||
// Call cpuid with eax=7, ecx=0
|
||||
__cpuidex(info, 7, 0);
|
||||
if ((info[1] & (1 << 5)) != 0)
|
||||
return "avx2";
|
||||
else
|
||||
return "avx";
|
||||
}
|
||||
else if ((info[2] & (1 << 19)) != 0)
|
||||
return "sse4";
|
||||
else if ((info[3] & (1 << 26)) != 0)
|
||||
return "sse2";
|
||||
else {
|
||||
fprintf(stderr, "Unable to detect supported SSE/AVX ISA. Exiting.\n");
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Names of the CPUs that may be given explicitly as the target CPU.
    Target::GetTarget() compares a user-supplied CPU name against these
    entries, and Target::SupportedTargetCPUs() joins them into the
    comma-separated list shown to the user.
 */
static const char *supportedCPUs[] = {
    "atom",
    "penryn",
    "core2",
    "corei7",
    "corei7-avx",
};
|
||||
|
||||
|
||||
bool
|
||||
Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
bool pic, Target *t) {
|
||||
if (isa == NULL) {
|
||||
if (cpu != NULL) {
|
||||
// If a CPU was specified explicitly, try to pick the best
|
||||
// possible ISA based on that.
|
||||
if (!strcmp(cpu, "sandybridge") ||
|
||||
!strcmp(cpu, "corei7-avx"))
|
||||
isa = "avx";
|
||||
else if (!strcmp(cpu, "corei7") ||
|
||||
!strcmp(cpu, "penryn"))
|
||||
isa = "sse4";
|
||||
else
|
||||
isa = "sse2";
|
||||
fprintf(stderr, "Notice: no --target specified on command-line. "
|
||||
"Using ISA \"%s\" based on specified CPU \"%s\".\n", isa,
|
||||
cpu);
|
||||
}
|
||||
else {
|
||||
// No CPU and no ISA, so use CPUID to figure out what this CPU
|
||||
// supports.
|
||||
isa = lGetSystemISA();
|
||||
fprintf(stderr, "Notice: no --target specified on command-line. "
|
||||
"Using system ISA \"%s\".\n", isa);
|
||||
}
|
||||
}
|
||||
|
||||
if (cpu == NULL) {
|
||||
std::string hostCPU = llvm::sys::getHostCPUName();
|
||||
if (hostCPU.size() > 0)
|
||||
@@ -82,19 +149,24 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
cpu = "generic";
|
||||
}
|
||||
}
|
||||
else {
|
||||
bool foundCPU = false;
|
||||
for (int i = 0; i < int(sizeof(supportedCPUs) / sizeof(supportedCPUs[0]));
|
||||
++i) {
|
||||
if (!strcmp(cpu, supportedCPUs[i])) {
|
||||
foundCPU = true;
|
||||
break;
|
||||
}
|
||||
}
|
||||
if (foundCPU == false) {
|
||||
fprintf(stderr, "Error: CPU type \"%s\" unknown. Supported CPUs: "
|
||||
"%s.\n", cpu, SupportedTargetCPUs().c_str());
|
||||
return false;
|
||||
}
|
||||
}
|
||||
|
||||
t->cpu = cpu;
|
||||
|
||||
if (isa == NULL) {
|
||||
if (!strcasecmp(cpu, "atom"))
|
||||
isa = "sse2";
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(cpu, "sandybridge") ||
|
||||
!strcasecmp(cpu, "corei7-avx"))
|
||||
isa = "avx";
|
||||
#endif // LLVM_3_0
|
||||
else
|
||||
isa = "sse4";
|
||||
}
|
||||
if (arch == NULL)
|
||||
arch = "x86-64";
|
||||
|
||||
@@ -125,13 +197,15 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->arch = arch;
|
||||
}
|
||||
|
||||
// This is the case for most of them
|
||||
t->hasHalf = t->hasTranscendentals = false;
|
||||
|
||||
if (!strcasecmp(isa, "sse2")) {
|
||||
t->isa = Target::SSE2;
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse2-x2")) {
|
||||
@@ -140,7 +214,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4")) {
|
||||
@@ -149,7 +222,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->vectorWidth = 4;
|
||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
|
||||
@@ -158,7 +230,6 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-4")) {
|
||||
@@ -166,41 +237,59 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->nativeVectorWidth = 4;
|
||||
t->vectorWidth = 4;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
t->hasHalf = true;
|
||||
t->hasTranscendentals = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-8")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
t->hasHalf = true;
|
||||
t->hasTranscendentals = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-16")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 16;
|
||||
t->vectorWidth = 16;
|
||||
t->maskingIsFree = true;
|
||||
t->allOffMaskIsSafe = true;
|
||||
t->maskBitCount = 1;
|
||||
t->hasHalf = true;
|
||||
t->hasTranscendentals = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-32")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 32;
|
||||
t->vectorWidth = 32;
|
||||
t->maskingIsFree = true;
|
||||
t->maskBitCount = 1;
|
||||
t->hasHalf = true;
|
||||
t->hasTranscendentals = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-64")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 64;
|
||||
t->vectorWidth = 64;
|
||||
t->maskingIsFree = true;
|
||||
t->maskBitCount = 1;
|
||||
t->hasHalf = true;
|
||||
t->hasTranscendentals = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "generic-1")) {
|
||||
t->isa = Target::GENERIC;
|
||||
t->nativeVectorWidth = 1;
|
||||
t->vectorWidth = 1;
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
else if (!strcasecmp(isa, "avx")) {
|
||||
t->isa = Target::AVX;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx-x2")) {
|
||||
@@ -209,19 +298,17 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx,+popcnt,+cmov";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
}
|
||||
#endif // LLVM 3.0+
|
||||
#if defined(LLVM_3_1svn)
|
||||
#ifndef LLVM_3_0
|
||||
else if (!strcasecmp(isa, "avx2")) {
|
||||
t->isa = Target::AVX2;
|
||||
t->nativeVectorWidth = 8;
|
||||
t->vectorWidth = 8;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
}
|
||||
else if (!strcasecmp(isa, "avx2-x2")) {
|
||||
t->isa = Target::AVX2;
|
||||
@@ -229,10 +316,10 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
t->vectorWidth = 16;
|
||||
t->attributes = "+avx2,+popcnt,+cmov,+f16c";
|
||||
t->maskingIsFree = false;
|
||||
t->allOffMaskIsSafe = false;
|
||||
t->maskBitCount = 32;
|
||||
t->hasHalf = true;
|
||||
}
|
||||
#endif // LLVM 3.1
|
||||
#endif // !LLVM_3_0
|
||||
else {
|
||||
fprintf(stderr, "Target ISA \"%s\" is unknown. Choices are: %s\n",
|
||||
isa, SupportedTargetISAs());
|
||||
@@ -243,23 +330,23 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
||||
llvm::TargetMachine *targetMachine = t->GetTargetMachine();
|
||||
const llvm::TargetData *targetData = targetMachine->getTargetData();
|
||||
t->is32Bit = (targetData->getPointerSize() == 4);
|
||||
Assert(t->vectorWidth <= ISPC_MAX_NVEC);
|
||||
}
|
||||
|
||||
return !error;
|
||||
}
|
||||
|
||||
|
||||
const char *
|
||||
std::string
|
||||
Target::SupportedTargetCPUs() {
|
||||
return "atom, barcelona, core2, corei7, "
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
"corei7-avx, "
|
||||
#endif
|
||||
"istanbul, nocona, penryn, "
|
||||
#ifdef LLVM_2_9
|
||||
"sandybridge, "
|
||||
#endif
|
||||
"westmere";
|
||||
std::string ret;
|
||||
int count = sizeof(supportedCPUs) / sizeof(supportedCPUs[0]);
|
||||
for (int i = 0; i < count; ++i) {
|
||||
ret += supportedCPUs[i];
|
||||
if (i != count - 1)
|
||||
ret += ", ";
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
@@ -271,14 +358,11 @@ Target::SupportedTargetArchs() {
|
||||
|
||||
const char *
|
||||
Target::SupportedTargetISAs() {
|
||||
return "sse2, sse2-x2, sse4, sse4-x2"
|
||||
#ifndef LLVM_2_9
|
||||
", avx, avx-x2"
|
||||
#endif // !LLVM_2_9
|
||||
#ifdef LLVM_3_1svn
|
||||
return "sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2"
|
||||
#ifndef LLVM_3_0
|
||||
", avx2, avx2-x2"
|
||||
#endif // LLVM_3_1svn
|
||||
", generic-4, generic-8, generic-16, generic-1";
|
||||
#endif // !LLVM_3_0
|
||||
", generic-1, generic-4, generic-8, generic-16, generic-32";
|
||||
}
|
||||
|
||||
|
||||
@@ -286,10 +370,10 @@ std::string
|
||||
Target::GetTripleString() const {
|
||||
llvm::Triple triple;
|
||||
// Start with the host triple as the default
|
||||
#if defined(LLVM_3_1) || defined(LLVM_3_1svn)
|
||||
triple.setTriple(llvm::sys::getDefaultTargetTriple());
|
||||
#else
|
||||
#ifdef LLVM_3_0
|
||||
triple.setTriple(llvm::sys::getHostTriple());
|
||||
#else
|
||||
triple.setTriple(llvm::sys::getDefaultTargetTriple());
|
||||
#endif
|
||||
|
||||
// And override the arch in the host triple based on what the user
|
||||
@@ -315,30 +399,17 @@ Target::GetTargetMachine() const {
|
||||
|
||||
llvm::Reloc::Model relocModel = generatePIC ? llvm::Reloc::PIC_ :
|
||||
llvm::Reloc::Default;
|
||||
#if defined(LLVM_3_1svn)
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetOptions options;
|
||||
if (g->opt.fastMath == true)
|
||||
options.UnsafeFPMath = 1;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, options,
|
||||
relocModel);
|
||||
#elif defined(LLVM_3_0)
|
||||
#ifdef LLVM_3_0
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, cpu, featuresString, relocModel);
|
||||
#else // LLVM 2.9
|
||||
#ifdef ISPC_IS_APPLE
|
||||
relocModel = llvm::Reloc::PIC_;
|
||||
#endif // ISPC_IS_APPLE
|
||||
std::string featuresString = cpu + std::string(",") + attributes;
|
||||
#else
|
||||
std::string featuresString = attributes;
|
||||
llvm::TargetOptions options;
|
||||
llvm::TargetMachine *targetMachine =
|
||||
target->createTargetMachine(triple, featuresString);
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
targetMachine->setRelocationModel(relocModel);
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
#endif // LLVM_2_9
|
||||
|
||||
target->createTargetMachine(triple, cpu, featuresString, options,
|
||||
relocModel);
|
||||
#endif // !LLVM_3_0
|
||||
Assert(targetMachine != NULL);
|
||||
|
||||
targetMachine->setAsmVerbosityDefault(true);
|
||||
@@ -367,7 +438,7 @@ Target::GetISAString() const {
|
||||
|
||||
|
||||
static bool
|
||||
lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
lGenericTypeLayoutIndeterminate(llvm::Type *type) {
|
||||
if (type->isPrimitiveType() || type->isIntegerTy())
|
||||
return false;
|
||||
|
||||
@@ -376,18 +447,18 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
type == LLVMTypes::Int1VectorType)
|
||||
return true;
|
||||
|
||||
LLVM_TYPE_CONST llvm::ArrayType *at =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(type);
|
||||
llvm::ArrayType *at =
|
||||
llvm::dyn_cast<llvm::ArrayType>(type);
|
||||
if (at != NULL)
|
||||
return lGenericTypeLayoutIndeterminate(at->getElementType());
|
||||
|
||||
LLVM_TYPE_CONST llvm::PointerType *pt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::PointerType>(type);
|
||||
llvm::PointerType *pt =
|
||||
llvm::dyn_cast<llvm::PointerType>(type);
|
||||
if (pt != NULL)
|
||||
return false;
|
||||
|
||||
LLVM_TYPE_CONST llvm::StructType *st =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
|
||||
llvm::StructType *st =
|
||||
llvm::dyn_cast<llvm::StructType>(type);
|
||||
if (st != NULL) {
|
||||
for (int i = 0; i < (int)st->getNumElements(); ++i)
|
||||
if (lGenericTypeLayoutIndeterminate(st->getElementType(i)))
|
||||
@@ -395,29 +466,24 @@ lGenericTypeLayoutIndeterminate(LLVM_TYPE_CONST llvm::Type *type) {
|
||||
return false;
|
||||
}
|
||||
|
||||
Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(type));
|
||||
Assert(llvm::isa<llvm::VectorType>(type));
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
|
||||
Target::SizeOf(llvm::Type *type,
|
||||
llvm::BasicBlock *insertAtEnd) {
|
||||
if (isa == Target::GENERIC &&
|
||||
lGenericTypeLayoutIndeterminate(type)) {
|
||||
llvm::Value *index[1] = { LLVMInt32(1) };
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::ArrayRef<llvm::Value *> arrayRef(&index[0], &index[1]);
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "sizeof_gep",
|
||||
insertAtEnd);
|
||||
#else
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, &index[0], &index[1],
|
||||
"sizeof_gep", insertAtEnd);
|
||||
#endif
|
||||
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
|
||||
"sizeof_int", insertAtEnd);
|
||||
@@ -428,7 +494,9 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
|
||||
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
Assert(td != NULL);
|
||||
uint64_t byteSize = td->getTypeSizeInBits(type) / 8;
|
||||
uint64_t bitSize = td->getTypeSizeInBits(type);
|
||||
Assert((bitSize % 8) == 0);
|
||||
uint64_t byteSize = bitSize / 8;
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return LLVMInt32((int32_t)byteSize);
|
||||
else
|
||||
@@ -437,23 +505,18 @@ Target::SizeOf(LLVM_TYPE_CONST llvm::Type *type,
|
||||
|
||||
|
||||
llvm::Value *
|
||||
Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element,
|
||||
Target::StructOffset(llvm::Type *type, int element,
|
||||
llvm::BasicBlock *insertAtEnd) {
|
||||
if (isa == Target::GENERIC &&
|
||||
lGenericTypeLayoutIndeterminate(type) == true) {
|
||||
llvm::Value *indices[2] = { LLVMInt32(0), LLVMInt32(element) };
|
||||
LLVM_TYPE_CONST llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::PointerType *ptrType = llvm::PointerType::get(type, 0);
|
||||
llvm::Value *voidPtr = llvm::ConstantPointerNull::get(ptrType);
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
llvm::ArrayRef<llvm::Value *> arrayRef(&indices[0], &indices[2]);
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, arrayRef, "offset_gep",
|
||||
insertAtEnd);
|
||||
#else
|
||||
llvm::Instruction *gep =
|
||||
llvm::GetElementPtrInst::Create(voidPtr, &indices[0], &indices[2],
|
||||
"offset_gep", insertAtEnd);
|
||||
#endif
|
||||
|
||||
if (is32Bit || g->opt.force32BitAddressing)
|
||||
return new llvm::PtrToIntInst(gep, LLVMTypes::Int32Type,
|
||||
"offset_int", insertAtEnd);
|
||||
@@ -464,9 +527,12 @@ Target::StructOffset(LLVM_TYPE_CONST llvm::Type *type, int element,
|
||||
|
||||
const llvm::TargetData *td = GetTargetMachine()->getTargetData();
|
||||
Assert(td != NULL);
|
||||
LLVM_TYPE_CONST llvm::StructType *structType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::StructType>(type);
|
||||
Assert(structType != NULL);
|
||||
llvm::StructType *structType =
|
||||
llvm::dyn_cast<llvm::StructType>(type);
|
||||
if (structType == NULL || structType->isSized() == false) {
|
||||
Assert(m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
const llvm::StructLayout *sl = td->getStructLayout(structType);
|
||||
Assert(sl != NULL);
|
||||
|
||||
@@ -552,7 +618,9 @@ llvm::DIFile
|
||||
SourcePos::GetDIFile() const {
|
||||
std::string directory, filename;
|
||||
GetDirectoryAndFileName(g->currentDirectory, name, &directory, &filename);
|
||||
return m->diBuilder->createFile(filename, directory);
|
||||
llvm::DIFile ret = m->diBuilder->createFile(filename, directory);
|
||||
Assert(ret.Verify());
|
||||
return ret;
|
||||
}
|
||||
|
||||
|
||||
|
||||
67
ispc.h
67
ispc.h
@@ -38,10 +38,10 @@
|
||||
#ifndef ISPC_H
|
||||
#define ISPC_H
|
||||
|
||||
#define ISPC_VERSION "1.2.1dev"
|
||||
#define ISPC_VERSION "1.2.3dev"
|
||||
|
||||
#if !defined(LLVM_2_9) && !defined(LLVM_3_0) && !defined(LLVM_3_0svn) && !defined(LLVM_3_1svn)
|
||||
#error "Only LLVM 2.9, 3.0, and the 3.1 development branch are supported"
|
||||
#if !defined(LLVM_3_0) && !defined(LLVM_3_1) && !defined(LLVM_3_2)
|
||||
#error "Only LLVM 3.0, 3.1, and the 3.2 development branch are supported"
|
||||
#endif
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
@@ -58,20 +58,10 @@
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
#define Assert(expr) \
|
||||
((void)((expr) ? 0 : __Assert (#expr, __FILE__, __LINE__)))
|
||||
#define __Assert(expr, file, line) \
|
||||
((void)fprintf(stderr, "%s:%u: Assertion failed: \"%s\"\n" \
|
||||
"***\n*** Please file a bug report at " \
|
||||
"https://github.com/ispc/ispc/issues\n*** (Including as much " \
|
||||
"information as you can about how to reproduce this error).\n" \
|
||||
"*** You have apparently encountered a bug in the compiler that " \
|
||||
"we'd like to fix!\n***\n", file, line, expr), abort(), 0)
|
||||
|
||||
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
||||
targets.
|
||||
*/
|
||||
#define ISPC_MAX_NVEC 16
|
||||
#define ISPC_MAX_NVEC 64
|
||||
|
||||
// Forward declarations of a number of widely-used LLVM types
|
||||
namespace llvm {
|
||||
@@ -92,12 +82,6 @@ namespace llvm {
|
||||
class Value;
|
||||
}
|
||||
|
||||
// llvm::Type *s are no longer const in llvm 3.0
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#define LLVM_TYPE_CONST
|
||||
#else
|
||||
#define LLVM_TYPE_CONST const
|
||||
#endif
|
||||
|
||||
class ArrayType;
|
||||
class AST;
|
||||
@@ -116,6 +100,15 @@ class SymbolTable;
|
||||
class Type;
|
||||
struct VariableDeclaration;
|
||||
|
||||
/** Storage-class specifier attached to a declaration.  The numeric values
    are the ones the compiler would assign implicitly (0 through 4); they
    are spelled out only for readability.
    NOTE(review): the per-enumerator meanings below are read off the
    names; confirm against the parser before relying on them.
 */
enum StorageClass {
    SC_NONE     = 0,   // presumably: no storage-class specifier given
    SC_EXTERN   = 1,   // presumably: 'extern'
    SC_STATIC   = 2,   // presumably: 'static'
    SC_TYPEDEF  = 3,   // presumably: 'typedef'
    SC_EXTERN_C = 4    // presumably: 'extern "C"'
};
|
||||
|
||||
|
||||
/** @brief Representation of a range of positions in a source file.
|
||||
|
||||
This class represents a range of characters in a source file
|
||||
@@ -142,11 +135,25 @@ struct SourcePos {
|
||||
bool operator==(const SourcePos &p2) const;
|
||||
};
|
||||
|
||||
|
||||
/** Returns a SourcePos that encompasses the extent of both of the given
|
||||
extents. */
|
||||
SourcePos Union(const SourcePos &p1, const SourcePos &p2);
|
||||
|
||||
|
||||
|
||||
// Assert
|
||||
|
||||
// Out-of-line handlers invoked by the Assert() / AssertPos() macros below
// when the tested expression is false.  They receive the source file and
// line of the failing assertion and the stringified expression;
// DoAssertPos additionally receives a SourcePos.
// NOTE(review): both are only declared here -- presumably they report the
// failure and terminate; confirm at their definition.
extern void DoAssert(const char *file, int line, const char *expr);
|
||||
extern void DoAssertPos(SourcePos pos, const char *file, int line, const char *expr);
|
||||
|
||||
// Assert(expr): evaluates expr once; if it is false, calls DoAssert with
// __FILE__, __LINE__ and the text of expr.  The whole macro is a void
// expression, so it can be used wherever an expression statement can.
#define Assert(expr) \
|
||||
((void)((expr) ? 0 : ((void)DoAssert (__FILE__, __LINE__, #expr), 0)))
|
||||
|
||||
// AssertPos(pos, expr): same as Assert(), but on failure also forwards
// 'pos' to DoAssertPos.
#define AssertPos(pos, expr) \
|
||||
((void)((expr) ? 0 : ((void)DoAssertPos (pos, __FILE__, __LINE__, #expr), 0)))
|
||||
|
||||
|
||||
/** @brief Structure that defines a compilation target
|
||||
|
||||
This structure defines a compilation target for the ispc compiler.
|
||||
@@ -164,7 +171,7 @@ struct Target {
|
||||
|
||||
/** Returns a comma-delimited string giving the names of the currently
|
||||
supported target CPUs. */
|
||||
static const char *SupportedTargetCPUs();
|
||||
static std::string SupportedTargetCPUs();
|
||||
|
||||
/** Returns a comma-delimited string giving the names of the currently
|
||||
supported target architectures. */
|
||||
@@ -182,13 +189,13 @@ struct Target {
|
||||
const char *GetISAString() const;
|
||||
|
||||
/** Returns the size of the given type */
|
||||
llvm::Value *SizeOf(LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *SizeOf(llvm::Type *type,
|
||||
llvm::BasicBlock *insertAtEnd);
|
||||
|
||||
/** Given a structure type and an element number in the structure,
|
||||
returns a value corresponding to the number of bytes from the start
|
||||
of the structure where the element is located. */
|
||||
llvm::Value *StructOffset(LLVM_TYPE_CONST llvm::Type *type,
|
||||
llvm::Value *StructOffset(llvm::Type *type,
|
||||
int element, llvm::BasicBlock *insertAtEnd);
|
||||
|
||||
/** llvm Target object representing this target. */
|
||||
@@ -236,16 +243,18 @@ struct Target {
|
||||
natively. */
|
||||
bool maskingIsFree;
|
||||
|
||||
/** Is it safe to run code with the mask all off: e.g. on SSE, the fast
|
||||
gather trick assumes that at least one program instance is running
|
||||
(so that it can safely assume that the array base pointer is
|
||||
valid). */
|
||||
bool allOffMaskIsSafe;
|
||||
|
||||
/** How many bits are used to store each element of the mask: e.g. this
|
||||
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
|
||||
the generic target. */
|
||||
int maskBitCount;
|
||||
|
||||
/** Indicates whether the target has native support for float/half
|
||||
conversions. */
|
||||
bool hasHalf;
|
||||
|
||||
/** Indicates whether the target has support for transcendentals (beyond
|
||||
sqrt, which we assume that all of them handle). */
|
||||
bool hasTranscendentals;
|
||||
};
|
||||
|
||||
|
||||
|
||||
28
ispc.vcxproj
28
ispc.vcxproj
@@ -29,6 +29,8 @@
|
||||
<ClCompile Include="gen-bitcode-generic-4.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-8.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-16.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-32.cpp" />
|
||||
<ClCompile Include="gen-bitcode-generic-64.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
||||
@@ -264,6 +266,32 @@
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-32.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-32.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-32.ll | python bitcode2cpp.py builtins\target-generic-32.ll > gen-bitcode-generic-32.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-32.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-32.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-32.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="builtins\target-generic-64.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-64.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-64.ll | python bitcode2cpp.py builtins\target-generic-64.ll > gen-bitcode-generic-64.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-64.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-64.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-64.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="lex.ll">
|
||||
<FileType>Document</FileType>
|
||||
|
||||
123
lex.ll
123
lex.ll
@@ -43,6 +43,7 @@
|
||||
#include <stdint.h>
|
||||
|
||||
static uint64_t lParseBinary(const char *ptr, SourcePos pos, char **endPtr);
|
||||
static int lParseInteger(bool dotdotdot);
|
||||
static void lCComment(SourcePos *);
|
||||
static void lCppComment(SourcePos *);
|
||||
static void lHandleCppHash(SourcePos *);
|
||||
@@ -322,7 +323,8 @@ inline int ispcRand() {
|
||||
%option nounistd
|
||||
|
||||
WHITESPACE [ \t\r]+
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[kMG]?
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*
|
||||
INT_NUMBER_DOTDOTDOT (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))[uUlL]*[kMG]?[uUlL]*\.\.\.
|
||||
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)
|
||||
HEX_FLOAT_NUMBER (0x[01](\.[0-9a-fA-F]*)?p[-+]?[0-9]+[fF]?)
|
||||
|
||||
@@ -406,53 +408,14 @@ L?\"(\\.|[^\\"])*\" { lStringConst(&yylval, &yylloc); return TOKEN_STRING_LITERA
|
||||
return TOKEN_IDENTIFIER;
|
||||
}
|
||||
|
||||
{INT_NUMBER}+(u|U|l|L)*? {
|
||||
{INT_NUMBER} {
|
||||
RT;
|
||||
int ls = 0, us = 0;
|
||||
return lParseInteger(false);
|
||||
}
|
||||
|
||||
char *endPtr = NULL;
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr);
|
||||
else {
|
||||
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
|
||||
yylval.intVal = _strtoui64(yytext, &endPtr, 0);
|
||||
#else
|
||||
// FIXME: should use strtouq and then issue an error if we can't
|
||||
// fit into 64 bits...
|
||||
yylval.intVal = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool kilo = false, mega = false, giga = false;
|
||||
for (; *endPtr; endPtr++) {
|
||||
if (*endPtr == 'k')
|
||||
kilo = true;
|
||||
else if (*endPtr == 'M')
|
||||
mega = true;
|
||||
else if (*endPtr == 'G')
|
||||
giga = true;
|
||||
else if (*endPtr == 'l' || *endPtr == 'L')
|
||||
ls++;
|
||||
else if (*endPtr == 'u' || *endPtr == 'U')
|
||||
us++;
|
||||
}
|
||||
if (kilo)
|
||||
yylval.intVal *= 1024;
|
||||
if (mega)
|
||||
yylval.intVal *= 1024*1024;
|
||||
if (giga)
|
||||
yylval.intVal *= 1024*1024*1024;
|
||||
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
else if (ls == 1)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((yylval.intVal & 0xffffffff) == yylval.intVal)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
{INT_NUMBER_DOTDOTDOT} {
|
||||
RT;
|
||||
return lParseInteger(true);
|
||||
}
|
||||
|
||||
|
||||
@@ -562,6 +525,72 @@ lParseBinary(const char *ptr, SourcePos pos, char **endPtr) {
|
||||
}
|
||||
|
||||
|
||||
static int
|
||||
lParseInteger(bool dotdotdot) {
|
||||
int ls = 0, us = 0;
|
||||
|
||||
char *endPtr = NULL;
|
||||
if (yytext[0] == '0' && yytext[1] == 'b')
|
||||
yylval.intVal = lParseBinary(yytext+2, yylloc, &endPtr);
|
||||
else {
|
||||
#if defined(ISPC_IS_WINDOWS) && !defined(__MINGW32__)
|
||||
yylval.intVal = _strtoui64(yytext, &endPtr, 0);
|
||||
#else
|
||||
// FIXME: should use strtouq and then issue an error if we can't
|
||||
// fit into 64 bits...
|
||||
yylval.intVal = strtoull(yytext, &endPtr, 0);
|
||||
#endif
|
||||
}
|
||||
|
||||
bool kilo = false, mega = false, giga = false;
|
||||
for (; *endPtr; endPtr++) {
|
||||
if (*endPtr == 'k')
|
||||
kilo = true;
|
||||
else if (*endPtr == 'M')
|
||||
mega = true;
|
||||
else if (*endPtr == 'G')
|
||||
giga = true;
|
||||
else if (*endPtr == 'l' || *endPtr == 'L')
|
||||
ls++;
|
||||
else if (*endPtr == 'u' || *endPtr == 'U')
|
||||
us++;
|
||||
else
|
||||
Assert(dotdotdot && *endPtr == '.');
|
||||
}
|
||||
if (kilo)
|
||||
yylval.intVal *= 1024;
|
||||
if (mega)
|
||||
yylval.intVal *= 1024*1024;
|
||||
if (giga)
|
||||
yylval.intVal *= 1024*1024*1024;
|
||||
|
||||
if (dotdotdot) {
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64DOTDOTDOT_CONSTANT : TOKEN_INT64DOTDOTDOT_CONSTANT;
|
||||
else if (ls == 1)
|
||||
return us ? TOKEN_UINT32DOTDOTDOT_CONSTANT : TOKEN_INT32DOTDOTDOT_CONSTANT;
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((yylval.intVal & 0xffffffff) == yylval.intVal)
|
||||
return us ? TOKEN_UINT32DOTDOTDOT_CONSTANT : TOKEN_INT32DOTDOTDOT_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64DOTDOTDOT_CONSTANT : TOKEN_INT64DOTDOTDOT_CONSTANT;
|
||||
}
|
||||
else {
|
||||
if (ls >= 2)
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
else if (ls == 1)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
|
||||
// See if we can fit this into a 32-bit integer...
|
||||
if ((yylval.intVal & 0xffffffff) == yylval.intVal)
|
||||
return us ? TOKEN_UINT32_CONSTANT : TOKEN_INT32_CONSTANT;
|
||||
else
|
||||
return us ? TOKEN_UINT64_CONSTANT : TOKEN_INT64_CONSTANT;
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Handle a C-style comment in the source.
|
||||
*/
|
||||
static void
|
||||
@@ -675,7 +704,7 @@ lEscapeChar(char *str, char *pChar, SourcePos *pos)
|
||||
str = tail - 1;
|
||||
break;
|
||||
default:
|
||||
Error(*pos, "Bad character escape sequence: '%s'\n.", str);
|
||||
Error(*pos, "Bad character escape sequence: '%s'.", str);
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
216
llvmutil.cpp
216
llvmutil.cpp
@@ -43,44 +43,44 @@
|
||||
#include <set>
|
||||
#include <map>
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::VoidType = NULL;
|
||||
LLVM_TYPE_CONST llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::PointerIntType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::BoolType = NULL;
|
||||
llvm::Type *LLVMTypes::VoidType = NULL;
|
||||
llvm::PointerType *LLVMTypes::VoidPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::PointerIntType = NULL;
|
||||
llvm::Type *LLVMTypes::BoolType = NULL;
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8Type = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16Type = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32Type = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64Type = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleType = NULL;
|
||||
llvm::Type *LLVMTypes::Int8Type = NULL;
|
||||
llvm::Type *LLVMTypes::Int16Type = NULL;
|
||||
llvm::Type *LLVMTypes::Int32Type = NULL;
|
||||
llvm::Type *LLVMTypes::Int64Type = NULL;
|
||||
llvm::Type *LLVMTypes::FloatType = NULL;
|
||||
llvm::Type *LLVMTypes::DoubleType = NULL;
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8PointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16PointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32PointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64PointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatPointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoublePointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int8PointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int16PointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int32PointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int64PointerType = NULL;
|
||||
llvm::Type *LLVMTypes::FloatPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::DoublePointerType = NULL;
|
||||
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::MaskType = NULL;
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::MaskType = NULL;
|
||||
llvm::VectorType *LLVMTypes::BoolVectorType = NULL;
|
||||
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::Int1VectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::Int8VectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::Int16VectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::Int32VectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::Int64VectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::FloatVectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::DoubleVectorType = NULL;
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
|
||||
LLVM_TYPE_CONST llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int8VectorPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int16VectorPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int32VectorPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::Int64VectorPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::FloatVectorPointerType = NULL;
|
||||
llvm::Type *LLVMTypes::DoubleVectorPointerType = NULL;
|
||||
|
||||
LLVM_TYPE_CONST llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;
|
||||
llvm::VectorType *LLVMTypes::VoidPointerVectorType = NULL;
|
||||
|
||||
llvm::Constant *LLVMTrue = NULL;
|
||||
llvm::Constant *LLVMFalse = NULL;
|
||||
@@ -473,9 +473,9 @@ LLVMBoolVector(const bool *bvec) {
|
||||
|
||||
|
||||
llvm::Constant *
|
||||
LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) {
|
||||
LLVM_TYPE_CONST llvm::VectorType *vecType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
|
||||
LLVMIntAsType(int64_t val, llvm::Type *type) {
|
||||
llvm::VectorType *vecType =
|
||||
llvm::dyn_cast<llvm::VectorType>(type);
|
||||
|
||||
if (vecType != NULL) {
|
||||
llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(),
|
||||
@@ -491,9 +491,9 @@ LLVMIntAsType(int64_t val, LLVM_TYPE_CONST llvm::Type *type) {
|
||||
|
||||
|
||||
llvm::Constant *
|
||||
LLVMUIntAsType(uint64_t val, LLVM_TYPE_CONST llvm::Type *type) {
|
||||
LLVM_TYPE_CONST llvm::VectorType *vecType =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(type);
|
||||
LLVMUIntAsType(uint64_t val, llvm::Type *type) {
|
||||
llvm::VectorType *vecType =
|
||||
llvm::dyn_cast<llvm::VectorType>(type);
|
||||
|
||||
if (vecType != NULL) {
|
||||
llvm::Constant *v = llvm::ConstantInt::get(vecType->getElementType(),
|
||||
@@ -642,8 +642,8 @@ LLVMFlattenInsertChain(llvm::InsertElementInst *ie, int vectorWidth,
|
||||
bool
|
||||
LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
|
||||
// Make sure we do in fact have a vector of integer values here
|
||||
LLVM_TYPE_CONST llvm::VectorType *vt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
|
||||
llvm::VectorType *vt =
|
||||
llvm::dyn_cast<llvm::VectorType>(v->getType());
|
||||
Assert(vt != NULL);
|
||||
Assert(llvm::isa<llvm::IntegerType>(vt->getElementType()));
|
||||
|
||||
@@ -657,7 +657,7 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
|
||||
|
||||
// Deal with the fact that LLVM3.1 and previous versions have different
|
||||
// representations for vectors of constant ints...
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||
if (cv == NULL)
|
||||
return false;
|
||||
@@ -678,7 +678,7 @@ LLVMExtractVectorInts(llvm::Value *v, int64_t ret[], int *nElts) {
|
||||
ret[i] = ci->getSExtValue();
|
||||
}
|
||||
return true;
|
||||
#endif // LLVM_3_1svn
|
||||
#endif // !LLVM_3_0
|
||||
}
|
||||
|
||||
|
||||
@@ -696,7 +696,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
static bool
|
||||
lIsExactMultiple(llvm::Value *val, int baseValue, int vectorLength,
|
||||
std::vector<llvm::PHINode *> &seenPhis) {
|
||||
if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()) == false) {
|
||||
if (llvm::isa<llvm::VectorType>(val->getType()) == false) {
|
||||
// If we've worked down to a constant int, then the moment of truth
|
||||
// has arrived...
|
||||
llvm::ConstantInt *ci = llvm::dyn_cast<llvm::ConstantInt>(val);
|
||||
@@ -780,7 +780,7 @@ static bool
|
||||
lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
|
||||
std::vector<llvm::PHINode *> &seenPhis,
|
||||
bool &canAdd) {
|
||||
Assert(llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()));
|
||||
Assert(llvm::isa<llvm::VectorType>(val->getType()));
|
||||
// Make sure the base value is a positive power of 2
|
||||
Assert(baseValue > 0 && (baseValue & (baseValue-1)) == 0);
|
||||
|
||||
@@ -790,7 +790,7 @@ lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
|
||||
|
||||
int64_t vecVals[ISPC_MAX_NVEC];
|
||||
int nElts;
|
||||
if (llvm::isa<LLVM_TYPE_CONST llvm::VectorType>(val->getType()) &&
|
||||
if (llvm::isa<llvm::VectorType>(val->getType()) &&
|
||||
LLVMExtractVectorInts(val, vecVals, &nElts)) {
|
||||
// If we have a vector of compile-time constant integer values,
|
||||
// then go ahead and check them directly..
|
||||
@@ -880,7 +880,7 @@ lAllDivBaseEqual(llvm::Value *val, int64_t baseValue, int vectorLength,
|
||||
// the addConstants[], mod baseValue. If we round that up to the
|
||||
// next power of 2, we'll have a value that will be no greater than
|
||||
// baseValue and sometimes less.
|
||||
int maxMod = addConstants[0] % baseValue;
|
||||
int maxMod = int(addConstants[0] % baseValue);
|
||||
for (int i = 1; i < vectorLength; ++i)
|
||||
maxMod = std::max(maxMod, int(addConstants[i] % baseValue));
|
||||
int requiredAlignment = lRoundUpPow2(maxMod);
|
||||
@@ -947,7 +947,7 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
if (cv != NULL)
|
||||
return (cv->getSplatValue() != NULL);
|
||||
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
llvm::ConstantDataVector *cdv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||
if (cdv != NULL)
|
||||
return (cdv->getSplatValue() != NULL);
|
||||
@@ -1074,8 +1074,8 @@ lVectorValuesAllEqual(llvm::Value *v, int vectorLength,
|
||||
*/
|
||||
bool
|
||||
LLVMVectorValuesAllEqual(llvm::Value *v) {
|
||||
LLVM_TYPE_CONST llvm::VectorType *vt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
|
||||
llvm::VectorType *vt =
|
||||
llvm::dyn_cast<llvm::VectorType>(v->getType());
|
||||
Assert(vt != NULL);
|
||||
int vectorLength = vt->getNumElements();
|
||||
|
||||
@@ -1102,7 +1102,7 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
|
||||
*/
|
||||
static bool
|
||||
lVectorIsLinearConstantInts(
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
llvm::ConstantDataVector *cv,
|
||||
#else
|
||||
llvm::ConstantVector *cv,
|
||||
@@ -1111,7 +1111,7 @@ lVectorIsLinearConstantInts(
|
||||
int stride) {
|
||||
// Flatten the vector out into the elements array
|
||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
for (int i = 0; i < (int)cv->getNumElements(); ++i)
|
||||
elements.push_back(cv->getElementAsConstant(i));
|
||||
#else
|
||||
@@ -1152,7 +1152,7 @@ lCheckMulForLinear(llvm::Value *op0, llvm::Value *op1, int vectorLength,
|
||||
int stride, std::vector<llvm::PHINode *> &seenPhis) {
|
||||
// Is the first operand a constant integer value splatted across all of
|
||||
// the lanes?
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(op0);
|
||||
#else
|
||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(op0);
|
||||
@@ -1226,7 +1226,7 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
|
||||
std::vector<llvm::PHINode *> &seenPhis) {
|
||||
// First try the easy case: if the values are all just constant
|
||||
// integers and have the expected stride between them, then we're done.
|
||||
#ifdef LLVM_3_1svn
|
||||
#ifndef LLVM_3_0
|
||||
llvm::ConstantDataVector *cv = llvm::dyn_cast<llvm::ConstantDataVector>(v);
|
||||
#else
|
||||
llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v);
|
||||
@@ -1344,8 +1344,8 @@ lVectorIsLinear(llvm::Value *v, int vectorLength, int stride,
|
||||
*/
|
||||
bool
|
||||
LLVMVectorIsLinear(llvm::Value *v, int stride) {
|
||||
LLVM_TYPE_CONST llvm::VectorType *vt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
|
||||
llvm::VectorType *vt =
|
||||
llvm::dyn_cast<llvm::VectorType>(v->getType());
|
||||
Assert(vt != NULL);
|
||||
int vectorLength = vt->getNumElements();
|
||||
|
||||
@@ -1390,19 +1390,38 @@ LLVMDumpValue(llvm::Value *v) {
|
||||
|
||||
|
||||
static llvm::Value *
|
||||
lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
|
||||
lExtractFirstVectorElement(llvm::Value *v,
|
||||
std::map<llvm::PHINode *, llvm::PHINode *> &phiMap) {
|
||||
// If it's not an instruction (i.e. is a constant), then we can just
|
||||
// emit an extractelement instruction and let the regular optimizer do
|
||||
// the rest.
|
||||
if (llvm::isa<llvm::Instruction>(v) == false)
|
||||
return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
|
||||
insertBefore);
|
||||
|
||||
LLVM_TYPE_CONST llvm::VectorType *vt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v->getType());
|
||||
llvm::VectorType *vt =
|
||||
llvm::dyn_cast<llvm::VectorType>(v->getType());
|
||||
Assert(vt != NULL);
|
||||
|
||||
// First, handle various constant types; do the extraction manually, as
|
||||
// appropriate.
|
||||
if (llvm::isa<llvm::ConstantAggregateZero>(v) == true) {
|
||||
Assert(vt->getElementType()->isIntegerTy());
|
||||
return llvm::ConstantInt::get(vt->getElementType(), 0);
|
||||
}
|
||||
if (llvm::ConstantVector *cv = llvm::dyn_cast<llvm::ConstantVector>(v)) {
|
||||
#ifndef LLVM_3_0
|
||||
return cv->getOperand(0);
|
||||
#else
|
||||
llvm::SmallVector<llvm::Constant *, ISPC_MAX_NVEC> elements;
|
||||
cv->getVectorElements(elements);
|
||||
return elements[0];
|
||||
#endif // !LLVM_3_0
|
||||
}
|
||||
#ifndef LLVM_3_0
|
||||
if (llvm::ConstantDataVector *cdv =
|
||||
llvm::dyn_cast<llvm::ConstantDataVector>(v))
|
||||
return cdv->getElementAsConstant(0);
|
||||
#endif // !LLVM_3_0
|
||||
|
||||
// Otherwise, all that we should have at this point is an instruction
|
||||
// of some sort
|
||||
Assert(llvm::isa<llvm::Constant>(v) == false);
|
||||
Assert(llvm::isa<llvm::Instruction>(v) == true);
|
||||
|
||||
std::string newName = v->getName().str() + std::string(".elt0");
|
||||
|
||||
// Rewrite regular binary operators and casts to the scalarized
|
||||
@@ -1410,20 +1429,24 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
|
||||
llvm::BinaryOperator *bop = llvm::dyn_cast<llvm::BinaryOperator>(v);
|
||||
if (bop != NULL) {
|
||||
llvm::Value *v0 = lExtractFirstVectorElement(bop->getOperand(0),
|
||||
insertBefore, phiMap);
|
||||
phiMap);
|
||||
llvm::Value *v1 = lExtractFirstVectorElement(bop->getOperand(1),
|
||||
insertBefore, phiMap);
|
||||
phiMap);
|
||||
// Note that the new binary operator is inserted immediately before
|
||||
// the previous vector one
|
||||
return llvm::BinaryOperator::Create(bop->getOpcode(), v0, v1,
|
||||
newName, insertBefore);
|
||||
newName, bop);
|
||||
}
|
||||
|
||||
llvm::CastInst *cast = llvm::dyn_cast<llvm::CastInst>(v);
|
||||
if (cast != NULL) {
|
||||
llvm::Value *v = lExtractFirstVectorElement(cast->getOperand(0),
|
||||
insertBefore, phiMap);
|
||||
phiMap);
|
||||
// Similarly, the equivalent scalar cast instruction goes right
|
||||
// before the vector cast
|
||||
return llvm::CastInst::Create(cast->getOpcode(), v,
|
||||
vt->getElementType(), newName,
|
||||
insertBefore);
|
||||
cast);
|
||||
}
|
||||
|
||||
llvm::PHINode *phi = llvm::dyn_cast<llvm::PHINode>(v);
|
||||
@@ -1438,18 +1461,17 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
|
||||
// return the pointer and not get stuck in an infinite loop.
|
||||
//
|
||||
// The insertion point for the new phi node also has to be the
|
||||
// start of the bblock of the original phi node, which isn't
|
||||
// necessarily the same bblock as insertBefore is in!
|
||||
// start of the bblock of the original phi node.
|
||||
llvm::Instruction *phiInsertPos = phi->getParent()->begin();
|
||||
llvm::PHINode *scalarPhi =
|
||||
llvm::PHINode::Create(vt->getElementType(),
|
||||
phi->getNumIncomingValues(), newName,
|
||||
phiInsertPos);
|
||||
phi->getNumIncomingValues(),
|
||||
newName, phiInsertPos);
|
||||
phiMap[phi] = scalarPhi;
|
||||
|
||||
for (unsigned i = 0; i < phi->getNumIncomingValues(); ++i) {
|
||||
llvm::Value *v = lExtractFirstVectorElement(phi->getIncomingValue(i),
|
||||
insertBefore, phiMap);
|
||||
phiMap);
|
||||
scalarPhi->addIncoming(v, phi->getIncomingBlock(i));
|
||||
}
|
||||
|
||||
@@ -1466,15 +1488,22 @@ lExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore,
|
||||
}
|
||||
|
||||
// Worst case, for everything else, just do a regular extract element
|
||||
return llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
|
||||
insertBefore);
|
||||
// instruction, which we insert immediately after the instruction we
|
||||
// have here.
|
||||
llvm::Instruction *insertAfter = llvm::dyn_cast<llvm::Instruction>(v);
|
||||
Assert(insertAfter != NULL);
|
||||
llvm::Instruction *ee =
|
||||
llvm::ExtractElementInst::Create(v, LLVMInt32(0), "first_elt",
|
||||
(llvm::Instruction *)NULL);
|
||||
ee->insertAfter(insertAfter);
|
||||
return ee;
|
||||
}
|
||||
|
||||
|
||||
llvm::Value *
|
||||
LLVMExtractFirstVectorElement(llvm::Value *v, llvm::Instruction *insertBefore) {
|
||||
LLVMExtractFirstVectorElement(llvm::Value *v) {
|
||||
std::map<llvm::PHINode *, llvm::PHINode *> phiMap;
|
||||
llvm::Value *ret = lExtractFirstVectorElement(v, insertBefore, phiMap);
|
||||
llvm::Value *ret = lExtractFirstVectorElement(v, phiMap);
|
||||
return ret;
|
||||
}
|
||||
|
||||
@@ -1489,8 +1518,8 @@ LLVMConcatVectors(llvm::Value *v1, llvm::Value *v2,
|
||||
llvm::Instruction *insertBefore) {
|
||||
Assert(v1->getType() == v2->getType());
|
||||
|
||||
LLVM_TYPE_CONST llvm::VectorType *vt =
|
||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::VectorType>(v1->getType());
|
||||
llvm::VectorType *vt =
|
||||
llvm::dyn_cast<llvm::VectorType>(v1->getType());
|
||||
Assert(vt != NULL);
|
||||
|
||||
int32_t identity[ISPC_MAX_NVEC];
|
||||
@@ -1518,12 +1547,29 @@ LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2, int32_t shuf[],
|
||||
shufVec.push_back(LLVMInt32(shuf[i]));
|
||||
}
|
||||
|
||||
#ifndef LLVM_2_9
|
||||
llvm::ArrayRef<llvm::Constant *> aref(&shufVec[0], &shufVec[shufSize]);
|
||||
llvm::Value *vec = llvm::ConstantVector::get(aref);
|
||||
#else // LLVM_2_9
|
||||
llvm::Value *vec = llvm::ConstantVector::get(shufVec);
|
||||
#endif
|
||||
|
||||
return new llvm::ShuffleVectorInst(v1, v2, vec, "shuffle", insertBefore);
|
||||
}
|
||||
|
||||
|
||||
/** Builds a name string by appending the suffix s to the name of the
    value v.

    @param v  Value whose name is used as the prefix; may be NULL.
    @param s  Suffix to append.
    @return   s itself (not a copy) when v is NULL; otherwise a new
              strdup()-allocated string holding v's name followed by s.
              This function never frees the strdup() result.
 */
const char *
|
||||
LLVMGetName(llvm::Value *v, const char *s) {
|
||||
// No value to take a name from: hand back the caller's suffix pointer as-is.
if (v == NULL) return s;
|
||||
std::string ret = v->getName();
|
||||
ret += s;
|
||||
// ret is a local, so return a heap copy that outlives this call.
return strdup(ret.c_str());
|
||||
}
|
||||
|
||||
|
||||
/** Builds a name of the form "<op>_<name of v1>_<name of v2>".

    @param op  Prefix for the generated name.
    @param v1  First value whose name is appended; dereferenced without
               a NULL check.
    @param v2  Second value whose name is appended; dereferenced without
               a NULL check.
    @return    A new strdup()-allocated string; this function never
               frees it.
 */
const char *
|
||||
LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2) {
|
||||
std::string r = op;
|
||||
r += "_";
|
||||
r += v1->getName().str();
|
||||
r += "_";
|
||||
r += v2->getName().str();
|
||||
// r is a local, so return a heap copy that outlives this call.
return strdup(r.c_str());
|
||||
}
|
||||
|
||||
|
||||
84
llvmutil.h
84
llvmutil.h
@@ -48,57 +48,50 @@ namespace llvm {
|
||||
class InsertElementInst;
|
||||
}
|
||||
|
||||
// llvm::Type *s are no longer const in llvm 3.0
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#define LLVM_TYPE_CONST
|
||||
#else
|
||||
#define LLVM_TYPE_CONST const
|
||||
#endif
|
||||
|
||||
|
||||
/** This structure holds pointers to a variety of LLVM types; code
|
||||
elsewhere can use them from here, rather than needing to make more
|
||||
verbose LLVM API calls.
|
||||
*/
|
||||
struct LLVMTypes {
|
||||
static LLVM_TYPE_CONST llvm::Type *VoidType;
|
||||
static LLVM_TYPE_CONST llvm::PointerType *VoidPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *PointerIntType;
|
||||
static LLVM_TYPE_CONST llvm::Type *BoolType;
|
||||
static llvm::Type *VoidType;
|
||||
static llvm::PointerType *VoidPointerType;
|
||||
static llvm::Type *PointerIntType;
|
||||
static llvm::Type *BoolType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::Type *Int8Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int16Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int32Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int64Type;
|
||||
static LLVM_TYPE_CONST llvm::Type *FloatType;
|
||||
static LLVM_TYPE_CONST llvm::Type *DoubleType;
|
||||
static llvm::Type *Int8Type;
|
||||
static llvm::Type *Int16Type;
|
||||
static llvm::Type *Int32Type;
|
||||
static llvm::Type *Int64Type;
|
||||
static llvm::Type *FloatType;
|
||||
static llvm::Type *DoubleType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::Type *Int8PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int16PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int32PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int64PointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *FloatPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *DoublePointerType;
|
||||
static llvm::Type *Int8PointerType;
|
||||
static llvm::Type *Int16PointerType;
|
||||
static llvm::Type *Int32PointerType;
|
||||
static llvm::Type *Int64PointerType;
|
||||
static llvm::Type *FloatPointerType;
|
||||
static llvm::Type *DoublePointerType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::VectorType *MaskType;
|
||||
static llvm::VectorType *MaskType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::VectorType *BoolVectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int1VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int8VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int16VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int32VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *Int64VectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *FloatVectorType;
|
||||
static LLVM_TYPE_CONST llvm::VectorType *DoubleVectorType;
|
||||
static llvm::VectorType *BoolVectorType;
|
||||
static llvm::VectorType *Int1VectorType;
|
||||
static llvm::VectorType *Int8VectorType;
|
||||
static llvm::VectorType *Int16VectorType;
|
||||
static llvm::VectorType *Int32VectorType;
|
||||
static llvm::VectorType *Int64VectorType;
|
||||
static llvm::VectorType *FloatVectorType;
|
||||
static llvm::VectorType *DoubleVectorType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::Type *Int8VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int16VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int32VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *Int64VectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *FloatVectorPointerType;
|
||||
static LLVM_TYPE_CONST llvm::Type *DoubleVectorPointerType;
|
||||
static llvm::Type *Int8VectorPointerType;
|
||||
static llvm::Type *Int16VectorPointerType;
|
||||
static llvm::Type *Int32VectorPointerType;
|
||||
static llvm::Type *Int64VectorPointerType;
|
||||
static llvm::Type *FloatVectorPointerType;
|
||||
static llvm::Type *DoubleVectorPointerType;
|
||||
|
||||
static LLVM_TYPE_CONST llvm::VectorType *VoidPointerVectorType;
|
||||
static llvm::VectorType *VoidPointerVectorType;
|
||||
};
|
||||
|
||||
/** These variables hold the corresponding LLVM constant values as a
|
||||
@@ -175,11 +168,11 @@ extern llvm::Constant *LLVMDoubleVector(double f);
|
||||
|
||||
/** Returns a constant integer or vector (according to the given type) of
|
||||
the given signed integer value. */
|
||||
extern llvm::Constant *LLVMIntAsType(int64_t, LLVM_TYPE_CONST llvm::Type *t);
|
||||
extern llvm::Constant *LLVMIntAsType(int64_t, llvm::Type *t);
|
||||
|
||||
/** Returns a constant integer or vector (according to the given type) of
|
||||
the given unsigned integer value. */
|
||||
extern llvm::Constant *LLVMUIntAsType(uint64_t, LLVM_TYPE_CONST llvm::Type *t);
|
||||
extern llvm::Constant *LLVMUIntAsType(uint64_t, llvm::Type *t);
|
||||
|
||||
/** Returns an LLVM boolean vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
@@ -281,8 +274,7 @@ extern void LLVMDumpValue(llvm::Value *v);
|
||||
worth of values just to extract the first element, in cases where only
|
||||
the first element's value is needed.
|
||||
*/
|
||||
extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v,
|
||||
llvm::Instruction *insertBefore);
|
||||
extern llvm::Value *LLVMExtractFirstVectorElement(llvm::Value *v);
|
||||
|
||||
/** This function takes two vectors, expected to be the same length, and
|
||||
returns a new vector of twice the length that represents concatenating
|
||||
@@ -298,4 +290,10 @@ extern llvm::Value *LLVMShuffleVectors(llvm::Value *v1, llvm::Value *v2,
|
||||
int32_t shuf[], int shufSize,
|
||||
llvm::Instruction *insertBefore);
|
||||
|
||||
/** Utility routines to concat strings with the names of existing values to
|
||||
create meaningful new names for instruction values.
|
||||
*/
|
||||
extern const char *LLVMGetName(llvm::Value *v, const char *);
|
||||
extern const char *LLVMGetName(const char *op, llvm::Value *v1, llvm::Value *v2);
|
||||
|
||||
#endif // ISPC_LLVMUTIL_H
|
||||
|
||||
40
main.cpp
40
main.cpp
@@ -44,16 +44,9 @@
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <time.h>
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
#include <llvm/Support/PrettyStackTrace.h>
|
||||
#include <llvm/Support/Signals.h>
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
#else
|
||||
#include <llvm/Target/TargetRegistry.h>
|
||||
#include <llvm/Target/TargetSelect.h>
|
||||
#include <llvm/Target/SubtargetFeature.h>
|
||||
#endif
|
||||
#include <llvm/Support/TargetRegistry.h>
|
||||
#include <llvm/Support/TargetSelect.h>
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define strcasecmp stricmp
|
||||
@@ -67,12 +60,12 @@ static void
|
||||
lPrintVersion() {
|
||||
printf("Intel(r) SPMD Program Compiler (ispc), %s (build %s @ %s, LLVM %s)\n",
|
||||
ISPC_VERSION, BUILD_VERSION, BUILD_DATE,
|
||||
#ifdef LLVM_2_9
|
||||
"2.9"
|
||||
#elif defined(LLVM_3_0) || defined(LLVM_3_0svn)
|
||||
#if defined(LLVM_3_0)
|
||||
"3.0"
|
||||
#elif defined(LLVM_3_1) || defined(LLVM_3_1svn)
|
||||
#elif defined(LLVM_3_1)
|
||||
"3.1"
|
||||
#elif defined(LLVM_3_2)
|
||||
"3.2"
|
||||
#else
|
||||
#error "Unhandled LLVM version"
|
||||
#endif
|
||||
@@ -91,12 +84,10 @@ usage(int ret) {
|
||||
Target::SupportedTargetArchs());
|
||||
printf(" [--c++-include-file=<name>]\t\tSpecify name of file to emit in #include statement in generated C++ code.\n");
|
||||
printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
|
||||
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs());
|
||||
printf(" <cpu>={%s}\n", Target::SupportedTargetCPUs().c_str());
|
||||
printf(" [-D<foo>]\t\t\t\t#define given value when running preprocessor\n");
|
||||
printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
|
||||
#ifndef LLVM_2_9
|
||||
printf(" [--emit-c++]\t\t\tEmit a C++ source file as output\n");
|
||||
#endif // !LLVM_2_9
|
||||
printf(" [--emit-llvm]\t\t\tEmit LLVM bitode file as output\n");
|
||||
printf(" [--emit-obj]\t\t\tGenerate object file file as output (default)\n");
|
||||
printf(" [-g]\t\t\t\tGenerate debugging information\n");
|
||||
@@ -202,17 +193,18 @@ static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
|
||||
}
|
||||
|
||||
|
||||
/** Signal callback that reports an unhandled signal through FATAL().
    The void* argument is unnamed and ignored.
    NOTE(review): FATAL is defined elsewhere; presumably it terminates
    the process -- confirm.
 */
static void
|
||||
lSignal(void *) {
|
||||
FATAL("Unhandled signal sent to process; terminating.");
|
||||
}
|
||||
|
||||
|
||||
int main(int Argc, char *Argv[]) {
|
||||
int argc;
|
||||
char *argv[128];
|
||||
lGetAllArgs(Argc, Argv, argc, argv);
|
||||
|
||||
#if 0
|
||||
// Use LLVM's little utility function to print out nice stack traces if
|
||||
// we crash
|
||||
llvm::sys::PrintStackTraceOnErrorSignal();
|
||||
llvm::PrettyStackTraceProgram X(argc, argv);
|
||||
#endif
|
||||
llvm::sys::AddSignalHandler(lSignal, NULL);
|
||||
|
||||
// initialize available LLVM targets
|
||||
LLVMInitializeX86TargetInfo();
|
||||
@@ -220,9 +212,7 @@ int main(int Argc, char *Argv[]) {
|
||||
LLVMInitializeX86AsmPrinter();
|
||||
LLVMInitializeX86AsmParser();
|
||||
LLVMInitializeX86Disassembler();
|
||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||
LLVMInitializeX86TargetMC();
|
||||
#endif
|
||||
|
||||
char *file = NULL;
|
||||
const char *headerFileName = NULL;
|
||||
@@ -279,10 +269,8 @@ int main(int Argc, char *Argv[]) {
|
||||
}
|
||||
else if (!strcmp(argv[i], "--emit-asm"))
|
||||
ot = Module::Asm;
|
||||
#ifndef LLVM_2_9
|
||||
else if (!strcmp(argv[i], "--emit-c++"))
|
||||
ot = Module::CXX;
|
||||
#endif // !LLVM_2_9
|
||||
else if (!strcmp(argv[i], "--emit-llvm"))
|
||||
ot = Module::Bitcode;
|
||||
else if (!strcmp(argv[i], "--emit-obj"))
|
||||
|
||||
702
module.cpp
702
module.cpp
File diff suppressed because it is too large
Load Diff
19
module.h
19
module.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -59,30 +59,33 @@ public:
|
||||
int CompileFile();
|
||||
|
||||
/** Add a named type definition to the module. */
|
||||
void AddTypeDef(Symbol *sym);
|
||||
void AddTypeDef(const std::string &name, const Type *type,
|
||||
SourcePos pos);
|
||||
|
||||
/** Add a new global variable corresponding to the given Symbol to the
|
||||
module. If non-NULL, initExpr gives the initializer expression
|
||||
for the global's initial value. */
|
||||
void AddGlobalVariable(Symbol *sym, Expr *initExpr, bool isConst);
|
||||
void AddGlobalVariable(const std::string &name, const Type *type,
|
||||
Expr *initExpr, bool isConst,
|
||||
StorageClass storageClass, SourcePos pos);
|
||||
|
||||
/** Add a declaration of the function defined by the given function
|
||||
symbol to the module. */
|
||||
void AddFunctionDeclaration(Symbol *funSym, bool isInline);
|
||||
void AddFunctionDeclaration(const std::string &name,
|
||||
const FunctionType *ftype,
|
||||
StorageClass sc, bool isInline, SourcePos pos);
|
||||
|
||||
/** Adds the function described by the declaration information and the
|
||||
provided statements to the module. */
|
||||
void AddFunctionDefinition(Symbol *sym, const std::vector<Symbol *> &args,
|
||||
Stmt *code);
|
||||
void AddFunctionDefinition(const std::string &name,
|
||||
const FunctionType *ftype, Stmt *code);
|
||||
|
||||
/** After a source file has been compiled, output can be generated in a
|
||||
number of different formats. */
|
||||
enum OutputType { Asm, /** Generate text assembly language output */
|
||||
Bitcode, /** Generate LLVM IR bitcode output */
|
||||
Object, /** Generate a native object file */
|
||||
#ifndef LLVM_2_9
|
||||
CXX, /** Generate a C++ file */
|
||||
#endif // !LLVM_2_9
|
||||
Header /** Generate a C/C++ header file with
|
||||
declarations of 'export'ed functions, global
|
||||
variables, and the types used by them. */
|
||||
|
||||
184
parse.yy
184
parse.yy
@@ -173,8 +173,11 @@ struct ForeachDimension {
|
||||
}
|
||||
|
||||
|
||||
%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT TOKEN_INT64_CONSTANT
|
||||
%token TOKEN_UINT64_CONSTANT TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL
|
||||
%token TOKEN_INT32_CONSTANT TOKEN_UINT32_CONSTANT
|
||||
%token TOKEN_INT64_CONSTANT TOKEN_UINT64_CONSTANT
|
||||
%token TOKEN_INT32DOTDOTDOT_CONSTANT TOKEN_UINT32DOTDOTDOT_CONSTANT
|
||||
%token TOKEN_INT64DOTDOTDOT_CONSTANT TOKEN_UINT64DOTDOTDOT_CONSTANT
|
||||
%token TOKEN_FLOAT_CONSTANT TOKEN_STRING_C_LITERAL
|
||||
%token TOKEN_IDENTIFIER TOKEN_STRING_LITERAL TOKEN_TYPE_NAME TOKEN_NULL
|
||||
%token TOKEN_PTR_OP TOKEN_INC_OP TOKEN_DEC_OP TOKEN_LEFT_OP TOKEN_RIGHT_OP
|
||||
%token TOKEN_LE_OP TOKEN_GE_OP TOKEN_EQ_OP TOKEN_NE_OP
|
||||
@@ -196,7 +199,7 @@ struct ForeachDimension {
|
||||
%token TOKEN_CIF TOKEN_CDO TOKEN_CFOR TOKEN_CWHILE TOKEN_CBREAK
|
||||
%token TOKEN_CCONTINUE TOKEN_CRETURN TOKEN_SYNC TOKEN_PRINT TOKEN_ASSERT
|
||||
|
||||
%type <expr> primary_expression postfix_expression
|
||||
%type <expr> primary_expression postfix_expression integer_dotdotdot
|
||||
%type <expr> unary_expression cast_expression funcall_expression launch_expression
|
||||
%type <expr> multiplicative_expression additive_expression shift_expression
|
||||
%type <expr> relational_expression equality_expression and_expression
|
||||
@@ -250,6 +253,12 @@ struct ForeachDimension {
|
||||
|
||||
string_constant
|
||||
: TOKEN_STRING_LITERAL { $$ = new std::string(*yylval.stringVal); }
|
||||
| string_constant TOKEN_STRING_LITERAL
|
||||
{
|
||||
std::string s = *((std::string *)$1);
|
||||
s += *yylval.stringVal;
|
||||
$$ = new std::string(s);
|
||||
}
|
||||
;
|
||||
|
||||
primary_expression
|
||||
@@ -382,7 +391,7 @@ argument_expression_list
|
||||
{
|
||||
ExprList *argList = dynamic_cast<ExprList *>($1);
|
||||
if (argList == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
argList = new ExprList(@3);
|
||||
}
|
||||
argList->exprs.push_back($3);
|
||||
@@ -540,8 +549,8 @@ rate_qualified_type_specifier
|
||||
if ($2 == NULL)
|
||||
$$ = NULL;
|
||||
else {
|
||||
int soaWidth = $1;
|
||||
const StructType *st = dynamic_cast<const StructType *>($2);
|
||||
int soaWidth = (int)$1;
|
||||
const StructType *st = CastType<StructType>($2);
|
||||
if (st == NULL) {
|
||||
Error(@1, "\"soa\" qualifier is illegal with non-struct type \"%s\".",
|
||||
$2->GetString().c_str());
|
||||
@@ -614,15 +623,17 @@ declaration_statement
|
||||
: declaration
|
||||
{
|
||||
if ($1 == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
$$ = NULL;
|
||||
}
|
||||
else if ($1->declSpecs->storageClass == SC_TYPEDEF) {
|
||||
for (unsigned int i = 0; i < $1->declarators.size(); ++i) {
|
||||
if ($1->declarators[i] == NULL)
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
else
|
||||
m->AddTypeDef($1->declarators[i]->GetSymbol());
|
||||
m->AddTypeDef($1->declarators[i]->name,
|
||||
$1->declarators[i]->type,
|
||||
$1->declarators[i]->pos);
|
||||
}
|
||||
$$ = NULL;
|
||||
}
|
||||
@@ -778,7 +789,7 @@ init_declarator_list
|
||||
{
|
||||
std::vector<Declarator *> *dl = (std::vector<Declarator *> *)$1;
|
||||
if (dl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
dl = new std::vector<Declarator *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
@@ -801,7 +812,6 @@ storage_class_specifier
|
||||
: TOKEN_TYPEDEF { $$ = SC_TYPEDEF; }
|
||||
| TOKEN_EXTERN { $$ = SC_EXTERN; }
|
||||
| TOKEN_EXTERN TOKEN_STRING_C_LITERAL { $$ = SC_EXTERN_C; }
|
||||
| TOKEN_EXPORT { $$ = SC_EXPORT; }
|
||||
| TOKEN_STATIC { $$ = SC_STATIC; }
|
||||
;
|
||||
|
||||
@@ -843,9 +853,9 @@ struct_or_union_specifier
|
||||
: struct_or_union struct_or_union_name '{' struct_declaration_list '}'
|
||||
{
|
||||
if ($4 != NULL) {
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
std::vector<SourcePos> elementPositions;
|
||||
llvm::SmallVector<const Type *, 8> elementTypes;
|
||||
llvm::SmallVector<std::string, 8> elementNames;
|
||||
llvm::SmallVector<SourcePos, 8> elementPositions;
|
||||
GetStructTypesNamesPositions(*$4, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
StructType *st = new StructType($2, elementTypes, elementNames,
|
||||
@@ -859,12 +869,11 @@ struct_or_union_specifier
|
||||
| struct_or_union '{' struct_declaration_list '}'
|
||||
{
|
||||
if ($3 != NULL) {
|
||||
std::vector<const Type *> elementTypes;
|
||||
std::vector<std::string> elementNames;
|
||||
std::vector<SourcePos> elementPositions;
|
||||
llvm::SmallVector<const Type *, 8> elementTypes;
|
||||
llvm::SmallVector<std::string, 8> elementNames;
|
||||
llvm::SmallVector<SourcePos, 8> elementPositions;
|
||||
GetStructTypesNamesPositions(*$3, &elementTypes, &elementNames,
|
||||
&elementPositions);
|
||||
// FIXME: should be unbound
|
||||
$$ = new StructType("", elementTypes, elementNames, elementPositions,
|
||||
false, Variability::Unbound, @1);
|
||||
}
|
||||
@@ -882,12 +891,11 @@ struct_or_union_specifier
|
||||
| struct_or_union struct_or_union_name
|
||||
{
|
||||
const Type *st = m->symbolTable->LookupType($2);
|
||||
if (!st) {
|
||||
std::vector<std::string> alternates = m->symbolTable->ClosestTypeMatch($2);
|
||||
std::string alts = lGetAlternates(alternates);
|
||||
Error(@2, "Struct type \"%s\" unknown.%s", $2, alts.c_str());
|
||||
if (st == NULL) {
|
||||
st = new UndefinedStructType($2, Variability::Unbound, false, @2);
|
||||
m->symbolTable->AddType($2, st, @2);
|
||||
}
|
||||
else if (dynamic_cast<const StructType *>(st) == NULL)
|
||||
else if (CastType<StructType>(st) == NULL)
|
||||
Error(@2, "Type \"%s\" is not a struct type! (%s)", $2,
|
||||
st->GetString().c_str());
|
||||
$$ = st;
|
||||
@@ -910,7 +918,7 @@ struct_declaration_list
|
||||
{
|
||||
std::vector<StructDeclaration *> *sdl = (std::vector<StructDeclaration *> *)$1;
|
||||
if (sdl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
sdl = new std::vector<StructDeclaration *>;
|
||||
}
|
||||
if ($2 != NULL)
|
||||
@@ -976,6 +984,11 @@ specifier_qualifier_list
|
||||
"function declarations.");
|
||||
$$ = $2;
|
||||
}
|
||||
else if ($1 == TYPEQUAL_EXPORT) {
|
||||
Error(@1, "\"export\" qualifier is illegal outside of "
|
||||
"function declarations.");
|
||||
$$ = $2;
|
||||
}
|
||||
else
|
||||
FATAL("Unhandled type qualifier in parser.");
|
||||
}
|
||||
@@ -1000,7 +1013,7 @@ struct_declarator_list
|
||||
{
|
||||
std::vector<Declarator *> *sdl = (std::vector<Declarator *> *)$1;
|
||||
if (sdl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
sdl = new std::vector<Declarator *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
@@ -1047,7 +1060,7 @@ enum_specifier
|
||||
$$ = NULL;
|
||||
}
|
||||
else {
|
||||
const EnumType *enumType = dynamic_cast<const EnumType *>(type);
|
||||
const EnumType *enumType = CastType<EnumType>(type);
|
||||
if (enumType == NULL) {
|
||||
Error(@2, "Type \"%s\" is not an enum type (%s).", $2,
|
||||
type->GetString().c_str());
|
||||
@@ -1074,7 +1087,7 @@ enumerator_list
|
||||
{
|
||||
std::vector<Symbol *> *symList = $1;
|
||||
if (symList == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
symList = new std::vector<Symbol *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
@@ -1108,6 +1121,7 @@ type_qualifier
|
||||
| TOKEN_UNIFORM { $$ = TYPEQUAL_UNIFORM; }
|
||||
| TOKEN_VARYING { $$ = TYPEQUAL_VARYING; }
|
||||
| TOKEN_TASK { $$ = TYPEQUAL_TASK; }
|
||||
| TOKEN_EXPORT { $$ = TYPEQUAL_EXPORT; }
|
||||
| TOKEN_INLINE { $$ = TYPEQUAL_INLINE; }
|
||||
| TOKEN_SIGNED { $$ = TYPEQUAL_SIGNED; }
|
||||
| TOKEN_UNSIGNED { $$ = TYPEQUAL_UNSIGNED; }
|
||||
@@ -1160,7 +1174,7 @@ direct_declarator
|
||||
: TOKEN_IDENTIFIER
|
||||
{
|
||||
Declarator *d = new Declarator(DK_BASE, @1);
|
||||
d->sym = new Symbol(yytext, @1);
|
||||
d->name = yytext;
|
||||
$$ = d;
|
||||
}
|
||||
| '(' declarator ')'
|
||||
@@ -1335,8 +1349,10 @@ type_name
|
||||
{
|
||||
if ($1 == NULL || $2 == NULL)
|
||||
$$ = NULL;
|
||||
else
|
||||
$$ = $2->GetType($1, NULL);
|
||||
else {
|
||||
$2->InitFromType($1, NULL);
|
||||
$$ = $2->type;
|
||||
}
|
||||
}
|
||||
;
|
||||
|
||||
@@ -1471,7 +1487,7 @@ initializer_list
|
||||
{
|
||||
ExprList *exprList = $1;
|
||||
if (exprList == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
exprList = new ExprList(@3);
|
||||
}
|
||||
exprList->exprs.push_back($3);
|
||||
@@ -1542,7 +1558,7 @@ statement_list
|
||||
{
|
||||
StmtList *sl = (StmtList *)$1;
|
||||
if (sl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
sl = new StmtList(@2);
|
||||
}
|
||||
sl->Add($2);
|
||||
@@ -1614,11 +1630,34 @@ foreach_active_identifier
|
||||
}
|
||||
;
|
||||
|
||||
integer_dotdotdot
|
||||
: TOKEN_INT32DOTDOTDOT_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformInt32->GetAsConstType(),
|
||||
(int32_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_UINT32DOTDOTDOT_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformUInt32->GetAsConstType(),
|
||||
(uint32_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_INT64DOTDOTDOT_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformInt64->GetAsConstType(),
|
||||
(int64_t)yylval.intVal, @1);
|
||||
}
|
||||
| TOKEN_UINT64DOTDOTDOT_CONSTANT {
|
||||
$$ = new ConstExpr(AtomicType::UniformUInt64->GetAsConstType(),
|
||||
(uint64_t)yylval.intVal, @1);
|
||||
}
|
||||
;
|
||||
|
||||
foreach_dimension_specifier
|
||||
: foreach_identifier '=' assignment_expression TOKEN_DOTDOTDOT assignment_expression
|
||||
{
|
||||
$$ = new ForeachDimension($1, $3, $5);
|
||||
}
|
||||
| foreach_identifier '=' integer_dotdotdot assignment_expression
|
||||
{
|
||||
$$ = new ForeachDimension($1, $3, $4);
|
||||
}
|
||||
;
|
||||
|
||||
foreach_dimension_list
|
||||
@@ -1631,7 +1670,7 @@ foreach_dimension_list
|
||||
{
|
||||
std::vector<ForeachDimension *> *dv = $1;
|
||||
if (dv == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
dv = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
if ($3 != NULL)
|
||||
@@ -1669,7 +1708,7 @@ iteration_statement
|
||||
{
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@3, m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
for (unsigned int i = 0; i < dims->size(); ++i)
|
||||
@@ -1679,7 +1718,7 @@ iteration_statement
|
||||
{
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@3, m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
|
||||
@@ -1697,7 +1736,7 @@ iteration_statement
|
||||
{
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@3, m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
|
||||
@@ -1708,7 +1747,7 @@ iteration_statement
|
||||
{
|
||||
std::vector<ForeachDimension *> *dims = $3;
|
||||
if (dims == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
dims = new std::vector<ForeachDimension *>;
|
||||
}
|
||||
|
||||
@@ -1804,6 +1843,7 @@ external_declaration
|
||||
for (unsigned int i = 0; i < $1->declarators.size(); ++i)
|
||||
lAddDeclaration($1->declSpecs, $1->declarators[i]);
|
||||
}
|
||||
| ';'
|
||||
;
|
||||
|
||||
function_definition
|
||||
@@ -1817,11 +1857,18 @@ function_definition
|
||||
}
|
||||
compound_statement
|
||||
{
|
||||
std::vector<Symbol *> args;
|
||||
if ($2 != NULL) {
|
||||
Symbol *sym = $2->GetFunctionInfo($1, &args);
|
||||
if (sym != NULL)
|
||||
m->AddFunctionDefinition(sym, args, $4);
|
||||
$2->InitFromDeclSpecs($1);
|
||||
const FunctionType *funcType = CastType<FunctionType>($2->type);
|
||||
if (funcType == NULL)
|
||||
AssertPos(@1, m->errorCount > 0);
|
||||
else if ($1->storageClass == SC_TYPEDEF)
|
||||
Error(@1, "Illegal \"typedef\" provided with function definition.");
|
||||
else {
|
||||
Stmt *code = $4;
|
||||
if (code == NULL) code = new StmtList(@4);
|
||||
m->AddFunctionDefinition($2->name, funcType, code);
|
||||
}
|
||||
}
|
||||
m->symbolTable->PopScope(); // push in lAddFunctionParams();
|
||||
}
|
||||
@@ -1931,35 +1978,27 @@ lAddDeclaration(DeclSpecs *ds, Declarator *decl) {
|
||||
// Error happened earlier during parsing
|
||||
return;
|
||||
|
||||
decl->InitFromDeclSpecs(ds);
|
||||
if (ds->storageClass == SC_TYPEDEF)
|
||||
m->AddTypeDef(decl->GetSymbol());
|
||||
m->AddTypeDef(decl->name, decl->type, decl->pos);
|
||||
else {
|
||||
const Type *t = decl->GetType(ds);
|
||||
if (t == NULL) {
|
||||
if (decl->type == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
Symbol *sym = decl->GetSymbol();
|
||||
if (sym == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(t);
|
||||
decl->type = decl->type->ResolveUnboundVariability(Variability::Varying);
|
||||
|
||||
const FunctionType *ft = CastType<FunctionType>(decl->type);
|
||||
if (ft != NULL) {
|
||||
sym->type = ft;
|
||||
sym->storageClass = ds->storageClass;
|
||||
bool isInline = (ds->typeQualifiers & TYPEQUAL_INLINE);
|
||||
m->AddFunctionDeclaration(sym, isInline);
|
||||
m->AddFunctionDeclaration(decl->name, ft, ds->storageClass,
|
||||
isInline, decl->pos);
|
||||
}
|
||||
else {
|
||||
if (sym->type == NULL)
|
||||
Assert(m->errorCount > 0);
|
||||
else
|
||||
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
|
||||
bool isConst = (ds->typeQualifiers & TYPEQUAL_CONST) != 0;
|
||||
m->AddGlobalVariable(sym, decl->initExpr, isConst);
|
||||
m->AddGlobalVariable(decl->name, decl->type, decl->initExpr,
|
||||
isConst, decl->storageClass, decl->pos);
|
||||
}
|
||||
}
|
||||
}
|
||||
@@ -1973,7 +2012,7 @@ lAddFunctionParams(Declarator *decl) {
|
||||
m->symbolTable->PushScope();
|
||||
|
||||
if (decl == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(decl->pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -1981,27 +2020,24 @@ lAddFunctionParams(Declarator *decl) {
|
||||
while (decl->kind != DK_FUNCTION && decl->child != NULL)
|
||||
decl = decl->child;
|
||||
if (decl->kind != DK_FUNCTION) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(decl->pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// now loop over its parameters and add them to the symbol table
|
||||
for (unsigned int i = 0; i < decl->functionParams.size(); ++i) {
|
||||
Declaration *pdecl = decl->functionParams[i];
|
||||
if (pdecl == NULL || pdecl->declarators.size() == 0)
|
||||
// zero size declarators array corresponds to an anonymous
|
||||
// parameter
|
||||
continue;
|
||||
Assert(pdecl->declarators.size() == 1);
|
||||
Symbol *sym = pdecl->declarators[0]->GetSymbol();
|
||||
if (sym == NULL || sym->type == NULL)
|
||||
Assert(m->errorCount > 0);
|
||||
Assert(pdecl != NULL && pdecl->declarators.size() == 1);
|
||||
Declarator *declarator = pdecl->declarators[0];
|
||||
if (declarator == NULL)
|
||||
AssertPos(decl->pos, m->errorCount > 0);
|
||||
else {
|
||||
sym->type = sym->type->ResolveUnboundVariability(Variability::Varying);
|
||||
Symbol *sym = new Symbol(declarator->name, declarator->pos,
|
||||
declarator->type, declarator->storageClass);
|
||||
#ifndef NDEBUG
|
||||
bool ok = m->symbolTable->AddVariable(sym);
|
||||
if (ok == false)
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(decl->pos, m->errorCount > 0);
|
||||
#else
|
||||
m->symbolTable->AddVariable(sym);
|
||||
#endif
|
||||
@@ -2064,8 +2100,6 @@ lGetStorageClassString(StorageClass sc) {
|
||||
return "";
|
||||
case SC_EXTERN:
|
||||
return "extern";
|
||||
case SC_EXPORT:
|
||||
return "export";
|
||||
case SC_STATIC:
|
||||
return "static";
|
||||
case SC_TYPEDEF:
|
||||
@@ -2157,7 +2191,7 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
if (enums[i]->constValue != NULL) {
|
||||
/* Already has a value, so first update nextVal with it. */
|
||||
int count = enums[i]->constValue->AsUInt32(&nextVal);
|
||||
Assert(count == 1);
|
||||
AssertPos(enums[i]->pos, count == 1);
|
||||
++nextVal;
|
||||
|
||||
/* When the source file as being parsed, the ConstExpr for any
|
||||
@@ -2170,7 +2204,7 @@ lFinalizeEnumeratorSymbols(std::vector<Symbol *> &enums,
|
||||
enums[i]->pos);
|
||||
castExpr = Optimize(castExpr);
|
||||
enums[i]->constValue = dynamic_cast<ConstExpr *>(castExpr);
|
||||
Assert(enums[i]->constValue != NULL);
|
||||
AssertPos(enums[i]->pos, enums[i]->constValue != NULL);
|
||||
}
|
||||
else {
|
||||
enums[i]->constValue = new ConstExpr(enumType, nextVal++,
|
||||
|
||||
18
run_tests.py
18
run_tests.py
@@ -17,6 +17,10 @@ import shlex
|
||||
import platform
|
||||
import tempfile
|
||||
|
||||
# disable fancy error/warning printing with ANSI colors, so grepping for error
|
||||
# messages doesn't get confused
|
||||
os.environ["TERM"] = "dumb"
|
||||
|
||||
# This script is affected by http://bugs.python.org/issue5261 on OSX 10.5 Leopard
|
||||
# git history has a workaround for that issue.
|
||||
|
||||
@@ -28,8 +32,10 @@ parser.add_option("-r", "--random-shuffle", dest="random", help="Randomly order
|
||||
default=False, action="store_true")
|
||||
parser.add_option("-g", "--generics-include", dest="include_file", help="Filename for header implementing functions for generics",
|
||||
default=None)
|
||||
parser.add_option("-f", "--ispc-flags", dest="ispc_flags", help="Additional flags for ispc (-g, -O1, ...)",
|
||||
default="")
|
||||
parser.add_option('-t', '--target', dest='target',
|
||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16)',
|
||||
help='Set compilation target (sse2, sse2-x2, sse4, sse4-x2, avx, avx-x2, generic-4, generic-8, generic-16, generic-32)',
|
||||
default="sse4")
|
||||
parser.add_option('-a', '--arch', dest='arch',
|
||||
help='Set architecture (x86, x86-64)',
|
||||
@@ -53,6 +59,10 @@ if not is_windows:
|
||||
else:
|
||||
ispc_exe = "../Release/ispc.exe"
|
||||
|
||||
ispc_exe += " " + options.ispc_flags
|
||||
|
||||
print ispc_exe
|
||||
|
||||
is_generic_target = (options.target.find("generic-") != -1 and
|
||||
options.target != "generic-1")
|
||||
if is_generic_target and options.include_file == None:
|
||||
@@ -65,6 +75,12 @@ if is_generic_target and options.include_file == None:
|
||||
elif options.target == "generic-16":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-16.h\n")
|
||||
options.include_file = "examples/intrinsics/generic-16.h"
|
||||
elif options.target == "generic-32":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-32.h\n")
|
||||
options.include_file = "examples/intrinsics/generic-32.h"
|
||||
elif options.target == "generic-64":
|
||||
sys.stderr.write("No generics #include specified; using examples/intrinsics/generic-64.h\n")
|
||||
options.include_file = "examples/intrinsics/generic-64.h"
|
||||
|
||||
if options.compiler_exe == None:
|
||||
if is_windows:
|
||||
|
||||
468
stdlib.ispc
468
stdlib.ispc
@@ -1,6 +1,6 @@
|
||||
// -*- mode: c++ -*-
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -355,7 +355,8 @@ static inline uniform bool all(bool v) {
|
||||
#else
|
||||
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
|
||||
#endif
|
||||
return __movmsk(match) == (1 << programCount) - 1;
|
||||
return __movmsk(match) == ((programCount == 64) ? ~0ull :
|
||||
((1ull << programCount) - 1));
|
||||
}
|
||||
|
||||
__declspec(safe)
|
||||
@@ -388,14 +389,14 @@ __declspec(safe)
|
||||
static inline uniform int popcnt(bool v) {
|
||||
// As with any() and all(), only count across the active lanes
|
||||
#ifdef ISPC_TARGET_GENERIC
|
||||
return __popcnt_int32(__movmsk(v & __mask));
|
||||
return __popcnt_int64(__movmsk(v & __mask));
|
||||
#else
|
||||
return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask));
|
||||
return __popcnt_int64(__movmsk(__sext_varying_bool(v) & __mask));
|
||||
#endif
|
||||
}
|
||||
|
||||
__declspec(safe)
|
||||
static inline uniform int lanemask() {
|
||||
static inline uniform unsigned int64 lanemask() {
|
||||
return __movmsk(__mask);
|
||||
}
|
||||
|
||||
@@ -746,6 +747,125 @@ static inline void prefetch_nt(const void * varying ptr) {
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// non-short-circuiting alternatives
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline bool and(bool a, bool b) {
|
||||
return a && b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform bool and(uniform bool a, uniform bool b) {
|
||||
return a && b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline bool or(bool a, bool b) {
|
||||
return a || b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform bool or(uniform bool a, uniform bool b) {
|
||||
return a || b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int8 select(bool c, int8 a, int8 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int8 select(uniform bool c, int8 a, int8 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform int8 select(uniform bool c, uniform int8 a,
|
||||
uniform int8 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int16 select(bool c, int16 a, int16 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int16 select(uniform bool c, int16 a, int16 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform int16 select(uniform bool c, uniform int16 a,
|
||||
uniform int16 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int32 select(bool c, int32 a, int32 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int32 select(uniform bool c, int32 a, int32 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform int32 select(uniform bool c, uniform int32 a,
|
||||
uniform int32 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int64 select(bool c, int64 a, int64 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline int64 select(uniform bool c, int64 a, int64 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform int64 select(uniform bool c, uniform int64 a,
|
||||
uniform int64 b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline float select(bool c, float a, float b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline float select(uniform bool c, float a, float b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform float select(uniform bool c, uniform float a,
|
||||
uniform float b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline double select(bool c, double a, double b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline double select(uniform bool c, double a, double b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
__declspec(safe,cost1)
|
||||
static inline uniform double select(uniform bool c, uniform double a,
|
||||
uniform double b) {
|
||||
return c ? a : b;
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Horizontal ops / reductions
|
||||
|
||||
@@ -1469,22 +1589,17 @@ static inline void memory_barrier() {
|
||||
|
||||
#define DEFINE_ATOMIC_OP(TA,TB,OPA,OPB,MASKTYPE) \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
memory_barrier(); \
|
||||
TA ret = __atomic_##OPB##_##TB##_global(ptr, value, (MASKTYPE)__mask); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
__foreach_active (i) { \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
@@ -1492,23 +1607,21 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
#define DEFINE_ATOMIC_SWAP(TA,TB) \
|
||||
static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform int i = 0; \
|
||||
TA ret[programCount]; \
|
||||
TA memVal; \
|
||||
uniform int lastSwap; \
|
||||
uniform int mask = lanemask(); \
|
||||
uniform unsigned int64 mask = lanemask(); \
|
||||
/* First, have the first running program instance (if any) perform \
|
||||
the swap with memory with its value of "value"; record the \
|
||||
value returned. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
if ((mask & (1ull << i)) == 0) \
|
||||
continue; \
|
||||
memVal = __atomic_swap_uniform_##TB##_global(ptr, extract(value, i)); \
|
||||
lastSwap = i; \
|
||||
@@ -1520,7 +1633,7 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
||||
current instance had executed a hardware atomic swap right before \
|
||||
the last one that did a swap. */ \
|
||||
for (; i < programCount; ++i) { \
|
||||
if ((mask & (1 << i)) == 0) \
|
||||
if ((mask & (1ull << i)) == 0) \
|
||||
continue; \
|
||||
ret[lastSwap] = extract(value, i); \
|
||||
lastSwap = i; \
|
||||
@@ -1528,20 +1641,16 @@ static inline TA atomic_swap_global(uniform TA * uniform ptr, TA value) { \
|
||||
/* And the last instance that wanted to swap gets the value we \
|
||||
originally got back from memory... */ \
|
||||
ret[lastSwap] = memVal; \
|
||||
memory_barrier(); \
|
||||
return ret[programIndex]; \
|
||||
} \
|
||||
static inline uniform TA atomic_swap_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_swap_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
__foreach_active (i) { \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
@@ -1549,7 +1658,6 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
||||
uniform TA r = __atomic_swap_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
|
||||
@@ -1557,25 +1665,19 @@ static inline TA atomic_swap_global(uniform TA * varying ptr, TA value) { \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * uniform ptr, TA value) { \
|
||||
uniform TA oneval = reduce_##OPA(value); \
|
||||
TA ret; \
|
||||
if (lanemask() != 0) { \
|
||||
memory_barrier(); \
|
||||
if (lanemask() != 0) \
|
||||
ret = __atomic_##OPB##_uniform_##TB##_global(ptr, oneval); \
|
||||
memory_barrier(); \
|
||||
} \
|
||||
return ret; \
|
||||
} \
|
||||
static inline uniform TA atomic_##OPA##_global(uniform TA * uniform ptr, \
|
||||
uniform TA value) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = __atomic_##OPB##_uniform_##TB##_global(ptr, value); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
||||
TA value) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
__foreach_active (i) { \
|
||||
uniform TA * uniform p = ptrArray[i]; \
|
||||
@@ -1583,7 +1685,6 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
||||
uniform TA r = __atomic_##OPB##_uniform_##TB##_global(p, v); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
@@ -1638,25 +1739,20 @@ DEFINE_ATOMIC_SWAP(double,double)
|
||||
#define ATOMIC_DECL_CMPXCHG(TA, TB, MASKTYPE) \
|
||||
static inline uniform TA atomic_compare_exchange_global( \
|
||||
uniform TA * uniform ptr, uniform TA oldval, uniform TA newval) { \
|
||||
memory_barrier(); \
|
||||
uniform TA ret = \
|
||||
__atomic_compare_exchange_uniform_##TB##_global(ptr, oldval, newval); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_compare_exchange_global( \
|
||||
uniform TA * uniform ptr, TA oldval, TA newval) { \
|
||||
memory_barrier(); \
|
||||
TA ret = __atomic_compare_exchange_##TB##_global(ptr, oldval, newval, \
|
||||
(MASKTYPE)__mask); \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
} \
|
||||
static inline TA atomic_compare_exchange_global( \
|
||||
uniform TA * varying ptr, TA oldval, TA newval) { \
|
||||
uniform TA * uniform ptrArray[programCount]; \
|
||||
ptrArray[programIndex] = ptr; \
|
||||
memory_barrier(); \
|
||||
TA ret; \
|
||||
__foreach_active (i) { \
|
||||
uniform TA r = \
|
||||
@@ -1665,7 +1761,6 @@ static inline TA atomic_compare_exchange_global( \
|
||||
extract(newval, i)); \
|
||||
ret = insert(ret, i, r); \
|
||||
} \
|
||||
memory_barrier(); \
|
||||
return ret; \
|
||||
}
|
||||
|
||||
@@ -1678,6 +1773,49 @@ ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
|
||||
|
||||
#undef ATOMIC_DECL_CMPXCHG
|
||||
|
||||
// void * variants of swap and compare exchange
|
||||
|
||||
static inline void *atomic_swap_global(void ** uniform ptr,
|
||||
void * value) {
|
||||
return (void *)atomic_swap_global((intptr_t * uniform)ptr,
|
||||
(intptr_t)value);
|
||||
}
|
||||
|
||||
static inline void * uniform atomic_swap_global(void ** uniform ptr,
|
||||
void * uniform value) {
|
||||
return (void * uniform)atomic_swap_global((intptr_t * uniform)ptr,
|
||||
(uniform intptr_t)value);
|
||||
}
|
||||
|
||||
static inline void *atomic_swap_global(void ** ptr, void * value) {
|
||||
return (void *)atomic_swap_global((intptr_t *)ptr,
|
||||
(intptr_t)value);
|
||||
}
|
||||
|
||||
static inline void *
|
||||
atomic_compare_exchange_global(void ** uniform ptr,
|
||||
void * oldval, void * newval) {
|
||||
return (void *)atomic_compare_exchange_global((intptr_t * uniform)ptr,
|
||||
(intptr_t)oldval,
|
||||
(intptr_t)newval);
|
||||
}
|
||||
|
||||
static inline void * uniform
|
||||
atomic_compare_exchange_global(void ** uniform ptr, void * uniform oldval,
|
||||
void * uniform newval) {
|
||||
return (void * uniform)atomic_compare_exchange_global((intptr_t * uniform)ptr,
|
||||
(uniform intptr_t)oldval,
|
||||
(uniform intptr_t)newval);
|
||||
}
|
||||
|
||||
static inline void *
|
||||
atomic_compare_exchange_global(void ** ptr, void * oldval,
|
||||
void * newval) {
|
||||
return (void *)atomic_compare_exchange_global((intptr_t *)ptr,
|
||||
(intptr_t)oldval,
|
||||
(intptr_t)newval);
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// local atomics
|
||||
|
||||
@@ -1849,6 +1987,49 @@ LOCAL_CMPXCHG(double)
|
||||
#undef LOCAL_ATOMIC
|
||||
#undef LOCAL_CMPXCHG
|
||||
|
||||
// void * variants of swap and compare exchange
|
||||
|
||||
static inline void *atomic_swap_local(void ** uniform ptr,
|
||||
void * value) {
|
||||
return (void *)atomic_swap_local((intptr_t * uniform)ptr,
|
||||
(intptr_t)value);
|
||||
}
|
||||
|
||||
static inline void * uniform atomic_swap_local(void ** uniform ptr,
|
||||
void * uniform value) {
|
||||
return (void * uniform)atomic_swap_local((intptr_t * uniform)ptr,
|
||||
(uniform intptr_t)value);
|
||||
}
|
||||
|
||||
static inline void *atomic_swap_local(void ** ptr, void * value) {
|
||||
return (void *)atomic_swap_local((intptr_t *)ptr,
|
||||
(intptr_t)value);
|
||||
}
|
||||
|
||||
// void * flavor of atomic_compare_exchange_local taking a uniform pointer
// and varying old/new values.  Everything is reinterpreted as intptr_t,
// forwarded to the intptr_t overload, and the result is cast back to void *.
static inline void *
atomic_compare_exchange_local(void ** uniform ptr,
                              void * oldval, void * newval) {
    intptr_t oldBits = (intptr_t)oldval;
    intptr_t newBits = (intptr_t)newval;
    intptr_t resultBits =
        atomic_compare_exchange_local((intptr_t * uniform)ptr, oldBits, newBits);
    return (void *)resultBits;
}
|
||||
|
||||
// Fully uniform void * flavor of atomic_compare_exchange_local: uniform
// pointer, uniform old/new values, uniform result.  The arguments are
// reinterpreted as intptr_t, forwarded to the intptr_t overload, and the
// result is cast back to a uniform void *.
static inline void * uniform
atomic_compare_exchange_local(void ** uniform ptr, void * uniform oldval,
                              void * uniform newval) {
    uniform intptr_t oldBits = (uniform intptr_t)oldval;
    uniform intptr_t newBits = (uniform intptr_t)newval;
    uniform intptr_t resultBits =
        atomic_compare_exchange_local((intptr_t * uniform)ptr, oldBits, newBits);
    return (void * uniform)resultBits;
}
|
||||
|
||||
// void * flavor of atomic_compare_exchange_local for a pointer argument that
// is not declared uniform.  The pointer and both values are reinterpreted as
// intptr_t, forwarded to the intptr_t overload, and the result is cast back
// to void *.
static inline void *
atomic_compare_exchange_local(void ** ptr, void * oldval,
                              void * newval) {
    intptr_t oldBits = (intptr_t)oldval;
    intptr_t newBits = (intptr_t)newval;
    intptr_t resultBits =
        atomic_compare_exchange_local((intptr_t *)ptr, oldBits, newBits);
    return (void *)resultBits;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Transcendentals (float precision)
|
||||
|
||||
@@ -2735,7 +2916,10 @@ static inline uniform float atan2(uniform float y, uniform float x) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline float exp(float x_full) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __exp_varying_float(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml) {
|
||||
return __svml_exp(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
@@ -2814,7 +2998,10 @@ static inline float exp(float x_full) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline uniform float exp(uniform float x_full) {
|
||||
if (__math_lib == __math_lib_system ||
|
||||
if (__have_native_transcendentals) {
|
||||
return __exp_uniform_float(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system ||
|
||||
__math_lib == __math_lib_svml) {
|
||||
return __stdlib_expf(x_full);
|
||||
}
|
||||
@@ -2936,7 +3123,10 @@ static inline void __range_reduce_log(uniform float input, uniform float * unifo
|
||||
|
||||
__declspec(safe)
|
||||
static inline float log(float x_full) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __log_varying_float(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml) {
|
||||
return __svml_log(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
@@ -3024,7 +3214,10 @@ static inline float log(float x_full) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline uniform float log(uniform float x_full) {
|
||||
if (__math_lib == __math_lib_system ||
|
||||
if (__have_native_transcendentals) {
|
||||
return __log_uniform_float(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system ||
|
||||
__math_lib == __math_lib_svml) {
|
||||
return __stdlib_logf(x_full);
|
||||
}
|
||||
@@ -3105,7 +3298,10 @@ static inline uniform float log(uniform float x_full) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline float pow(float a, float b) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __pow_varying_float(a, b);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml) {
|
||||
return __svml_pow(a, b);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
@@ -3124,6 +3320,9 @@ static inline float pow(float a, float b) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline uniform float pow(uniform float a, uniform float b) {
|
||||
if (__have_native_transcendentals) {
|
||||
return __pow_uniform_float(a, b);
|
||||
}
|
||||
if (__math_lib == __math_lib_system ||
|
||||
__math_lib == __math_lib_svml) {
|
||||
return __stdlib_powf(a, b);
|
||||
@@ -3551,8 +3750,9 @@ static inline int16 float_to_half(float f) {
|
||||
// like recursive filters in DSP - not a typical half-float application. Whether
|
||||
// FP16 denormals are rare in practice, I don't know. Whatever slow path your HW
|
||||
// may or may not have for denormals, this may well hit it.
|
||||
int32 fint2 = intbits(floatbits(fint & round_mask) * floatbits(magic)) - round_mask;
|
||||
fint2 = (fint2 > f16infty) ? f16infty : fint2; // Clamp to signed infinity if overflowed
|
||||
float fscale = floatbits(fint & round_mask) * floatbits(magic);
|
||||
fscale = min(fscale, floatbits((31 << 23) - 0x1000));
|
||||
int32 fint2 = intbits(fscale) - round_mask;
|
||||
|
||||
if (fint < f32infty)
|
||||
o = fint2 >> 13; // Take the bits!
|
||||
@@ -3648,6 +3848,133 @@ static inline int16 float_to_half_fast(float f) {
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// float -> srgb8
|
||||
|
||||
// https://gist.github.com/2246678, from Fabian "rygorous" Giesen.
|
||||
//
|
||||
// The basic ideas are still the same, only this time, we squeeze
|
||||
// everything into the table, even the linear part of the range; since we
|
||||
// are approximating the function as piecewise linear anyway, this is
|
||||
// fairly easy.
|
||||
//
|
||||
// In the exact version of the conversion, any value that produces an
|
||||
// output float less than 0.5 will be rounded to an integer of
|
||||
// zero. Inverting the linear part of the transform, we get:
|
||||
//
|
||||
// log2(0.5 / (255 * 12.92)) =~ -12.686
|
||||
//
|
||||
// which in turn means that any value smaller than about 2^(-12.686) will
|
||||
// return 0. What this means is that we can adapt the clamping code to
|
||||
// just clamp to [2^(-13), 1-eps] and we're covered. This means our table
|
||||
// needs to cover a range of 13 different exponents from -13 to -1.
|
||||
//
|
||||
// The table lookup, storage and interpolation works exactly the same way
|
||||
// as in the code above.
|
||||
//
|
||||
// Max error for the whole function (integer-rounded result minus "exact"
|
||||
// value, as computed in floats using the official formula): 0.544403 at
|
||||
// 0x3e9f8000
|
||||
|
||||
// Varying float -> 8-bit sRGB conversion using a 104-entry piecewise-linear
// table (8 entries for each of the 13 exponents from -13 to -1).
//
// Each table entry packs a bias in its upper 16 bits and a scale in its
// lower 16 bits.  The entry is selected by the exponent and top three
// mantissa bits of the clamped input; the next eight mantissa bits are the
// interpolation parameter.
//
// The input is clamped to [2^(-13), 1-eps] before the lookup.  The lower
// bound must be 2^(-13) (bit pattern 0x39000000) rather than 0: the table
// index is computed as (intbits(in) - 0x39000000) >> 20, so any smaller
// input would wrap around and index far outside the table.  With the clamp
// in place, inputs at or below 2^(-13) select entry 0 with t == 0 and
// return 0, and inputs at or above 1 select entry 103 and return 255.
__declspec(safe)
static inline int
float_to_srgb8(float in)
{
    static const uniform unsigned int table[104] = {
        0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d,
        0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
        0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a,
        0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
        0x010e0033, 0x01280033, 0x01410033, 0x015b0033,
        0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
        0x01dc0067, 0x020f0067, 0x02430067, 0x02760067,
        0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
        0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce,
        0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
        0x06970158, 0x07420142, 0x07e30130, 0x087b0120,
        0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
        0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180,
        0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
        0x11070264, 0x1238023e, 0x1357021d, 0x14660201,
        0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
        0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad,
        0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
        0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392,
        0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
        0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5,
        0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
        0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d,
        0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
        0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f,
        0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
    };

    // Bit pattern of the largest float below 1.0.
    static const uniform unsigned int almost_one = 0x3f7fffff;
    // Bit pattern of 2^(-13); also the base subtracted when forming the
    // table index below, so the clamped input never indexes below entry 0.
    static const uniform unsigned int min_val = 0x39000000;

    // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
    in = max(in, floatbits(min_val));
    in = min(in, floatbits(almost_one));

    // Do the table lookup and unpack bias, scale
    unsigned int tab = table[(intbits(in) - 0x39000000u) >> 20];
    unsigned int bias = (tab >> 16) << 9;
    unsigned int scale = tab & 0xffff;

    // Grab next-highest mantissa bits and perform linear interpolation
    unsigned int t = (intbits(in) >> 12) & 0xff;
    return (bias + scale*t) >> 16;
}
|
||||
|
||||
|
||||
// Uniform float -> 8-bit sRGB conversion using a 104-entry piecewise-linear
// table (8 entries for each of the 13 exponents from -13 to -1).
//
// Each table entry packs a bias in its upper 16 bits and a scale in its
// lower 16 bits.  The entry is selected by the exponent and top three
// mantissa bits of the clamped input; the next eight mantissa bits are the
// interpolation parameter.
//
// The input is clamped to [2^(-13), 1-eps] before the lookup.  The lower
// bound must be 2^(-13) (bit pattern 0x39000000) rather than 0: the table
// index is computed as (intbits(in) - 0x39000000) >> 20, so any smaller
// input would wrap around and index far outside the table.  With the clamp
// in place, inputs at or below 2^(-13) select entry 0 with t == 0 and
// return 0, and inputs at or above 1 select entry 103 and return 255.
__declspec(safe)
static inline uniform int
float_to_srgb8(uniform float in)
{
    static const uniform unsigned int table[104] = {
        0x0073000d, 0x007a000d, 0x0080000d, 0x0087000d,
        0x008d000d, 0x0094000d, 0x009a000d, 0x00a1000d,
        0x00a7001a, 0x00b4001a, 0x00c1001a, 0x00ce001a,
        0x00da001a, 0x00e7001a, 0x00f4001a, 0x0101001a,
        0x010e0033, 0x01280033, 0x01410033, 0x015b0033,
        0x01750033, 0x018f0033, 0x01a80033, 0x01c20033,
        0x01dc0067, 0x020f0067, 0x02430067, 0x02760067,
        0x02aa0067, 0x02dd0067, 0x03110067, 0x03440067,
        0x037800ce, 0x03df00ce, 0x044600ce, 0x04ad00ce,
        0x051400ce, 0x057b00c5, 0x05dd00bc, 0x063b00b5,
        0x06970158, 0x07420142, 0x07e30130, 0x087b0120,
        0x090b0112, 0x09940106, 0x0a1700fc, 0x0a9500f2,
        0x0b0f01cb, 0x0bf401ae, 0x0ccb0195, 0x0d950180,
        0x0e56016e, 0x0f0d015e, 0x0fbc0150, 0x10630143,
        0x11070264, 0x1238023e, 0x1357021d, 0x14660201,
        0x156601e9, 0x165a01d3, 0x174401c0, 0x182401af,
        0x18fe0331, 0x1a9602fe, 0x1c1502d2, 0x1d7e02ad,
        0x1ed4028d, 0x201a0270, 0x21520256, 0x227d0240,
        0x239f0443, 0x25c003fe, 0x27bf03c4, 0x29a10392,
        0x2b6a0367, 0x2d1d0341, 0x2ebe031f, 0x304d0300,
        0x31d105b0, 0x34a80555, 0x37520507, 0x39d504c5,
        0x3c37048b, 0x3e7c0458, 0x40a8042a, 0x42bd0401,
        0x44c20798, 0x488e071e, 0x4c1c06b6, 0x4f76065d,
        0x52a50610, 0x55ac05cc, 0x5892058f, 0x5b590559,
        0x5e0c0a23, 0x631c0980, 0x67db08f6, 0x6c55087f,
        0x70940818, 0x74a007bd, 0x787d076c, 0x7c330723,
    };

    // Bit pattern of the largest float below 1.0.
    static const uniform unsigned int almost_one = 0x3f7fffff;
    // Bit pattern of 2^(-13); also the base subtracted when forming the
    // table index below, so the clamped input never indexes below entry 0.
    static const uniform unsigned int min_val = 0x39000000;

    // Clamp to [2^(-13), 1-eps]; these two values map to 0 and 1, respectively.
    in = max(in, floatbits(min_val));
    in = min(in, floatbits(almost_one));

    // Do the table lookup and unpack bias, scale
    uniform unsigned int tab = table[(intbits(in) - 0x39000000u) >> 20];
    uniform unsigned int bias = (tab >> 16) << 9;
    uniform unsigned int scale = tab & 0xffff;

    // Grab next-highest mantissa bits and perform linear interpolation
    uniform unsigned int t = (intbits(in) >> 12) & 0xff;
    return (bias + scale*t) >> 16;
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// RNG stuff
|
||||
|
||||
@@ -3699,60 +4026,13 @@ static inline uniform float frandom(uniform RNGState * uniform state)
|
||||
return floatbits(0x3F800000 | irand)-1.0f;
|
||||
}
|
||||
|
||||
// Fills four consecutive lanes (start+0 .. start+3) of each of the four
// varying state words z1..z4 of *state with values derived from `seed`,
// and returns the final derived seed value.
//
// For each state word the four lanes receive, in order: seed, seed ^ c1,
// (seed << 3) ^ c1 and (seed << 2) ^ c2.  Between state words the seed is
// perturbed (add a constant, xor with a previously written lane, shift),
// so the order of the statements below matters.
static inline uniform unsigned int __seed4(varying RNGState * uniform state,
|
||||
uniform int start,
|
||||
uniform unsigned int seed) {
|
||||
uniform unsigned int c1 = 0xf0f0f0f0;
|
||||
uniform unsigned int c2 = 0x0f0f0f0f;
|
||||
|
||||
state->z1 = insert(state->z1, start + 0, seed);
|
||||
state->z1 = insert(state->z1, start + 1, seed ^ c1);
|
||||
state->z1 = insert(state->z1, start + 2, (seed << 3) ^ c1);
|
||||
state->z1 = insert(state->z1, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
seed += 131;
|
||||
state->z2 = insert(state->z2, start + 0, seed);
|
||||
state->z2 = insert(state->z2, start + 1, seed ^ c1);
|
||||
state->z2 = insert(state->z2, start + 2, (seed << 3) ^ c1);
|
||||
state->z2 = insert(state->z2, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
// Mixes in lane 2 of z2; note this is a fixed lane index, not start + 2.
seed ^= extract(state->z2, 2);
|
||||
state->z3 = insert(state->z3, start + 0, seed);
|
||||
state->z3 = insert(state->z3, start + 1, seed ^ c1);
|
||||
state->z3 = insert(state->z3, start + 2, (seed << 3) ^ c1);
|
||||
state->z3 = insert(state->z3, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
seed <<= 4;
|
||||
seed += 3;
|
||||
// Mixes in lane 3 of z1; again a fixed lane index, independent of start.
seed ^= extract(state->z1, 3);
|
||||
state->z4 = insert(state->z4, start + 0, seed);
|
||||
state->z4 = insert(state->z4, start + 1, seed ^ c1);
|
||||
state->z4 = insert(state->z4, start + 2, (seed << 3) ^ c1);
|
||||
state->z4 = insert(state->z4, start + 3, (seed << 2) ^ c2);
|
||||
|
||||
return seed;
|
||||
}
|
||||
|
||||
static inline void seed_rng(varying RNGState * uniform state,
|
||||
uniform unsigned int seed) {
|
||||
if (programCount == 1) {
|
||||
state->z1 = seed;
|
||||
state->z2 = seed ^ 0xbeeff00d;
|
||||
state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
|
||||
state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
|
||||
}
|
||||
else {
|
||||
seed = __seed4(state, 0, seed);
|
||||
if (programCount == 8)
|
||||
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||
if (programCount == 16) {
|
||||
__seed4(state, 4, seed ^ 0xbeeff00d);
|
||||
__seed4(state, 8, ((seed & 0xffff) << 16) | (seed >> 16));
|
||||
__seed4(state, 12, (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24));
|
||||
}
|
||||
}
|
||||
unsigned int seed) {
|
||||
state->z1 = seed;
|
||||
state->z2 = seed ^ 0xbeeff00d;
|
||||
state->z3 = ((seed & 0xffff) << 16) | (seed >> 16);
|
||||
state->z4 = (((seed & 0xff) << 24) | ((seed & 0xff00) << 8) |
|
||||
((seed & 0xff0000) >> 8) | (seed & 0xff000000) >> 24);
|
||||
}
|
||||
|
||||
static inline void seed_rng(uniform RNGState * uniform state,
|
||||
|
||||
172
stmt.cpp
172
stmt.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -40,6 +40,7 @@
|
||||
#include "util.h"
|
||||
#include "expr.h"
|
||||
#include "type.h"
|
||||
#include "func.h"
|
||||
#include "sym.h"
|
||||
#include "module.h"
|
||||
#include "llvmutil.h"
|
||||
@@ -121,7 +122,7 @@ DeclStmt::DeclStmt(const std::vector<VariableDeclaration> &v, SourcePos p)
|
||||
|
||||
static bool
|
||||
lHasUnsizedArrays(const Type *type) {
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(type);
|
||||
const ArrayType *at = CastType<ArrayType>(type);
|
||||
if (at == NULL)
|
||||
return false;
|
||||
|
||||
@@ -139,7 +140,7 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
for (unsigned int i = 0; i < vars.size(); ++i) {
|
||||
Symbol *sym = vars[i].sym;
|
||||
Assert(sym != NULL);
|
||||
AssertPos(pos, sym != NULL);
|
||||
if (sym->type == NULL)
|
||||
continue;
|
||||
Expr *initExpr = vars[i].init;
|
||||
@@ -167,16 +168,30 @@ DeclStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
}
|
||||
|
||||
// References must have initializer expressions as well.
|
||||
if (dynamic_cast<const ReferenceType *>(sym->type) && initExpr == NULL) {
|
||||
Error(sym->pos,
|
||||
"Must provide initializer for reference-type variable \"%s\".",
|
||||
sym->name.c_str());
|
||||
continue;
|
||||
if (IsReferenceType(sym->type) == true) {
|
||||
if (initExpr == NULL) {
|
||||
Error(sym->pos, "Must provide initializer for reference-type "
|
||||
"variable \"%s\".", sym->name.c_str());
|
||||
continue;
|
||||
}
|
||||
if (IsReferenceType(initExpr->GetType()) == false) {
|
||||
const Type *initLVType = initExpr->GetLValueType();
|
||||
if (initLVType == NULL) {
|
||||
Error(initExpr->pos, "Initializer for reference-type variable "
|
||||
"\"%s\" must have an lvalue type.", sym->name.c_str());
|
||||
continue;
|
||||
}
|
||||
if (initLVType->IsUniformType() == false) {
|
||||
Error(initExpr->pos, "Initializer for reference-type variable "
|
||||
"\"%s\" must have a uniform lvalue type.", sym->name.c_str());
|
||||
continue;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
|
||||
llvm::Type *llvmType = sym->type->LLVMType(g->ctx);
|
||||
if (llvmType == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -282,8 +297,8 @@ DeclStmt::TypeCheck() {
|
||||
// the int->float type conversion is in there and we don't return
|
||||
// an int as the constValue later...
|
||||
const Type *type = vars[i].sym->type;
|
||||
if (dynamic_cast<const AtomicType *>(type) != NULL ||
|
||||
dynamic_cast<const EnumType *>(type) != NULL) {
|
||||
if (CastType<AtomicType>(type) != NULL ||
|
||||
CastType<EnumType>(type) != NULL) {
|
||||
// If it's an expr list with an atomic type, we'll later issue
|
||||
// an error. Need to leave vars[i].init as is in that case so
|
||||
// it is in fact caught later, though.
|
||||
@@ -463,12 +478,12 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
|
||||
// under varying control flow, returns can't stop instruction
|
||||
// emission, so this better be non-NULL...
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock());
|
||||
}
|
||||
if (falseStmts) {
|
||||
ctx->SetInternalMaskAndNot(oldMask, test);
|
||||
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(ctx->GetDebugPos(), ctx->GetCurrentBasicBlock());
|
||||
}
|
||||
}
|
||||
|
||||
@@ -549,7 +564,7 @@ IfStmt::emitVaryingIf(FunctionEmitContext *ctx, llvm::Value *ltest) const {
|
||||
(costIsAcceptable || g->opt.disableCoherentControlFlow)) {
|
||||
ctx->StartVaryingIf(oldMask);
|
||||
emitMaskedTrueAndFalse(ctx, oldMask, ltest);
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock());
|
||||
ctx->EndIf();
|
||||
}
|
||||
else {
|
||||
@@ -572,7 +587,7 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
|
||||
// compiler see what's going on so that subsequent optimizations for
|
||||
// code emitted here can operate with the knowledge that the mask is
|
||||
// definitely all on (until it modifies the mask itself).
|
||||
Assert(!g->opt.disableCoherentControlFlow);
|
||||
AssertPos(pos, !g->opt.disableCoherentControlFlow);
|
||||
if (!g->opt.disableMaskAllOnOptimizations)
|
||||
ctx->SetInternalMask(LLVMMaskAllOn);
|
||||
llvm::Value *oldFunctionMask = ctx->GetFunctionMask();
|
||||
@@ -622,7 +637,7 @@ IfStmt::emitMaskAllOn(FunctionEmitContext *ctx, llvm::Value *ltest,
|
||||
emitMaskedTrueAndFalse(ctx, LLVMMaskAllOn, ltest);
|
||||
// In this case, return/break/continue isn't allowed to jump and end
|
||||
// emission.
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock());
|
||||
ctx->EndIf();
|
||||
ctx->BranchInst(bDone);
|
||||
|
||||
@@ -651,7 +666,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
// Emit statements for true
|
||||
ctx->SetCurrentBasicBlock(bRunTrue);
|
||||
lEmitIfStatements(ctx, trueStmts, "if: expr mixed, true statements");
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock());
|
||||
ctx->BranchInst(bNext);
|
||||
ctx->SetCurrentBasicBlock(bNext);
|
||||
}
|
||||
@@ -668,7 +683,7 @@ IfStmt::emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
// Emit code for false
|
||||
ctx->SetCurrentBasicBlock(bRunFalse);
|
||||
lEmitIfStatements(ctx, falseStmts, "if: expr mixed, false statements");
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock());
|
||||
ctx->BranchInst(bNext);
|
||||
ctx->SetCurrentBasicBlock(bNext);
|
||||
}
|
||||
@@ -822,7 +837,7 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (bodyStmts)
|
||||
bodyStmts->EmitCode(ctx);
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock());
|
||||
ctx->SetFunctionMask(oldFunctionMask);
|
||||
ctx->BranchInst(btest);
|
||||
|
||||
@@ -830,7 +845,7 @@ void DoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->SetCurrentBasicBlock(bMixed);
|
||||
if (bodyStmts)
|
||||
bodyStmts->EmitCode(ctx);
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock());
|
||||
ctx->BranchInst(btest);
|
||||
}
|
||||
else {
|
||||
@@ -971,7 +986,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// it and then jump into the loop test code. (Also start a new scope
|
||||
// since the initializer may be a declaration statement).
|
||||
if (init) {
|
||||
Assert(dynamic_cast<StmtList *>(init) == NULL);
|
||||
AssertPos(pos, dynamic_cast<StmtList *>(init) == NULL);
|
||||
ctx->StartScope();
|
||||
init->EmitCode(ctx);
|
||||
}
|
||||
@@ -1000,7 +1015,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (doCoherentCheck)
|
||||
Warning(test->pos, "Uniform condition supplied to cfor/cwhile "
|
||||
"statement.");
|
||||
Assert(ltest->getType() == LLVMTypes::BoolType);
|
||||
AssertPos(pos, ltest->getType() == LLVMTypes::BoolType);
|
||||
ctx->BranchInst(bloop, bexit, ltest);
|
||||
}
|
||||
else {
|
||||
@@ -1036,7 +1051,7 @@ ForStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
if (stmts)
|
||||
stmts->EmitCode(ctx);
|
||||
Assert(ctx->GetCurrentBasicBlock());
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock());
|
||||
ctx->SetFunctionMask(oldFunctionMask);
|
||||
ctx->BranchInst(bstep);
|
||||
|
||||
@@ -1349,8 +1364,8 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->SetFunctionMask(LLVMMaskAllOn);
|
||||
|
||||
// This should be caught during typechecking
|
||||
Assert(startExprs.size() == dimVariables.size() &&
|
||||
endExprs.size() == dimVariables.size());
|
||||
AssertPos(pos, startExprs.size() == dimVariables.size() &&
|
||||
endExprs.size() == dimVariables.size());
|
||||
int nDims = (int)dimVariables.size();
|
||||
|
||||
///////////////////////////////////////////////////////////////////////
|
||||
@@ -1689,7 +1704,7 @@ ForeachStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
ctx->SetContinueTarget(bbFullBodyContinue);
|
||||
ctx->AddInstrumentationPoint("foreach loop body (all on)");
|
||||
stmts->EmitCode(ctx);
|
||||
Assert(ctx->GetCurrentBasicBlock() != NULL);
|
||||
AssertPos(pos, ctx->GetCurrentBasicBlock() != NULL);
|
||||
ctx->BranchInst(bbFullBodyContinue);
|
||||
}
|
||||
ctx->SetCurrentBasicBlock(bbFullBodyContinue); {
|
||||
@@ -2079,7 +2094,7 @@ SwitchStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
const Type *type;
|
||||
if (expr == NULL || ((type = expr->GetType()) == NULL)) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2097,7 +2112,7 @@ SwitchStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
llvm::Value *exprValue = expr->GetValue(ctx);
|
||||
if (exprValue == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2173,8 +2188,8 @@ SwitchStmt::EstimateCost() const {
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ReturnStmt
|
||||
|
||||
ReturnStmt::ReturnStmt(Expr *v, bool cc, SourcePos p)
|
||||
: Stmt(p), val(v),
|
||||
ReturnStmt::ReturnStmt(Expr *e, bool cc, SourcePos p)
|
||||
: Stmt(p), expr(e),
|
||||
doCoherenceCheck(cc && !g->opt.disableCoherentControlFlow) {
|
||||
}
|
||||
|
||||
@@ -2189,8 +2204,29 @@ ReturnStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
return;
|
||||
}
|
||||
|
||||
// Make sure we're not trying to return a reference to something where
|
||||
// that doesn't make sense
|
||||
const Function *func = ctx->GetFunction();
|
||||
const Type *returnType = func->GetReturnType();
|
||||
if (IsReferenceType(returnType) == true &&
|
||||
IsReferenceType(expr->GetType()) == false) {
|
||||
const Type *lvType = expr->GetLValueType();
|
||||
if (lvType == NULL) {
|
||||
Error(expr->pos, "Illegal to return non-lvalue from function "
|
||||
"returning reference type \"%s\".",
|
||||
returnType->GetString().c_str());
|
||||
return;
|
||||
}
|
||||
else if (lvType->IsUniformType() == false) {
|
||||
Error(expr->pos, "Illegal to return varying lvalue type from "
|
||||
"function returning a reference type \"%s\".",
|
||||
returnType->GetString().c_str());
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
ctx->SetDebugPos(pos);
|
||||
ctx->CurrentLanesReturned(val, doCoherenceCheck);
|
||||
ctx->CurrentLanesReturned(expr, doCoherenceCheck);
|
||||
}
|
||||
|
||||
|
||||
@@ -2210,7 +2246,8 @@ void
|
||||
ReturnStmt::Print(int indent) const {
|
||||
printf("%*c%sReturn Stmt", indent, ' ', doCoherenceCheck ? "Coherent " : "");
|
||||
pos.Print();
|
||||
if (val) val->Print();
|
||||
if (expr)
|
||||
expr->Print();
|
||||
else printf("(void)");
|
||||
printf("\n");
|
||||
}
|
||||
@@ -2228,6 +2265,9 @@ GotoStmt::GotoStmt(const char *l, SourcePos gotoPos, SourcePos ip)
|
||||
|
||||
void
|
||||
GotoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (!ctx->GetCurrentBasicBlock())
|
||||
return;
|
||||
|
||||
if (ctx->VaryingCFDepth() > 0) {
|
||||
Error(pos, "\"goto\" statements are only legal under \"uniform\" "
|
||||
"control flow.");
|
||||
@@ -2241,10 +2281,22 @@ GotoStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
llvm::BasicBlock *bb = ctx->GetLabeledBasicBlock(label);
|
||||
if (bb == NULL) {
|
||||
// TODO: use the string distance stuff to suggest alternatives if
|
||||
// there are some with names close to the label name we have here..
|
||||
Error(identifierPos, "No label named \"%s\" found in current function.",
|
||||
label.c_str());
|
||||
/* Label wasn't found. Look for suggestions that are close */
|
||||
std::vector<std::string> labels = ctx->GetLabels();
|
||||
std::vector<std::string> matches = MatchStrings(label, labels);
|
||||
std::string match_output;
|
||||
if (! matches.empty()) {
|
||||
/* Print up to 5 matches. Don't want to spew too much */
|
||||
match_output += "\nDid you mean:";
|
||||
for (unsigned int i=0; i<matches.size() && i<5; i++)
|
||||
match_output += "\n " + matches[i] + "?";
|
||||
}
|
||||
|
||||
/* Label wasn't found. Emit an error */
|
||||
Error(identifierPos,
|
||||
"No label named \"%s\" found in current function.%s",
|
||||
label.c_str(), match_output.c_str());
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
@@ -2290,7 +2342,7 @@ LabeledStmt::LabeledStmt(const char *n, Stmt *s, SourcePos p)
|
||||
void
|
||||
LabeledStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
llvm::BasicBlock *bblock = ctx->GetLabeledBasicBlock(name);
|
||||
Assert(bblock != NULL);
|
||||
AssertPos(pos, bblock != NULL);
|
||||
|
||||
// End the current basic block with a jump to our basic block and then
|
||||
// set things up for emission to continue there. Note that the current
|
||||
@@ -2409,7 +2461,7 @@ lEncodeType(const Type *t) {
|
||||
if (Type::Equal(t, AtomicType::VaryingUInt64)) return 'V';
|
||||
if (Type::Equal(t, AtomicType::UniformDouble)) return 'd';
|
||||
if (Type::Equal(t, AtomicType::VaryingDouble)) return 'D';
|
||||
if (dynamic_cast<const PointerType *>(t) != NULL) {
|
||||
if (CastType<PointerType>(t) != NULL) {
|
||||
if (t->IsUniformType())
|
||||
return 'p';
|
||||
else
|
||||
@@ -2429,7 +2481,7 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
|
||||
if (type == NULL)
|
||||
return NULL;
|
||||
|
||||
if (dynamic_cast<const ReferenceType *>(type) != NULL) {
|
||||
if (CastType<ReferenceType>(type) != NULL) {
|
||||
expr = new RefDerefExpr(expr, expr->pos);
|
||||
type = expr->GetType();
|
||||
if (type == NULL)
|
||||
@@ -2457,7 +2509,7 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
|
||||
else {
|
||||
argTypes.push_back(t);
|
||||
|
||||
LLVM_TYPE_CONST llvm::Type *llvmExprType = type->LLVMType(g->ctx);
|
||||
llvm::Type *llvmExprType = type->LLVMType(g->ctx);
|
||||
llvm::Value *ptr = ctx->AllocaInst(llvmExprType, "print_arg");
|
||||
llvm::Value *val = expr->GetValue(ctx);
|
||||
if (!val)
|
||||
@@ -2478,6 +2530,9 @@ lProcessPrintArg(Expr *expr, FunctionEmitContext *ctx, std::string &argTypes) {
|
||||
*/
|
||||
void
|
||||
PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (!ctx->GetCurrentBasicBlock())
|
||||
return;
|
||||
|
||||
ctx->SetDebugPos(pos);
|
||||
|
||||
// __do_print takes 5 arguments; we'll get them stored in the args[] array
|
||||
@@ -2494,7 +2549,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
std::string argTypes;
|
||||
|
||||
if (values == NULL) {
|
||||
LLVM_TYPE_CONST llvm::Type *ptrPtrType =
|
||||
llvm::Type *ptrPtrType =
|
||||
llvm::PointerType::get(LLVMTypes::VoidPointerType, 0);
|
||||
args[4] = llvm::Constant::getNullValue(ptrPtrType);
|
||||
}
|
||||
@@ -2506,7 +2561,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
int nArgs = elist ? elist->exprs.size() : 1;
|
||||
|
||||
// Allocate space for the array of pointers to values to be printed
|
||||
LLVM_TYPE_CONST llvm::Type *argPtrArrayType =
|
||||
llvm::Type *argPtrArrayType =
|
||||
llvm::ArrayType::get(LLVMTypes::VoidPointerType, nArgs);
|
||||
llvm::Value *argPtrArray = ctx->AllocaInst(argPtrArrayType,
|
||||
"print_arg_ptrs");
|
||||
@@ -2542,7 +2597,7 @@ PrintStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
|
||||
// Now we can emit code to call __do_print()
|
||||
llvm::Function *printFunc = m->module->getFunction("__do_print");
|
||||
Assert(printFunc);
|
||||
AssertPos(pos, printFunc);
|
||||
|
||||
llvm::Value *mask = ctx->GetFullMask();
|
||||
// Set up the rest of the parameters to it
|
||||
@@ -2583,6 +2638,9 @@ AssertStmt::AssertStmt(const std::string &msg, Expr *e, SourcePos p)
|
||||
|
||||
void
|
||||
AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (!ctx->GetCurrentBasicBlock())
|
||||
return;
|
||||
|
||||
if (expr == NULL)
|
||||
return;
|
||||
const Type *type = expr->GetType();
|
||||
@@ -2595,7 +2653,7 @@ AssertStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
llvm::Function *assertFunc =
|
||||
isUniform ? m->module->getFunction("__do_assert_uniform") :
|
||||
m->module->getFunction("__do_assert_varying");
|
||||
Assert(assertFunc != NULL);
|
||||
AssertPos(pos, assertFunc != NULL);
|
||||
|
||||
char *errorString;
|
||||
if (asprintf(&errorString, "%s:%d:%d: Assertion failed: %s\n",
|
||||
@@ -2658,20 +2716,23 @@ DeleteStmt::DeleteStmt(Expr *e, SourcePos p)
|
||||
|
||||
void
|
||||
DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
if (!ctx->GetCurrentBasicBlock())
|
||||
return;
|
||||
|
||||
const Type *exprType;
|
||||
if (expr == NULL || ((exprType = expr->GetType()) == NULL)) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
llvm::Value *exprValue = expr->GetValue(ctx);
|
||||
if (exprValue == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return;
|
||||
}
|
||||
|
||||
// Typechecking should catch this
|
||||
Assert(dynamic_cast<const PointerType *>(exprType) != NULL);
|
||||
AssertPos(pos, CastType<PointerType>(exprType) != NULL);
|
||||
|
||||
if (exprType->IsUniformType()) {
|
||||
// For deletion of a uniform pointer, we just need to cast the
|
||||
@@ -2680,7 +2741,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
exprValue = ctx->BitCastInst(exprValue, LLVMTypes::VoidPointerType,
|
||||
"ptr_to_void");
|
||||
llvm::Function *func = m->module->getFunction("__delete_uniform");
|
||||
Assert(func != NULL);
|
||||
AssertPos(pos, func != NULL);
|
||||
|
||||
ctx->CallInst(func, NULL, exprValue, "");
|
||||
}
|
||||
@@ -2690,7 +2751,7 @@ DeleteStmt::EmitCode(FunctionEmitContext *ctx) const {
|
||||
// only need to extend to 64-bit values on 32-bit targets before
|
||||
// calling it.
|
||||
llvm::Function *func = m->module->getFunction("__delete_varying");
|
||||
Assert(func != NULL);
|
||||
AssertPos(pos, func != NULL);
|
||||
if (g->target.is32Bit)
|
||||
exprValue = ctx->ZExtInst(exprValue, LLVMTypes::Int64VectorType,
|
||||
"ptr_to_64");
|
||||
@@ -2711,7 +2772,7 @@ DeleteStmt::TypeCheck() {
|
||||
if (expr == NULL || ((exprType = expr->GetType()) == NULL))
|
||||
return NULL;
|
||||
|
||||
if (dynamic_cast<const PointerType *>(exprType) == NULL) {
|
||||
if (CastType<PointerType>(exprType) == NULL) {
|
||||
Error(pos, "Illegal to delete non-pointer type \"%s\".",
|
||||
exprType->GetString().c_str());
|
||||
return NULL;
|
||||
@@ -2743,7 +2804,7 @@ DeleteStmt::EstimateCost() const {
|
||||
Stmt *
|
||||
CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
|
||||
if (iterSym == NULL) {
|
||||
Assert(m->errorCount > 0);
|
||||
AssertPos(pos, m->errorCount > 0);
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -2770,11 +2831,11 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
|
||||
// First, call __movmsk(__mask)) to get the mask as a set of bits.
|
||||
// This should be hoisted out of the loop
|
||||
Symbol *maskSym = m->symbolTable->LookupVariable("__mask");
|
||||
Assert(maskSym != NULL);
|
||||
AssertPos(pos, maskSym != NULL);
|
||||
Expr *maskVecExpr = new SymbolExpr(maskSym, pos);
|
||||
std::vector<Symbol *> mmFuns;
|
||||
m->symbolTable->LookupFunction("__movmsk", &mmFuns);
|
||||
Assert(mmFuns.size() == 2);
|
||||
AssertPos(pos, mmFuns.size() == (g->target.maskBitCount == 32 ? 2 : 1));
|
||||
FunctionSymbolExpr *movmskFunc = new FunctionSymbolExpr("__movmsk", mmFuns,
|
||||
pos);
|
||||
ExprList *movmskArgs = new ExprList(maskVecExpr, pos);
|
||||
@@ -2782,7 +2843,7 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
|
||||
pos);
|
||||
|
||||
// Compute the per lane mask to test the mask bits against: (1 << iter)
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt32, 1,
|
||||
ConstExpr *oneExpr = new ConstExpr(AtomicType::UniformInt64, int64_t(1),
|
||||
iterSym->pos);
|
||||
Expr *shiftLaneExpr = new BinaryExpr(BinaryExpr::Shl, oneExpr, symExpr,
|
||||
pos);
|
||||
@@ -2802,4 +2863,3 @@ CreateForeachActiveStmt(Symbol *iterSym, Stmt *stmts, SourcePos pos) {
|
||||
// And return a for loop that wires it all together.
|
||||
return new ForStmt(initStmt, testExpr, stepStmt, laneCheckIf, false, pos);
|
||||
}
|
||||
|
||||
|
||||
6
stmt.h
6
stmt.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -265,7 +265,7 @@ public:
|
||||
statement in the program. */
|
||||
class ReturnStmt : public Stmt {
|
||||
public:
|
||||
ReturnStmt(Expr *v, bool cc, SourcePos p);
|
||||
ReturnStmt(Expr *e, bool cc, SourcePos p);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
@@ -273,7 +273,7 @@ public:
|
||||
Stmt *TypeCheck();
|
||||
int EstimateCost() const;
|
||||
|
||||
Expr *val;
|
||||
Expr *expr;
|
||||
/** This indicates whether the generated code will check to see if no
|
||||
more program instances are currently running after the return, in
|
||||
which case the code can possibly jump to the end of the current
|
||||
|
||||
116
sym.cpp
116
sym.cpp
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -56,12 +56,6 @@ Symbol::Symbol(const std::string &n, SourcePos p, const Type *t,
|
||||
}
|
||||
|
||||
|
||||
std::string
|
||||
Symbol::MangledName() const {
|
||||
return name + type->Mangle();
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// SymbolTable
|
||||
|
||||
@@ -72,27 +66,31 @@ SymbolTable::SymbolTable() {
|
||||
|
||||
SymbolTable::~SymbolTable() {
|
||||
// Otherwise we have mismatched push/pop scopes
|
||||
Assert(variables.size() == 1 && types.size() == 1);
|
||||
Assert(variables.size() == 1);
|
||||
PopScope();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
SymbolTable::PushScope() {
|
||||
variables.push_back(new SymbolMapType);
|
||||
types.push_back(new TypeMapType);
|
||||
SymbolMapType *sm;
|
||||
if (freeSymbolMaps.size() > 0) {
|
||||
sm = freeSymbolMaps.back();
|
||||
freeSymbolMaps.pop_back();
|
||||
sm->erase(sm->begin(), sm->end());
|
||||
}
|
||||
else
|
||||
sm = new SymbolMapType;
|
||||
|
||||
variables.push_back(sm);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
SymbolTable::PopScope() {
|
||||
Assert(variables.size() > 1);
|
||||
delete variables.back();
|
||||
freeSymbolMaps.push_back(variables.back());
|
||||
variables.pop_back();
|
||||
|
||||
Assert(types.size() > 1);
|
||||
delete types.back();
|
||||
types.pop_back();
|
||||
}
|
||||
|
||||
|
||||
@@ -147,7 +145,7 @@ SymbolTable::LookupVariable(const char *name) {
|
||||
|
||||
bool
|
||||
SymbolTable::AddFunction(Symbol *symbol) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(symbol->type);
|
||||
const FunctionType *ft = CastType<FunctionType>(symbol->type);
|
||||
Assert(ft != NULL);
|
||||
if (LookupFunction(symbol->name.c_str(), ft) != NULL)
|
||||
// A function of the same name and type has already been added to
|
||||
@@ -192,26 +190,17 @@ SymbolTable::LookupFunction(const char *name, const FunctionType *type) {
|
||||
|
||||
bool
|
||||
SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) {
|
||||
// Like AddVariable(), we go backwards through the type maps, working
|
||||
// from innermost scope to outermost.
|
||||
for (int i = types.size()-1; i >= 0; --i) {
|
||||
TypeMapType &sm = *(types[i]);
|
||||
if (sm.find(name) != sm.end()) {
|
||||
if (i == (int)types.size() - 1) {
|
||||
Error(pos, "Ignoring redefinition of type \"%s\".", name);
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
Warning(pos, "Type \"%s\" shadows type declared in outer scope.", name);
|
||||
TypeMapType &sm = *(types.back());
|
||||
sm[name] = type;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
const Type *t = LookupType(name);
|
||||
if (t != NULL && CastType<UndefinedStructType>(t) == NULL) {
|
||||
// If we have a previous declaration of anything other than an
|
||||
// UndefinedStructType with this struct name, issue an error. If
|
||||
// we have an UndefinedStructType, then we'll fall through to the
|
||||
// code below that adds the definition to the type map.
|
||||
Error(pos, "Ignoring redefinition of type \"%s\".", name);
|
||||
return false;
|
||||
}
|
||||
|
||||
TypeMapType &sm = *(types.back());
|
||||
sm[name] = type;
|
||||
types[name] = type;
|
||||
return true;
|
||||
}
|
||||
|
||||
@@ -219,11 +208,9 @@ SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) {
|
||||
const Type *
|
||||
SymbolTable::LookupType(const char *name) const {
|
||||
// Again, search through the type maps backward to get scoping right.
|
||||
for (int i = types.size()-1; i >= 0; --i) {
|
||||
TypeMapType &sm = *(types[i]);
|
||||
if (sm.find(name) != sm.end())
|
||||
return sm[name];
|
||||
}
|
||||
TypeMapType::const_iterator iter = types.find(name);
|
||||
if (iter != types.end())
|
||||
return iter->second;
|
||||
return NULL;
|
||||
}
|
||||
|
||||
@@ -288,21 +275,19 @@ SymbolTable::closestTypeMatch(const char *str, bool structsVsEnums) const {
|
||||
const int maxDelta = 2;
|
||||
std::vector<std::string> matches[maxDelta+1];
|
||||
|
||||
for (unsigned int i = 0; i < types.size(); ++i) {
|
||||
TypeMapType::const_iterator iter;
|
||||
for (iter = types[i]->begin(); iter != types[i]->end(); ++iter) {
|
||||
// Skip over either StructTypes or EnumTypes, depending on the
|
||||
// value of the structsVsEnums parameter
|
||||
bool isEnum = (dynamic_cast<const EnumType *>(iter->second) != NULL);
|
||||
if (isEnum && structsVsEnums)
|
||||
continue;
|
||||
else if (!isEnum && !structsVsEnums)
|
||||
continue;
|
||||
TypeMapType::const_iterator iter;
|
||||
for (iter = types.begin(); iter != types.end(); ++iter) {
|
||||
// Skip over either StructTypes or EnumTypes, depending on the
|
||||
// value of the structsVsEnums parameter
|
||||
bool isEnum = (CastType<EnumType>(iter->second) != NULL);
|
||||
if (isEnum && structsVsEnums)
|
||||
continue;
|
||||
else if (!isEnum && !structsVsEnums)
|
||||
continue;
|
||||
|
||||
int dist = StringEditDistance(str, iter->first, maxDelta+1);
|
||||
if (dist <= maxDelta)
|
||||
matches[dist].push_back(iter->first);
|
||||
}
|
||||
int dist = StringEditDistance(str, iter->first, maxDelta+1);
|
||||
if (dist <= maxDelta)
|
||||
matches[dist].push_back(iter->first);
|
||||
}
|
||||
|
||||
for (int i = 0; i <= maxDelta; ++i) {
|
||||
@@ -342,16 +327,12 @@ SymbolTable::Print() {
|
||||
|
||||
depth = 0;
|
||||
fprintf(stderr, "Named types:\n---------------\n");
|
||||
for (unsigned int i = 0; i < types.size(); ++i) {
|
||||
TypeMapType &sm = *types[i];
|
||||
TypeMapType::iterator siter = sm.begin();
|
||||
while (siter != sm.end()) {
|
||||
fprintf(stderr, "%*c", depth, ' ');
|
||||
fprintf(stderr, "%s -> %s\n", siter->first.c_str(),
|
||||
siter->second->GetString().c_str());
|
||||
++siter;
|
||||
}
|
||||
depth += 4;
|
||||
TypeMapType::iterator siter = types.begin();
|
||||
while (siter != types.end()) {
|
||||
fprintf(stderr, "%*c", depth, ' ');
|
||||
fprintf(stderr, "%s -> %s\n", siter->first.c_str(),
|
||||
siter->second->GetString().c_str());
|
||||
++siter;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -382,14 +363,11 @@ SymbolTable::RandomSymbol() {
|
||||
|
||||
const Type *
|
||||
SymbolTable::RandomType() {
|
||||
int v = ispcRand() % types.size();
|
||||
if (types[v]->size() == 0)
|
||||
return NULL;
|
||||
int count = ispcRand() % types[v]->size();
|
||||
TypeMapType::iterator iter = types[v]->begin();
|
||||
int count = types.size();
|
||||
TypeMapType::iterator iter = types.begin();
|
||||
while (count-- > 0) {
|
||||
++iter;
|
||||
Assert(iter != types[v]->end());
|
||||
Assert(iter != types.end());
|
||||
}
|
||||
return iter->second;
|
||||
}
|
||||
|
||||
22
sym.h
22
sym.h
@@ -1,5 +1,5 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
Copyright (c) 2010-2012, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
@@ -67,15 +67,8 @@ public:
|
||||
Symbol(const std::string &name, SourcePos pos, const Type *t = NULL,
|
||||
StorageClass sc = SC_NONE);
|
||||
|
||||
/** This method should only be called for function symbols; for them,
|
||||
it returns a mangled version of the function name with the argument
|
||||
types encoded into the returned name. This is used to generate
|
||||
unique symbols in object files for overloaded functions.
|
||||
*/
|
||||
std::string MangledName() const;
|
||||
|
||||
SourcePos pos; /*!< Source file position where the symbol was defined */
|
||||
const std::string name; /*!< Symbol's name */
|
||||
std::string name; /*!< Symbol's name */
|
||||
llvm::Value *storagePtr; /*!< For symbols with storage associated with
|
||||
them (i.e. variables but not functions),
|
||||
this member stores a pointer to its
|
||||
@@ -208,6 +201,9 @@ public:
|
||||
/** Adds the named type to the symbol table. This is used for both
|
||||
struct definitions (where <tt>struct Foo</tt> causes type \c Foo to
|
||||
be added to the symbol table) as well as for <tt>typedef</tt>s.
|
||||
For structs with forward declarations ("struct Foo;") and are thus
|
||||
UndefinedStructTypes, this method replaces these with an actual
|
||||
struct definition if one is provided.
|
||||
|
||||
@param name Name of the type to be added
|
||||
@param type Type that \c name represents
|
||||
@@ -264,6 +260,8 @@ private:
|
||||
typedef std::map<std::string, Symbol *> SymbolMapType;
|
||||
std::vector<SymbolMapType *> variables;
|
||||
|
||||
std::vector<SymbolMapType *> freeSymbolMaps;
|
||||
|
||||
/** Function declarations are *not* scoped. (C99, for example, allows
|
||||
an implementation to maintain function declarations in a single
|
||||
namespace.) A STL \c vector is used to store the function symbols
|
||||
@@ -272,12 +270,10 @@ private:
|
||||
typedef std::map<std::string, std::vector<Symbol *> > FunctionMapType;
|
||||
FunctionMapType functions;
|
||||
|
||||
/** Type definitions can also be scoped. A new \c TypeMapType
|
||||
is added to the back of the \c types \c vector each time a new scope
|
||||
is entered. (And it's removed when the scope exits).
|
||||
/** Type definitions can't currently be scoped.
|
||||
*/
|
||||
typedef std::map<std::string, const Type *> TypeMapType;
|
||||
std::vector<TypeMapType *> types;
|
||||
TypeMapType types;
|
||||
};
|
||||
|
||||
|
||||
|
||||
@@ -102,15 +102,21 @@ void *ISPCAlloc(void **handle, int64_t size, int32_t alignment) {
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
int w = width();
|
||||
assert(w <= 16);
|
||||
assert(w <= 64);
|
||||
|
||||
float returned_result[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
float returned_result[64];
|
||||
float vfloat[64];
|
||||
double vdouble[64];
|
||||
int vint[64], vint2[64];
|
||||
|
||||
for (int i = 0; i < 64; ++i) {
|
||||
returned_result[i] = -1e20;
|
||||
float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };
|
||||
int vint2[16] = { 5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
|
||||
vfloat[i] = i+1;
|
||||
vdouble[i] = i+1;
|
||||
vint[i] = 2*(i+1);
|
||||
vint2[i] = i+5;
|
||||
}
|
||||
|
||||
float b = 5.;
|
||||
|
||||
#if (TEST_SIG == 0)
|
||||
@@ -131,8 +137,8 @@ int main(int argc, char *argv[]) {
|
||||
#error "Unknown or unset TEST_SIG value"
|
||||
#endif
|
||||
|
||||
float expected_result[16];
|
||||
memset(expected_result, 0, 16*sizeof(float));
|
||||
float expected_result[64];
|
||||
memset(expected_result, 0, 64*sizeof(float));
|
||||
result(expected_result);
|
||||
|
||||
int errors = 0;
|
||||
|
||||
@@ -6,14 +6,14 @@ bool ok(float x, float ref) { return (abs(x - ref) < 1e-6) || abs((x-ref)/ref) <
|
||||
|
||||
export void f_v(uniform float RET[]) {
|
||||
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
|
||||
uniform float r[8];
|
||||
uniform float r[programCount];
|
||||
foreach (i = 0 ... 8)
|
||||
r[i] = cos(acos(vals[i]));
|
||||
r[i] = cos(acos(vals[i % 8]));
|
||||
|
||||
int errors = 0;
|
||||
for (uniform int i = 0; i < 8; ++i) {
|
||||
if (ok(r[i], vals[i]) == false) {
|
||||
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
|
||||
if (ok(r[i], vals[i%8]) == false) {
|
||||
print("error @ %: got %, expected %\n", i, r[i], vals[i%8]);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_v(uniform float RET[]) {
|
||||
#define width 3
|
||||
#define maxProgramCount 16
|
||||
#define maxProgramCount 64
|
||||
assert(programCount <= maxProgramCount);
|
||||
|
||||
//CO const uniform int width = 3;
|
||||
//CO const uniform int maxProgramCount = 16;
|
||||
uniform float a[width*maxProgramCount], r[width*maxProgramCount];
|
||||
|
||||
@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_v(uniform float RET[]) {
|
||||
#define width 4
|
||||
#define maxProgramCount 16
|
||||
#define maxProgramCount 64
|
||||
assert(programCount <= maxProgramCount);
|
||||
|
||||
//CO const uniform int width = 4;
|
||||
//CO const uniform int maxProgramCount = 16;
|
||||
uniform float a[width*maxProgramCount], r[width*maxProgramCount];
|
||||
|
||||
@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_v(uniform float RET[]) {
|
||||
#define width 3
|
||||
#define maxProgramCount 16
|
||||
#define maxProgramCount 64
|
||||
assert(programCount <= maxProgramCount);
|
||||
|
||||
//CO const uniform int width = 3;
|
||||
//CO const uniform int maxProgramCount = 16;
|
||||
uniform int a[width*maxProgramCount], r[width*maxProgramCount];
|
||||
|
||||
@@ -3,7 +3,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_v(uniform float RET[]) {
|
||||
#define width 4
|
||||
#define maxProgramCount 16
|
||||
#define maxProgramCount 64
|
||||
assert(programCount <= maxProgramCount);
|
||||
|
||||
//CO const uniform int width = 4;
|
||||
//CO const uniform int maxProgramCount = 16;
|
||||
uniform int a[width*maxProgramCount], r[width*maxProgramCount];
|
||||
|
||||
@@ -5,9 +5,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float x[45];
|
||||
uniform float x[programCount+15];
|
||||
uniform int i;
|
||||
for (i = 0; i < 45; ++i)
|
||||
for (i = 0; i < programCount+15; ++i)
|
||||
x[i] = i;
|
||||
|
||||
float ret;
|
||||
|
||||
@@ -10,7 +10,8 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
for (uniform int i = 0; i < 29+b; ++i)
|
||||
for (uniform int j = 0; j < 29+b; ++j)
|
||||
x[i][j] = 0;
|
||||
x[a][a] = a;
|
||||
if (a < 34)
|
||||
x[a][a] = a;
|
||||
RET[programIndex] = x[4][4] + x[1][1] + x[b][b] + x[0][0];
|
||||
}
|
||||
|
||||
|
||||
@@ -12,8 +12,10 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[0] = 1; RET[4] = 5; RET[8] = 9; RET[12] = 13;
|
||||
RET[1] = RET[5] = RET[9] = RET[13] = 0;
|
||||
RET[2] = 6; RET[6] = 14; RET[10] = 22; RET[14] = 30;
|
||||
RET[3] = RET[7] = RET[11] = RET[15] = 3;
|
||||
for (uniform int i = 0; i < programCount; i += 4) {
|
||||
RET[i] = i+1;
|
||||
RET[i+1] = 0;
|
||||
RET[i+2] = 2 * (i+3);
|
||||
RET[i+3] = 3;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -4,9 +4,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float x[45];
|
||||
uniform float x[programCount+5];
|
||||
uniform int i;
|
||||
for (i = 0; i < 45; ++i)
|
||||
for (i = 0; i < programCount+5; ++i)
|
||||
x[i] = i+b;
|
||||
a -= 1;
|
||||
if (a == 3) a = 0;
|
||||
|
||||
@@ -4,9 +4,9 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float x[45];
|
||||
uniform float x[programCount+5];
|
||||
uniform int i;
|
||||
for (i = 0; i < 45; ++i)
|
||||
for (i = 0; i < programCount+5; ++i)
|
||||
x[i] = i+b;
|
||||
RET[programIndex] = x[a];
|
||||
}
|
||||
|
||||
@@ -4,14 +4,14 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
float x[55];
|
||||
float x[programCount+10];
|
||||
uniform int i;
|
||||
for (i = 0; i < 45; ++i)
|
||||
for (i = 0; i < programCount+10; ++i)
|
||||
x[i] = a+b;
|
||||
RET[programIndex] = x[a];
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 6 + programIndex;;
|
||||
RET[programIndex] = 6 + programIndex;
|
||||
}
|
||||
|
||||
@@ -15,6 +15,9 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
x[a][b-1] = 0;
|
||||
else
|
||||
x[a][b-1] = 1;
|
||||
|
||||
a = min(a, 46);
|
||||
|
||||
RET[programIndex] = x[3][a];
|
||||
}
|
||||
|
||||
|
||||
@@ -4,9 +4,10 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float x[47][47];
|
||||
for (uniform int i = 0; i < 47; ++i)
|
||||
for (uniform int j = 0; j < 47; ++j)
|
||||
assert(programCount <= 64);
|
||||
uniform float x[70][70];
|
||||
for (uniform int i = 0; i < 70; ++i)
|
||||
for (uniform int j = 0; j < 70; ++j)
|
||||
x[i][j] = 2+b-5;
|
||||
|
||||
// all are 2 except (4,2) = 0, (4,...) = 1, (4,programCount-1)=2
|
||||
|
||||
@@ -10,6 +10,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
for (uniform int j = 0; j < 47; ++j)
|
||||
x[i][j] = 2+b-5;
|
||||
|
||||
a = min(a,46);
|
||||
x[a][b-1] = 0;
|
||||
RET[programIndex] = x[2][a];
|
||||
}
|
||||
|
||||
@@ -11,7 +11,7 @@ export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
|
||||
uniform int index[4] = { 0, 1, 2, 4 };
|
||||
float v = index[programIndex & 0x3];
|
||||
x[a][v] = 0;
|
||||
x[min(a,39)][v] = 0;
|
||||
RET[programIndex] = x[v+1][v];
|
||||
}
|
||||
|
||||
|
||||
@@ -4,7 +4,7 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
uniform float a[programCount+4];
|
||||
for (unsigned int i = 0; i < programCount+4; ++i)
|
||||
for (uniform int i = 0; i < programCount+4; ++i)
|
||||
a[i] = aFOO[min((int)i, programCount)];
|
||||
|
||||
RET[programIndex] = *(a + 2);
|
||||
|
||||
@@ -4,9 +4,8 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float x[100];
|
||||
// HACK to avoid @llvm.memset...
|
||||
for (uniform int i = 0; i < b*20; ++i)
|
||||
uniform float x[2*programCount];
|
||||
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||
x[i] = 0;
|
||||
|
||||
x[2*(a-1)] = b;
|
||||
|
||||
@@ -4,9 +4,8 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float x[100];
|
||||
// HACK to avoid @llvm.memset...
|
||||
for (uniform int i = 0; i < b*20; ++i)
|
||||
uniform float x[2*programCount];
|
||||
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||
x[i] = 0;
|
||||
|
||||
x[2*(a-1)] = b;
|
||||
|
||||
@@ -5,8 +5,8 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform float x[40];
|
||||
for (uniform int i = 0; i < 40; ++i)
|
||||
uniform float x[programCount+5];
|
||||
for (uniform int i = 0; i < programCount+5; ++i)
|
||||
x[i] = 0.;
|
||||
x[a] = 2;
|
||||
RET[programIndex] = x[4] + x[0] + x[5];
|
||||
|
||||
@@ -4,9 +4,8 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
float x[30];
|
||||
// HACK to avoid @llvm.memset...
|
||||
for (uniform int i = 0; i < b*6; ++i)
|
||||
float x[2*programCount];
|
||||
for (uniform int i = 0; i < 2*programCount; ++i)
|
||||
x[i] = 0;
|
||||
x[a] = a;
|
||||
RET[programIndex] = x[4] + x[0] + x[5];
|
||||
|
||||
@@ -4,14 +4,14 @@ export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
struct Foo {
|
||||
uniform float x[17];
|
||||
uniform float x[programCount+1];
|
||||
};
|
||||
|
||||
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
uniform Foo foo;
|
||||
uniform int i;
|
||||
for (i = 0; i < 17; ++i)
|
||||
for (i = 0; i < programCount+1; ++i)
|
||||
foo.x[i] = i;
|
||||
|
||||
if ((int)a & 1)
|
||||
|
||||
@@ -8,12 +8,12 @@ export void f_v(uniform float RET[]) {
|
||||
uniform float vals[8] = { 0, 1, 0.5, -1, -.87, -.25, 1e-3, -.99999999 };
|
||||
uniform float r[8];
|
||||
foreach (i = 0 ... 8)
|
||||
r[i] = sin(asin(vals[i]));
|
||||
r[i] = sin(asin(vals[i%8]));
|
||||
|
||||
int errors = 0;
|
||||
for (uniform int i = 0; i < 8; ++i) {
|
||||
if (ok(r[i], vals[i]) == false) {
|
||||
print("error @ %: got %, expected %\n", i, r[i], vals[i]);
|
||||
if (ok(r[i], vals[i%8]) == false) {
|
||||
print("error @ %: got %, expected %\n", i, r[i], vals[i%8]);
|
||||
++errors;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -6,14 +6,14 @@ uniform unsigned int32 s = 0;
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1)
|
||||
if (programIndex < 30 && programIndex & 1)
|
||||
b = atomic_or_global(&s, (1 << programIndex));
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int sum = 0;
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
for (uniform int i = 0; i < min(30, programCount); ++i)
|
||||
if (i & 1)
|
||||
sum += (1 << i);
|
||||
RET[programIndex] = sum;
|
||||
|
||||
@@ -5,12 +5,12 @@ uniform unsigned int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1)
|
||||
int32 b = 0;
|
||||
if (programIndex < 32 && programIndex & 1)
|
||||
b = atomic_or_global(&s, (1 << programIndex));
|
||||
RET[programIndex] = popcnt(reduce_max((int32)b));
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = programCount == 1 ? 0 : ((programCount/2) - 1);
|
||||
RET[programIndex] = programCount == 1 ? 0 : ((min(32, programCount)/2) - 1);
|
||||
}
|
||||
|
||||
@@ -7,14 +7,14 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = 0;
|
||||
if (programIndex & 1)
|
||||
b = atomic_or_global(&s, (1 << programIndex));
|
||||
b = atomic_or_global(&s, (1ull << programIndex));
|
||||
RET[programIndex] = (s>>20);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int sum = 0;
|
||||
uniform int64 sum = 0;
|
||||
for (uniform int i = 0; i < programCount; ++i)
|
||||
if (i & 1)
|
||||
sum += (1 << i);
|
||||
sum += (1ull << i);
|
||||
RET[programIndex] = ((unsigned int64)(0xffffffffff000000 | sum)) >> 20;
|
||||
}
|
||||
|
||||
@@ -5,10 +5,10 @@ uniform int32 s = 0;
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
float b = atomic_or_global(&s, (1<<programIndex));
|
||||
float b = atomic_or_global(&s, (1<<min(programIndex,30)));
|
||||
RET[programIndex] = s;
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (1<<programCount)-1;
|
||||
RET[programIndex] = (1<<min(programCount,31))-1;
|
||||
}
|
||||
|
||||
@@ -5,7 +5,8 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
uniform float * uniform buf = uniform new uniform float[32*32];
|
||||
for (uniform int i = 0; i < 32*32; ++i)
|
||||
buf[i] = i;
|
||||
|
||||
|
||||
assert(programIndex <= 64);
|
||||
RET[programIndex] = buf[64-programIndex];
|
||||
}
|
||||
|
||||
|
||||
@@ -3,10 +3,10 @@ export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
int32 i = (1 << programIndex);
|
||||
int32 i = (1 << (programIndex % 28));
|
||||
RET[programIndex] = count_leading_zeros(i);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 31-programIndex;
|
||||
RET[programIndex] = 31-(programIndex%28);
|
||||
}
|
||||
|
||||
@@ -3,10 +3,10 @@ export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
unsigned int64 i = ((unsigned int64)1 << (40+programIndex));
|
||||
unsigned int64 i = ((unsigned int64)1 << min(63, 40+programIndex));
|
||||
RET[programIndex] = count_trailing_zeros(i);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 40+programIndex;
|
||||
RET[programIndex] = min(63, 40+programIndex);
|
||||
}
|
||||
|
||||
@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = exclusive_scan_add(programIndex);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int result[] = { 0, 0, 1, 3, 6, 10, 15, 21, 28,
|
||||
36, 45, 55, 66, 78, 91, 105, 120 };
|
||||
RET[programIndex] = result[programIndex];
|
||||
int es(int v) {
|
||||
uniform int vv[programCount];
|
||||
vv[programIndex] = v;
|
||||
|
||||
uniform int r[programCount];
|
||||
r[0] = 0;
|
||||
for (uniform int i = 1; i < programCount; ++i)
|
||||
r[i] = r[i-1] + vv[i-1];
|
||||
return r[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = es(programIndex);
|
||||
}
|
||||
|
||||
@@ -10,11 +10,19 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
|
||||
int es(int v) {
|
||||
uniform int vv[programCount];
|
||||
vv[programIndex] = v;
|
||||
|
||||
uniform int r[programCount];
|
||||
r[0] = 0;
|
||||
for (uniform int i = 1; i < programCount; ++i)
|
||||
r[i] = r[i-1] + vv[i-1];
|
||||
return r[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int result[16] = { 0, 0, 0, 2, 0, 6, 0, 12,
|
||||
0, 20, 0, 30, 0, 42, 0, 56 };
|
||||
if (programIndex & 1)
|
||||
RET[programIndex] = result[programIndex];
|
||||
else
|
||||
RET[programIndex] = es((programIndex & 1) ? (programIndex+1) : 0);
|
||||
if ((programIndex & 1) == 0)
|
||||
RET[programIndex] = -1;
|
||||
}
|
||||
|
||||
@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = exclusive_scan_add(aFOO[programIndex]);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int result[] = { 0, 1, 3, 6, 10, 15, 21, 28,
|
||||
36, 45, 55, 66, 78, 91, 105, 120, 136 };
|
||||
RET[programIndex] = result[programIndex];
|
||||
int es(int v) {
|
||||
uniform int vv[programCount];
|
||||
vv[programIndex] = v;
|
||||
|
||||
uniform int r[programCount];
|
||||
r[0] = 0;
|
||||
for (uniform int i = 1; i < programCount; ++i)
|
||||
r[i] = r[i-1] + vv[i-1];
|
||||
return r[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = es(programIndex+1);
|
||||
}
|
||||
|
||||
@@ -9,8 +9,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int result[] = { 0, 1, 3, 0, 0, 0, 0, 0,
|
||||
0, 0, 0, 0, 0, 0, 0, 0 };
|
||||
uniform int result[] = { 0, 1, 3 };
|
||||
RET[programIndex] = -1;
|
||||
if (programIndex <= 1)
|
||||
RET[programIndex] = result[programIndex];
|
||||
|
||||
@@ -9,12 +9,20 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
}
|
||||
|
||||
int es(int v) {
|
||||
uniform int vv[programCount];
|
||||
vv[programIndex] = v;
|
||||
|
||||
uniform int r[programCount];
|
||||
r[0] = 0;
|
||||
for (uniform int i = 1; i < programCount; ++i)
|
||||
r[i] = r[i-1] + vv[i-1];
|
||||
return r[programIndex];
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int result[16] = { 0, 0, 0, 2, 0, 6, 0, 12,
|
||||
0, 20, 0, 30, 0, 42, 0, 56 };
|
||||
if (programIndex & 1)
|
||||
RET[programIndex] = result[programIndex];
|
||||
else
|
||||
RET[programIndex] = es((programIndex & 1) ? (programIndex+1) : 0);
|
||||
if ((programIndex & 1) == 0)
|
||||
RET[programIndex] = -1;
|
||||
}
|
||||
|
||||
@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = exclusive_scan_add((float)programIndex);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int result[] = { 0, 0, 1, 3, 6, 10, 15, 21, 28,
|
||||
36, 45, 55, 66, 78, 91, 105, 120 };
|
||||
RET[programIndex] = result[programIndex];
|
||||
int es(int v) {
|
||||
uniform int vv[programCount];
|
||||
vv[programIndex] = v;
|
||||
|
||||
uniform int r[programCount];
|
||||
r[0] = 0;
|
||||
for (uniform int i = 1; i < programCount; ++i)
|
||||
r[i] = r[i-1] + vv[i-1];
|
||||
return r[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = es(programIndex);
|
||||
}
|
||||
|
||||
@@ -5,8 +5,17 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = exclusive_scan_add((double)aFOO[programIndex]);
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
uniform int result[] = { 0, 1, 3, 6, 10, 15, 21, 28,
|
||||
36, 45, 55, 66, 78, 91, 105, 120, 136 };
|
||||
RET[programIndex] = result[programIndex];
|
||||
int es(int v) {
|
||||
uniform int vv[programCount];
|
||||
vv[programIndex] = v;
|
||||
|
||||
uniform int r[programCount];
|
||||
r[0] = 0;
|
||||
for (uniform int i = 1; i < programCount; ++i)
|
||||
r[i] = r[i-1] + vv[i-1];
|
||||
return r[programIndex];
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = es(programIndex+1);
|
||||
}
|
||||
|
||||
@@ -4,7 +4,7 @@ export uniform int width() { return programCount; }
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = -1;
|
||||
int32 a = ~(1 << programIndex);
|
||||
if ((programIndex & 1) == 0) {
|
||||
if ((programIndex < 32) && (programIndex & 1) == 0) {
|
||||
RET[programIndex] = exclusive_scan_and(a);
|
||||
}
|
||||
}
|
||||
@@ -12,7 +12,7 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = -1;
|
||||
if ((programIndex & 1) == 0 && programIndex > 0) {
|
||||
if ((programIndex & 1) == 0 && programIndex > 0 && programIndex < 32) {
|
||||
int val = 0xffffffff;
|
||||
for (int i = 0; i < programIndex-1; i += 2)
|
||||
val &= ~(1<<i);
|
||||
|
||||
@@ -3,11 +3,11 @@ export uniform int width() { return programCount; }
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
RET[programIndex] = -1;
|
||||
int32 a = (1 << programIndex);
|
||||
int32 a = (1 << (min(programIndex, 30)));
|
||||
RET[programIndex] = exclusive_scan_or(a);
|
||||
}
|
||||
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = (1 << programIndex) - 1;
|
||||
RET[programIndex] = (1 << (min(programIndex, 31))) - 1;
|
||||
}
|
||||
|
||||
@@ -10,8 +10,10 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
|
||||
// make sure we reset the func mask in the foreach loop...
|
||||
if ((int)aFOO[programIndex] & 1)
|
||||
foreach (i = 0 ... programCount+3)
|
||||
val[i] += aFOO[i] - 1;
|
||||
foreach (i = 0 ... programCount+3) {
|
||||
int ic = min(i, programCount-1);
|
||||
val[i] += aFOO[ic] - 1 + i-ic;
|
||||
}
|
||||
|
||||
RET[programIndex] = val[3+programIndex];
|
||||
}
|
||||
|
||||
@@ -5,8 +5,10 @@ export uniform int width() { return programCount; }
|
||||
// make sure we reset the func mask in the foreach loop...
|
||||
|
||||
void update(uniform float val[], const uniform float a[]) {
|
||||
foreach (i = 0 ... programCount+3)
|
||||
val[i] += a[i] - 1;
|
||||
foreach (i = 0 ... programCount+3) {
|
||||
int ic = min(i, programCount-1);
|
||||
val[i] += a[ic] - 1 + i-ic;
|
||||
}
|
||||
}
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
|
||||
@@ -3,7 +3,7 @@ export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
double a = (1<<programIndex) * 1.5;
|
||||
double a = (1<< (programIndex % 28)) * 1.5;
|
||||
if (programIndex & 1)
|
||||
a = -a;
|
||||
int exponent;
|
||||
@@ -12,5 +12,5 @@ export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
}
|
||||
|
||||
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1+programIndex;
|
||||
RET[programIndex] = 1+(programIndex % 28);
|
||||
}
|
||||
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user