Initial commit.
This commit is contained in:
2
.gitignore
vendored
Normal file
2
.gitignore
vendored
Normal file
@@ -0,0 +1,2 @@
|
||||
*.pyc
|
||||
*~
|
||||
116
LICENSE.txt
Normal file
116
LICENSE.txt
Normal file
@@ -0,0 +1,116 @@
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
||||
POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
===========================================================================
|
||||
Copyrights and Licenses for Third Party Software Distributed with
|
||||
The Intel(r) SPMD Program Compiler
|
||||
===========================================================================
|
||||
|
||||
ISPC incorporates code from the Syrah library, which is covered by the
|
||||
following license:
|
||||
|
||||
Copyright (c) 2009, Stanford University, and authors listed below.
|
||||
All rights reserved.
|
||||
|
||||
Original authors:
|
||||
Solomon Boulos
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimer.
|
||||
|
||||
Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
Neither the name of Stanford University nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
"AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
---------------------------------------------------------------------------
|
||||
|
||||
Binary distributions of ISPC are linked with the LLVM libraries, which are
|
||||
covered by the following license:
|
||||
|
||||
University of Illinois/NCSA
|
||||
Open Source License
|
||||
|
||||
Copyright (c) 2003-2010 University of Illinois at Urbana-Champaign.
|
||||
All rights reserved.
|
||||
|
||||
Developed by:
|
||||
|
||||
LLVM Team
|
||||
|
||||
University of Illinois at Urbana-Champaign
|
||||
|
||||
http://llvm.org
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy of
|
||||
this software and associated documentation files (the "Software"), to deal with
|
||||
the Software without restriction, including without limitation the rights to
|
||||
use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies
|
||||
of the Software, and to permit persons to whom the Software is furnished to do
|
||||
so, subject to the following conditions:
|
||||
|
||||
* Redistributions of source code must retain the above copyright notice,
|
||||
this list of conditions and the following disclaimers.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright notice,
|
||||
this list of conditions and the following disclaimers in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the names of the LLVM Team, University of Illinois at
|
||||
Urbana-Champaign, nor the names of its contributors may be used to
|
||||
endorse or promote products derived from this Software without specific
|
||||
prior written permission.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS
|
||||
FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
CONTRIBUTORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS WITH THE
|
||||
SOFTWARE.
|
||||
118
Makefile
Normal file
118
Makefile
Normal file
@@ -0,0 +1,118 @@
|
||||
#
# ispc Makefile
#

ARCH = $(shell uname)

# Tools and flags queried from the installed LLVM.
CLANG=clang
LLVM_LIBS=$(shell llvm-config --ldflags --libs) -lpthread -ldl
LLVM_CXXFLAGS=$(shell llvm-config --cppflags)
# The first '.' of the LLVM version is turned into '_' (e.g. 2.9 -> -DLLVM_2_9).
LLVM_VERSION_DEF=-DLLVM_$(shell llvm-config --version | sed s/\\./_/)

# Build identification strings compiled into the binary.
BUILD_DATE=$(shell date +%Y%m%d)
BUILD_VERSION=$(shell git log | head -1)

CXX=g++
CPP=cpp
CXXFLAGS=-g3 $(LLVM_CXXFLAGS) -I. -Iobjs/ -Wall $(LLVM_VERSION_DEF) \
	-DBUILD_DATE="\"$(BUILD_DATE)\"" -DBUILD_VERSION="\"$(BUILD_VERSION)\""

LDFLAGS=
ifeq ($(ARCH),Linux)
  # try to link everything statically under Linux (including libstdc++) so
  # that the binaries we generate will be portable across distributions...
  LDFLAGS=-static -L/usr/lib/gcc/x86_64-linux-gnu/4.4
endif

LEX=flex
YACC=bison -d -v -t

###########################################################################

CXX_SRC=builtins.cpp ctx.cpp decl.cpp expr.cpp ispc.cpp \
	llvmutil.cpp main.cpp module.cpp opt.cpp stmt.cpp sym.cpp type.cpp \
	util.cpp
HEADERS=builtins.h ctx.h decl.h expr.h ispc.h llvmutil.h module.h \
	opt.h stmt.h sym.h type.h util.h
STDLIB_SRC=stdlib-avx.ll stdlib-sse2.ll stdlib-sse4.ll stdlib-sse4x2.ll
BISON_SRC=parse.yy
FLEX_SRC=lex.ll

# Every object file lives under objs/.
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(STDLIB_SRC:.ll=.o) stdlib-c.o stdlib_ispc.o \
	$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))

default: ispc ispc_test

.PHONY: dirs clean depend doxygen
# Keep the generated stdlib C++ sources around rather than deleting them as
# intermediate files.
.PRECIOUS: objs/stdlib-%.cpp

depend: $(CXX_SRC) $(HEADERS)
	@echo Updating dependencies
	@gcc -MM $(CXXFLAGS) $(CXX_SRC) | sed 's_^\([a-z]\)_objs/\1_g' > depend

-include depend

dirs:
	@echo Creating objs/ directory
	@/bin/mkdir -p objs

clean:
	/bin/rm -rf objs ispc ispc_test

doxygen:
	/bin/rm -rf docs/doxygen
	doxygen doxygen.cfg

ispc: dirs $(OBJS)
	@echo Creating ispc executable
	@$(CXX) $(LDFLAGS) -o $@ $(OBJS) $(LLVM_LIBS)

ispc_test: dirs ispc_test.cpp
	@echo Creating ispc_test executable
	@$(CXX) $(LDFLAGS) $(CXXFLAGS) -o $@ ispc_test.cpp $(LLVM_LIBS)

objs/%.o: %.cpp
	@echo Compiling $<
	@$(CXX) $(CXXFLAGS) -o $@ -c $<

objs/parse.cc: parse.yy
	@echo Running bison on $<
	@$(YACC) -o $@ $<

objs/parse.o: objs/parse.cc $(HEADERS)
	@echo Compiling $<
	@$(CXX) $(CXXFLAGS) -o $@ -c $<

objs/lex.cpp: lex.ll
	@echo Running flex on $<
	@$(LEX) -o $@ $<

objs/lex.o: objs/lex.cpp $(HEADERS)
	@echo Compiling $<
	@$(CXX) $(CXXFLAGS) -o $@ -c $<

# The per-target stdlib .ll files are expanded through stdlib.m4, so they
# must be treated as out of date whenever it changes.
$(STDLIB_SRC): stdlib.m4

objs/stdlib-%.cpp: stdlib-%.ll
	@echo Creating C++ source from stdlib file $<
	@m4 stdlib.m4 $< | ./bitcode2cpp.py $< > $@

objs/stdlib-%.o: objs/stdlib-%.cpp
	@echo Compiling $<
	@$(CXX) $(CXXFLAGS) -o $@ -c $<

objs/stdlib-c.cpp: stdlib-c.c
	@echo Creating C++ source from stdlib file $<
	@$(CLANG) -I /opt/l1om/usr/include/ -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py $< > $@

objs/stdlib-c.o: objs/stdlib-c.cpp
	@echo Compiling $<
	@$(CXX) $(CXXFLAGS) -o $@ -c $<

# PI is handed to the C preprocessor when expanding stdlib.ispc; the value
# must be pi to the digits given (3.1415926535), not a transposed variant.
objs/stdlib_ispc.cpp: stdlib.ispc
	@echo Creating C++ source from $<
	@$(CPP) -DISPC=1 -DPI=3.1415926535 $< | ./stdlib2cpp.py > $@

objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
	@echo Compiling $<
	@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
22
READMErst.txt
Normal file
22
READMErst.txt
Normal file
@@ -0,0 +1,22 @@
|
||||
==============================
|
||||
Intel(r) SPMD Program Compiler
|
||||
==============================
|
||||
|
||||
Welcome to the Intel(r) SPMD Program Compiler (ispc)!
|
||||
|
||||
ispc is a new compiler for "single program, multiple data" (SPMD)
|
||||
programs. Under the SPMD model, the programmer writes a program that mostly
|
||||
appears to be a regular serial program, though the execution model is
|
||||
actually that a number of program instances execute in parallel on the
|
||||
hardware. ispc compiles a C-based SPMD programming language to run on the
|
||||
SIMD units of CPUs; it frequently provides a 3x or more speedup on CPUs
|
||||
with 4-wide SSE units, without any of the difficulty of writing intrinsics
|
||||
code.
|
||||
|
||||
ispc is an open source compiler under the BSD license; see the file
|
||||
LICENSE.txt. ispc supports Windows, Mac, and Linux, with both x86 and
|
||||
x86-64 targets. It currently supports the SSE2 and SSE4 instruction sets,
|
||||
though support for AVX should be available soon.
|
||||
|
||||
For more information and examples, as well as a wiki and the bug database,
|
||||
see the ispc distribution site, http://ispc.github.com.
|
||||
34
bitcode2cpp.py
Executable file
34
bitcode2cpp.py
Executable file
@@ -0,0 +1,34 @@
|
||||
#!/usr/bin/python

# Reads LLVM assembly on stdin, assembles it to bitcode with llvm-as, and
# writes to stdout a C/C++ definition of the bitcode as an unsigned char
# array named stdlib_bitcode_<target>, followed by an int
# stdlib_bitcode_<target>_length holding the number of bitcode bytes.
# <target> is derived from the source file name given as the first
# command-line argument (used only for naming; the file is not opened here).

import sys
import string
import re
import subprocess

src = str(sys.argv[1])

# "path/stdlib-sse4x2.ll" -> "sse4x2", "stdlib-c.c" -> "c"; any remaining
# dashes become underscores so the result is usable in a C identifier.
target = re.sub(".*stdlib-", "", src)
target = re.sub(r"\.ll$", "", target)
target = re.sub(r"\.c$", "", target)
target = re.sub("-", "_", target)

try:
    as_out = subprocess.Popen(["llvm-as", "-", "-o", "-"], stdout=subprocess.PIPE)
except (OSError, IOError):
    # Popen reports a missing or non-executable llvm-as via OSError.
    sys.stderr.write("Couldn't run llvm-as to assemble " + src + "\n")
    sys.exit(1)

# bytearray yields integer byte values under both Python 2 and Python 3.
bitcode = bytearray(as_out.stdout.read())
length = len(bitcode)

out = sys.stdout
out.write("unsigned char stdlib_bitcode_" + target + "[] = {\n")
for byte in bitcode:
    out.write("%d\n, \n" % byte)
# A trailing 0 terminates the array; it is not counted in the length.
out.write(" 0 };\n\n\n")
out.write("int stdlib_bitcode_" + target + "_length = " + str(length) + ";\n\n")

as_out.wait()

# Propagate llvm-as's exit status so that make notices assembly failures.
sys.exit(as_out.returncode)
|
||||
617
builtins.cpp
Normal file
617
builtins.cpp
Normal file
@@ -0,0 +1,617 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file builtins.cpp
|
||||
@brief Definitions of functions related to setting up the standard library
|
||||
and other builtins.
|
||||
*/
|
||||
|
||||
#include "builtins.h"
|
||||
#include "type.h"
|
||||
#include "util.h"
|
||||
#include "sym.h"
|
||||
#include "expr.h"
|
||||
#include "llvmutil.h"
|
||||
#include "module.h"
|
||||
#include "ctx.h"
|
||||
|
||||
#include <math.h>
|
||||
#include <stdlib.h>
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Module.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/Linker.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
|
||||
extern int yyparse();
|
||||
struct yy_buffer_state;
|
||||
extern yy_buffer_state *yy_scan_string(const char *);
|
||||
|
||||
|
||||
/** Given an LLVM type, try to find the equivalent ispc type. Note that
|
||||
this is an under-constrained problem due to LLVM's type representations
|
||||
carrying less information than ispc's. (For example, LLVM doesn't
|
||||
distinguish between signed and unsigned integers in its types.)
|
||||
|
||||
However, because this function is only used for generating ispc
|
||||
declarations of functions defined in LLVM bitcode in the stdlib-*.ll
|
||||
files, in practice we can get enough of what we need for the relevant
|
||||
cases to make things work.
|
||||
*/
|
||||
static const Type *
|
||||
lLLVMTypeToISPCType(const llvm::Type *t) {
|
||||
if (t == LLVMTypes::VoidType)
|
||||
return AtomicType::Void;
|
||||
else if (t == LLVMTypes::BoolType)
|
||||
return AtomicType::UniformBool;
|
||||
else if (t == LLVMTypes::Int32Type)
|
||||
return AtomicType::UniformInt32;
|
||||
else if (t == LLVMTypes::FloatType)
|
||||
return AtomicType::UniformFloat;
|
||||
else if (t == LLVMTypes::DoubleType)
|
||||
return AtomicType::UniformDouble;
|
||||
else if (t == LLVMTypes::Int64Type)
|
||||
return AtomicType::UniformInt64;
|
||||
else if (t == LLVMTypes::Int32VectorType)
|
||||
return AtomicType::VaryingInt32;
|
||||
else if (t == LLVMTypes::FloatVectorType)
|
||||
return AtomicType::VaryingFloat;
|
||||
else if (t == LLVMTypes::DoubleVectorType)
|
||||
return AtomicType::VaryingDouble;
|
||||
else if (t == LLVMTypes::Int64VectorType)
|
||||
return AtomicType::VaryingInt64;
|
||||
else if (t == LLVMTypes::Int32PointerType)
|
||||
return new ReferenceType(AtomicType::UniformInt32, false);
|
||||
else if (t == LLVMTypes::FloatPointerType)
|
||||
return new ReferenceType(AtomicType::UniformFloat, false);
|
||||
else if (t == LLVMTypes::Int32VectorPointerType)
|
||||
return new ReferenceType(AtomicType::VaryingInt32, false);
|
||||
else if (t == LLVMTypes::FloatVectorPointerType)
|
||||
return new ReferenceType(AtomicType::VaryingFloat, false);
|
||||
else if (llvm::isa<const llvm::PointerType>(t)) {
|
||||
const llvm::PointerType *pt = llvm::dyn_cast<const llvm::PointerType>(t);
|
||||
|
||||
// Is it a pointer to an unsized array of objects? If so, then
|
||||
// create the equivalent ispc type. Note that it has to be a
|
||||
// reference to an array, since ispc passes arrays to functions by
|
||||
// reference.
|
||||
//
|
||||
// FIXME: generalize this to do more than uniform int32s (that's
|
||||
// all that's necessary for the stdlib currently.)
|
||||
const llvm::ArrayType *at =
|
||||
llvm::dyn_cast<const llvm::ArrayType>(pt->getElementType());
|
||||
if (at && at->getNumElements() == 0 &&
|
||||
at->getElementType() == LLVMTypes::Int32Type)
|
||||
return new ReferenceType(new ArrayType(AtomicType::UniformInt32, 0),
|
||||
false);
|
||||
}
|
||||
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
/** Given an LLVM function declaration, synthesize the equivalent ispc
|
||||
symbol for the function (if possible). Returns true on success, false
|
||||
on failure.
|
||||
*/
|
||||
static bool
|
||||
lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
||||
SourcePos noPos;
|
||||
noPos.name = "__stdlib";
|
||||
|
||||
const llvm::FunctionType *ftype = func->getFunctionType();
|
||||
std::string name = func->getName();
|
||||
|
||||
const Type *returnType = lLLVMTypeToISPCType(ftype->getReturnType());
|
||||
if (!returnType)
|
||||
// return type not representable in ispc -> not callable from ispc
|
||||
return false;
|
||||
|
||||
// Iterate over the arguments and try to find their equivalent ispc
|
||||
// types.
|
||||
std::vector<const Type *> argTypes;
|
||||
for (unsigned int i = 0; i < ftype->getNumParams(); ++i) {
|
||||
const llvm::Type *llvmArgType = ftype->getParamType(i);
|
||||
const Type *type = lLLVMTypeToISPCType(llvmArgType);
|
||||
if (type == NULL)
|
||||
return false;
|
||||
argTypes.push_back(type);
|
||||
}
|
||||
|
||||
FunctionType *funcType = new FunctionType(returnType, argTypes, noPos);
|
||||
Symbol *sym = new Symbol(name, noPos, funcType);
|
||||
sym->function = func;
|
||||
symbolTable->AddFunction(sym);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
/** Given an LLVM module, create ispc symbols for the functions in the
|
||||
module.
|
||||
*/
|
||||
static void
|
||||
lAddModuleSymbols(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
#if 0
|
||||
// FIXME: handle globals?
|
||||
assert(module->global_empty());
|
||||
#endif
|
||||
|
||||
llvm::Module::iterator iter;
|
||||
for (iter = module->begin(); iter != module->end(); ++iter) {
|
||||
llvm::Function *func = iter;
|
||||
lCreateISPCSymbol(func, symbolTable);
|
||||
}
|
||||
}
|
||||
|
||||
/** Declare the function symbol 'bool __is_compile_time_constant_mask(mask type)'.
|
||||
This function will never be defined; it's just a placeholder
|
||||
that will be handled during the optimization process. See the
|
||||
discussion of the implementation of CompileTimeConstantResolvePass for
|
||||
more details.
|
||||
*/
|
||||
static void
|
||||
lDeclareCompileTimeConstant(llvm::Module *module) {
|
||||
SourcePos noPos;
|
||||
noPos.name = "__stdlib";
|
||||
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::BoolType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__is_compile_time_constant_mask", module);
|
||||
func->setOnlyReadsMemory(true);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
|
||||
/** Declare the 'pseudo-gather' functions. When the ispc front-end needs
|
||||
to perform a gather, it generates a call to one of these functions,
|
||||
which have signatures:
|
||||
|
||||
varying int32 __pseudo_gather(varying int32 *, mask)
|
||||
varying int64 __pseudo_gather(varying int64 *, mask)
|
||||
|
||||
These functions are never actually implemented; the
|
||||
GatherScatterFlattenOpt optimization pass finds them and then converts
|
||||
them to make calls to the following functions, which represent gathers
|
||||
from a common base pointer with offsets. This approach allows the
|
||||
front-end to be relatively simple in how it emits address calculation
|
||||
for gathers.
|
||||
|
||||
varying int32 __pseudo_gather_base_offsets_32(uniform int32 *base,
|
||||
int32 offsets, mask)
|
||||
varying int64 __pseudo_gather_base_offsets_64(uniform int64 *base,
|
||||
int64 offsets, mask)
|
||||
|
||||
Then, the GSImprovementsPass optimizations finds these and either
|
||||
converts them to native gather functions or converts them to vector
|
||||
loads, if equivalent.
|
||||
*/
|
||||
static void
|
||||
lDeclarePseudoGathers(llvm::Module *module) {
|
||||
SourcePos noPos;
|
||||
noPos.name = "__stdlib";
|
||||
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_gather_32", module);
|
||||
func->setOnlyReadsMemory(true);
|
||||
func->setDoesNotThrow(true);
|
||||
|
||||
fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false);
|
||||
func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_gather_64", module);
|
||||
func->setOnlyReadsMemory(true);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::Int32VectorType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_gather_base_offsets_32", module);
|
||||
func->setOnlyReadsMemory(true);
|
||||
func->setDoesNotThrow(true);
|
||||
|
||||
fType = llvm::FunctionType::get(LLVMTypes::Int64VectorType, argTypes, false);
|
||||
func = llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_gather_base_offsets_64", module);
|
||||
func->setOnlyReadsMemory(true);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Similarly to the 'pseudo-gathers' defined by lDeclarePseudoGathers(),
|
||||
we also declare (but never define) pseudo-scatter instructions with
|
||||
signatures:
|
||||
|
||||
void __pseudo_scatter_32(varying int32 *, varying int32 values, mask)
|
||||
void __pseudo_scatter_64(varying int64 *, varying int64 values, mask)
|
||||
|
||||
The GatherScatterFlattenOpt optimization pass also finds these and
|
||||
transforms them to scatters like:
|
||||
|
||||
void __pseudo_scatter_base_offsets_32(uniform int32 *base,
|
||||
varying int32 offsets, varying int32 values, mask)
|
||||
void __pseudo_scatter_base_offsets_64(uniform int64 *base,
|
||||
varying int62 offsets, varying int64 values, mask)
|
||||
|
||||
And the GSImprovementsPass in turn converts these to actual native
|
||||
scatters or masked stores.
|
||||
*/
|
||||
static void
|
||||
lDeclarePseudoScatters(llvm::Module *module) {
|
||||
SourcePos noPos;
|
||||
noPos.name = "__stdlib";
|
||||
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_scatter_32", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerVectorType);
|
||||
argTypes.push_back(LLVMTypes::Int64VectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_scatter_64", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_scatter_base_offsets_32", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||
argTypes.push_back(LLVMTypes::Int64VectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_scatter_base_offsets_64", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** This function declares placeholder masked store functions for the
|
||||
front-end to use.
|
||||
|
||||
void __pseudo_masked_store_32(uniform int32 *ptr, varying int32 values, mask)
|
||||
void __pseudo_masked_store_64(uniform int64 *ptr, varying int64 values, mask)
|
||||
|
||||
These in turn are converted to native masked stores or to regular
|
||||
stores (if the mask is all on) by the MaskedStoreOptPass optimization
|
||||
pass.
|
||||
*/
|
||||
static void
|
||||
lDeclarePseudoMaskedStore(llvm::Module *module) {
|
||||
SourcePos noPos;
|
||||
noPos.name = "__stdlib";
|
||||
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::Int32VectorPointerType);
|
||||
argTypes.push_back(LLVMTypes::Int32VectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_masked_store_32", module);
|
||||
func->setDoesNotThrow(true);
|
||||
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
func->setDoesNotCapture(1, true);
|
||||
}
|
||||
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::Int64VectorPointerType);
|
||||
argTypes.push_back(LLVMTypes::Int64VectorType);
|
||||
argTypes.push_back(LLVMTypes::MaskType);
|
||||
|
||||
llvm::FunctionType *fType =
|
||||
llvm::FunctionType::get(LLVMTypes::VoidType, argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(fType, llvm::GlobalValue::ExternalLinkage,
|
||||
"__pseudo_masked_store_64", module);
|
||||
func->setDoesNotThrow(true);
|
||||
func->addFnAttr(llvm::Attribute::AlwaysInline);
|
||||
func->setDoesNotCapture(1, true);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** This utility function takes serialized binary LLVM bitcode and adds its
|
||||
definitions to the given module. Functions in the bitcode that can be
|
||||
mapped to ispc functions are also added to the symbol table.
|
||||
|
||||
@param bitcode Binary LLVM bitcode (e.g. the contents of a *.bc file)
|
||||
@param length Length of the bitcode buffer
|
||||
@param module Module to link the bitcode into
|
||||
@param symbolTable Symbol table to add definitions to
|
||||
*/
|
||||
static void
|
||||
lAddBitcode(const unsigned char *bitcode, int length,
|
||||
llvm::Module *module, SymbolTable *symbolTable) {
|
||||
std::string bcErr;
|
||||
llvm::StringRef sb = llvm::StringRef((char *)bitcode, length);
|
||||
llvm::MemoryBuffer *bcBuf = llvm::MemoryBuffer::getMemBuffer(sb);
|
||||
llvm::Module *bcModule = llvm::ParseBitcodeFile(bcBuf, *g->ctx, &bcErr);
|
||||
if (!bcModule)
|
||||
Error(SourcePos(), "Error parsing stdlib bitcode: %s", bcErr.c_str());
|
||||
else {
|
||||
std::string(linkError);
|
||||
if (llvm::Linker::LinkModules(module, bcModule, &linkError))
|
||||
Error(SourcePos(), "Error linking stdlib bitcode: %s", linkError.c_str());
|
||||
lAddModuleSymbols(module, symbolTable);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
/** Utility routine that defines a constant int32 with given value, adding
|
||||
the symbol to both the ispc symbol table and the given LLVM module.
|
||||
*/
|
||||
static void
|
||||
lDefineConstantInt(const char *name, int val, llvm::Module *module,
|
||||
SymbolTable *symbolTable) {
|
||||
Symbol *pw = new Symbol(name, SourcePos(), AtomicType::UniformConstInt32);
|
||||
pw->isStatic = true;
|
||||
pw->constValue = new ConstExpr(pw->type, val, SourcePos());
|
||||
const llvm::Type *ltype = LLVMTypes::Int32Type;
|
||||
llvm::Constant *linit = LLVMInt32(val);
|
||||
pw->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::InternalLinkage,
|
||||
linit, pw->name.c_str());
|
||||
symbolTable->AddVariable(pw);
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lDefineProgramIndex(llvm::Module *module, SymbolTable *symbolTable) {
|
||||
Symbol *pidx = new Symbol("programIndex", SourcePos(),
|
||||
AtomicType::VaryingConstInt32);
|
||||
pidx->isStatic = true;
|
||||
|
||||
int pi[ISPC_MAX_NVEC];
|
||||
for (int i = 0; i < g->target.vectorWidth; ++i)
|
||||
pi[i] = i;
|
||||
pidx->constValue = new ConstExpr(pidx->type, pi, SourcePos());
|
||||
|
||||
const llvm::Type *ltype = LLVMTypes::Int32VectorType;
|
||||
llvm::Constant *linit = LLVMInt32Vector(pi);
|
||||
pidx->storagePtr = new llvm::GlobalVariable(*module, ltype, true,
|
||||
llvm::GlobalValue::InternalLinkage, linit,
|
||||
pidx->name.c_str());
|
||||
symbolTable->AddVariable(pidx);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
|
||||
bool includeStdlibISPC) {
|
||||
// Add the definitions from the compiled stdlib-c.c file
|
||||
extern unsigned char stdlib_bitcode_c[];
|
||||
extern int stdlib_bitcode_c_length;
|
||||
lAddBitcode(stdlib_bitcode_c, stdlib_bitcode_c_length, module, symbolTable);
|
||||
|
||||
// Next, add the target's custom implementations of the various needed
|
||||
// builtin functions (e.g. __masked_store_32(), etc).
|
||||
switch (g->target.isa) {
|
||||
case Target::SSE2:
|
||||
extern unsigned char stdlib_bitcode_sse2[];
|
||||
extern int stdlib_bitcode_sse2_length;
|
||||
lAddBitcode(stdlib_bitcode_sse2, stdlib_bitcode_sse2_length, module,
|
||||
symbolTable);
|
||||
break;
|
||||
case Target::SSE4:
|
||||
extern unsigned char stdlib_bitcode_sse4[];
|
||||
extern int stdlib_bitcode_sse4_length;
|
||||
extern unsigned char stdlib_bitcode_sse4x2[];
|
||||
extern int stdlib_bitcode_sse4x2_length;
|
||||
switch (g->target.vectorWidth) {
|
||||
case 4:
|
||||
lAddBitcode(stdlib_bitcode_sse4, stdlib_bitcode_sse4_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
case 8:
|
||||
lAddBitcode(stdlib_bitcode_sse4x2, stdlib_bitcode_sse4x2_length,
|
||||
module, symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error in DefineStdlib");
|
||||
}
|
||||
break;
|
||||
case Target::AVX:
|
||||
extern unsigned char stdlib_bitcode_avx[];
|
||||
extern int stdlib_bitcode_avx_length;
|
||||
lAddBitcode(stdlib_bitcode_avx, stdlib_bitcode_avx_length, module,
|
||||
symbolTable);
|
||||
break;
|
||||
default:
|
||||
FATAL("logic error");
|
||||
}
|
||||
|
||||
// Add a declaration of void *ISPCMalloc(int64_t). The user is
|
||||
// responsible for linking in a definition of this if it's needed by
|
||||
// the compiled program.
|
||||
{ std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(llvm::Type::getInt64Ty(*ctx));
|
||||
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidPointerType,
|
||||
argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(ftype, llvm::GlobalValue::ExternalLinkage,
|
||||
"ISPCMalloc", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
// Add a declaration of void ISPCFree(void *). The user is
|
||||
// responsible for linking in a definition of this if it's needed by
|
||||
// the compiled program.
|
||||
{ std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidPointerType,
|
||||
argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(ftype, llvm::GlobalValue::ExternalLinkage,
|
||||
"ISPCFree", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
// Add a declaration of void ISPCLaunch(void *funcPtr, void *data).
|
||||
// The user is responsible for linking in a definition of this if it's
|
||||
// needed by the compiled program.
|
||||
{ std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||
argTypes.push_back(LLVMTypes::VoidPointerType);
|
||||
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidType,
|
||||
argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(ftype, llvm::GlobalValue::ExternalLinkage,
|
||||
"ISPCLaunch", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
// Add a declaration of void ISPCSync(). The user is responsible for
|
||||
// linking in a definition of this if it's needed by the compiled
|
||||
// program.
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidType,
|
||||
argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(ftype, llvm::GlobalValue::ExternalLinkage,
|
||||
"ISPCSync", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
// Add a declaration of void ISPCInstrument(void *, void *, int, int).
|
||||
// The user is responsible for linking in a definition of this if it's
|
||||
// needed by the compiled program.
|
||||
{
|
||||
std::vector<const llvm::Type *> argTypes;
|
||||
argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0));
|
||||
argTypes.push_back(llvm::PointerType::get(llvm::Type::getInt8Ty(*g->ctx), 0));
|
||||
argTypes.push_back(LLVMTypes::Int32Type);
|
||||
argTypes.push_back(LLVMTypes::Int32Type);
|
||||
llvm::FunctionType *ftype = llvm::FunctionType::get(LLVMTypes::VoidType,
|
||||
argTypes, false);
|
||||
llvm::Function *func =
|
||||
llvm::Function::Create(ftype, llvm::GlobalValue::ExternalLinkage,
|
||||
"ISPCInstrument", module);
|
||||
func->setDoesNotThrow(true);
|
||||
}
|
||||
|
||||
// Declare various placeholder functions that the optimizer will later
|
||||
// find and replace with something more useful.
|
||||
lDeclareCompileTimeConstant(module);
|
||||
lDeclarePseudoGathers(module);
|
||||
lDeclarePseudoScatters(module);
|
||||
lDeclarePseudoMaskedStore(module);
|
||||
|
||||
// define the 'programCount' builtin variable
|
||||
lDefineConstantInt("programCount", g->target.vectorWidth, module, symbolTable);
|
||||
|
||||
// define the 'programIndex' builtin
|
||||
lDefineProgramIndex(module, symbolTable);
|
||||
|
||||
// Define __math_lib stuff. This is used by stdlib.ispc, for example, to
|
||||
// figure out which math routines to end up calling...
|
||||
lDefineConstantInt("__math_lib", (int)g->mathLib, module, symbolTable);
|
||||
lDefineConstantInt("__math_lib_ispc", (int)Globals::Math_ISPC, module,
|
||||
symbolTable);
|
||||
lDefineConstantInt("__math_lib_ispc_fast", (int)Globals::Math_ISPCFast,
|
||||
module, symbolTable);
|
||||
lDefineConstantInt("__math_lib_svml", (int)Globals::Math_SVML, module,
|
||||
symbolTable);
|
||||
lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
|
||||
symbolTable);
|
||||
|
||||
if (includeStdlibISPC) {
|
||||
// If the user wants the standard library to be included, parse the
|
||||
// serialized version of the stdlib.ispc file to get its definitions
|
||||
// added.
|
||||
extern const char *stdlib_code;
|
||||
yy_scan_string(stdlib_code);
|
||||
yyparse();
|
||||
}
|
||||
}
|
||||
58
builtins.h
Normal file
58
builtins.h
Normal file
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file builtins.h
|
||||
@brief Declarations of functions related to builtins and the
|
||||
standard library
|
||||
*/
|
||||
|
||||
#ifndef ISPC_STDLIB_H
|
||||
#define ISPC_STDLIB_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
|
||||
/** Adds declarations and definitions of ispc standard library functions
|
||||
and types to the given module.
|
||||
|
||||
@param symbolTable SymbolTable in which to add symbol definitions for
|
||||
stdlib stuff
|
||||
@param ctx llvm::LLVMContext to use for getting types and the
|
||||
like for standard library definitions
|
||||
@param module Module in which to add the declarations/definitions
|
||||
@param includeStdlib Indicates whether the definitions from the stdlib.ispc
|
||||
file should be added to the module.
|
||||
*/
|
||||
void DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *module,
|
||||
bool includeStdlib);
|
||||
|
||||
#endif // ISPC_STDLIB_H
|
||||
507
ctx.h
Normal file
507
ctx.h
Normal file
@@ -0,0 +1,507 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file ctx.h
|
||||
@brief Declaration of the FunctionEmitContext class
|
||||
*/
|
||||
|
||||
#ifndef ISPC_CTX_H
|
||||
#define ISPC_CTX_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
#include <llvm/InstrTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#ifndef LLVM_2_8
|
||||
#include <llvm/Analysis/DIBuilder.h>
|
||||
#endif
|
||||
#include <llvm/Analysis/DebugInfo.h>
|
||||
|
||||
struct CFInfo;
|
||||
|
||||
/** FunctionEmitContext is one of the key classes in ispc; it is used to
|
||||
help with emitting the intermediate representation of a function during
|
||||
compilation. It carries information about the current program context during
|
||||
IR emission (e.g. the basic block into which instructions should be
|
||||
added; or, the current source file and line number, so debugging
|
||||
symbols can be correctly generated). This class also provides a number
|
||||
of helper routines that are useful for code that emits IR.
|
||||
*/
|
||||
class FunctionEmitContext {
|
||||
public:
|
||||
/** Create a new FunctionEmitContext.
|
||||
@param returnType The return type of the function
|
||||
@param function LLVM function in the current module that corresponds
|
||||
to the function
|
||||
@param funSym Symbol that corresponds to the function
|
||||
@param firstStmtPos Source file position of the first statement in the
|
||||
function
|
||||
*/
|
||||
FunctionEmitContext(const Type *returnType, llvm::Function *function, Symbol *funSym,
|
||||
SourcePos firstStmtPos);
|
||||
~FunctionEmitContext();
|
||||
|
||||
/** @name Current basic block management
|
||||
@{
|
||||
*/
|
||||
/** Returns the current basic block pointer */
|
||||
llvm::BasicBlock *GetCurrentBasicBlock();
|
||||
|
||||
/** Set the given llvm::BasicBlock to be the basic block to emit
|
||||
forthcoming instructions into. */
|
||||
void SetCurrentBasicBlock(llvm::BasicBlock *bblock);
|
||||
|
||||
/** @} */
/** @name Mask management
|
||||
@{
|
||||
*/
|
||||
/** Returns the current mask value */
|
||||
llvm::Value *GetMask();
|
||||
|
||||
/** Provides the value of the mask at function entry */
|
||||
void SetEntryMask(llvm::Value *val);
|
||||
|
||||
/** Sets the mask to a new value */
|
||||
void SetMask(llvm::Value *val);
|
||||
|
||||
/** Sets the mask to (oldMask & val) */
|
||||
void MaskAnd(llvm::Value *oldMask, llvm::Value *val);
|
||||
|
||||
/** Sets the mask to (oldMask & ~test) */
|
||||
void MaskAndNot(llvm::Value *oldMask, llvm::Value *test);
|
||||
|
||||
/** Emits a branch instruction to the basic block btrue if any of the
|
||||
lanes of current mask are on and bfalse if none are on. */
|
||||
void BranchIfMaskAny(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse);
|
||||
|
||||
/** Emits a branch instruction to the basic block btrue if all of the
|
||||
lanes of current mask are on and bfalse otherwise. */
|
||||
void BranchIfMaskAll(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse);
|
||||
|
||||
/** Emits a branch instruction to the basic block btrue if none of the
|
||||
lanes of current mask are on and bfalse otherwise. */
|
||||
void BranchIfMaskNone(llvm::BasicBlock *btrue, llvm::BasicBlock *bfalse);
|
||||
/** @} */
|
||||
|
||||
/** @name Control flow management
|
||||
@{
|
||||
*/
|
||||
/** Notifies the FunctionEmitContext that we're starting emission of an
|
||||
'if' statement with a uniform test. The value of the mask going
|
||||
into the 'if' statement is provided in the oldMask parameter. */
|
||||
void StartUniformIf(llvm::Value *oldMask);
|
||||
|
||||
/** Notifies the FunctionEmitContext that we're starting emission of an
|
||||
'if' statement with a varying test. The value of the mask going
|
||||
into the 'if' statement is provided in the oldMask parameter. */
|
||||
void StartVaryingIf(llvm::Value *oldMask);
|
||||
|
||||
/** Notifies the FunctionEmitContext that we're done emitting the IR
|
||||
for an 'if' statement. */
|
||||
void EndIf();
|
||||
|
||||
/** Notifies the FunctionEmitContext that we're starting to emit IR
|
||||
for a loop. Basic blocks are provided for where 'break' and
|
||||
'continue' statements should jump to (if all running lanes want to
|
||||
break or continue), uniformControlFlow indicates whether the loop
|
||||
condition is 'uniform', and oldMask provides the current mask going
|
||||
into the loop. */
|
||||
void StartLoop(llvm::BasicBlock *breakTarget, llvm::BasicBlock *continueTarget,
|
||||
bool uniformControlFlow, llvm::Value *oldMask);
|
||||
|
||||
/** Informs FunctionEmitContext of the value of the mask at the start
|
||||
of a loop body. */
|
||||
void SetLoopMask(llvm::Value *mask);
|
||||
|
||||
/** Informs FunctionEmitContext that code generation for a loop is
|
||||
finished. */
|
||||
void EndLoop();
|
||||
|
||||
/** Emit code for a 'break' statement in a loop. If doCoherenceCheck
|
||||
is true, then if we're in a 'varying' loop, code will be emitted to
|
||||
see if all of the lanes want to break, in which case a jump to the
|
||||
break target will be taken. (For 'uniform' loops, the jump is
|
||||
always done). */
|
||||
void Break(bool doCoherenceCheck);
|
||||
|
||||
/** Emit code for a 'continue' statement in a loop. If
|
||||
doCoherenceCheck is true, then if we're in a 'varying' loop, code
|
||||
will be emitted to see if all of the lanes want to continue, in
|
||||
which case a jump to the continue target will be taken. (For
|
||||
'uniform' loops, the jump is always done). */
|
||||
void Continue(bool doCoherenceCheck);
|
||||
|
||||
/** This method is called by code emitting IR for a loop at the end of
|
||||
the loop body; it restores the lanes of the mask that executed a
|
||||
'continue' statement when going through the loop body in the
|
||||
previous iteration. */
|
||||
void RestoreContinuedLanes();
|
||||
|
||||
/** Returns the current number of nested levels of 'varying' control
|
||||
flow */
|
||||
int VaryingCFDepth() const;
|
||||
|
||||
/** Called to generate code for 'return' statement; value is the
|
||||
expression in the return statement (if non-NULL), and
|
||||
doCoherenceCheck indicates whether instructions should be generated
|
||||
to see if all of the currently-running lanes have returned (if
|
||||
we're under varying control flow). */
|
||||
void CurrentLanesReturned(Expr *value, bool doCoherenceCheck);
|
||||
/** @} */
|
||||
|
||||
/** @name Small helper/utility routines
|
||||
@{
|
||||
*/
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i1 value that indicates if any of the mask lanes are on. */
|
||||
llvm::Value *Any(llvm::Value *mask);
|
||||
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i1 value that indicates if all of the mask lanes are on. */
|
||||
llvm::Value *All(llvm::Value *mask);
|
||||
|
||||
/** Given a boolean mask value of type LLVMTypes::MaskType, return an
|
||||
i32 value wherein the i'th bit is on if and only if the i'th lane
|
||||
of the mask is on. */
|
||||
llvm::Value *LaneMask(llvm::Value *mask);
|
||||
|
||||
/** Given two masks of type LLVMTypes::MaskType, return an i1 value
|
||||
that indicates whether the two masks are equal. */
|
||||
llvm::Value *MasksAllEqual(llvm::Value *mask1, llvm::Value *mask2);
|
||||
|
||||
/** Given a string, create an anonymous global variable to hold its
|
||||
value and return the pointer to the string. */
|
||||
llvm::Value *GetStringPtr(const std::string &str);
|
||||
|
||||
/** Create a new basic block with given name */
|
||||
llvm::BasicBlock *CreateBasicBlock(const char *name);
|
||||
|
||||
/** Given a vector with element type i1, return a vector of type
|
||||
LLVMTypes::BoolVectorType. This method handles the conversion for
|
||||
the targets where the bool vector element type is, for example,
|
||||
i32. */
|
||||
llvm::Value *I1VecToBoolVec(llvm::Value *b);
|
||||
|
||||
/** Emit code to call the user-supplied ISPCMalloc function to
|
||||
allocate space for an object of the given type. Returns the
|
||||
pointer value returned by the ISPCMalloc call. */
|
||||
llvm::Value *EmitMalloc(const llvm::Type *ty);
|
||||
|
||||
/** Emit code to call the user-supplied ISPCFree function, passing it
|
||||
the given pointer to storage previously allocated by an
|
||||
EmitMalloc() call. */
|
||||
void EmitFree(llvm::Value *ptr);
|
||||
|
||||
/** If the user has asked to compile the program with instrumentation,
|
||||
this inserts a callback to the user-supplied instrumentation
|
||||
function at the current point in the code. */
|
||||
void AddInstrumentationPoint(const char *note);
|
||||
/** @} */
|
||||
|
||||
/** @name Debugging support
|
||||
@{
|
||||
*/
|
||||
/** Set the current source file position; subsequent emitted
|
||||
instructions will have this position associated with them if
|
||||
debugging information is being generated. */
|
||||
void SetDebugPos(SourcePos pos);
|
||||
|
||||
SourcePos GetDebugPos() const;
|
||||
|
||||
/** Adds debugging metadata to the given instruction. If pos == NULL,
|
||||
use FunctionEmitContext::currentPos as the source file position for
|
||||
the instruction. Similarly, if a DIScope is provided, it's used
|
||||
and otherwise the scope is found from a GetDIScope() call. This
|
||||
takes a llvm::Value for the instruction rather than an
|
||||
llvm::Instruction for convenience; in calling code we often have
|
||||
Instructions stored using Value pointers; the code here returns
|
||||
silently if it's not actually given an instruction. */
|
||||
void AddDebugPos(llvm::Value *instruction, const SourcePos *pos = NULL,
|
||||
llvm::DIScope *scope = NULL);
|
||||
|
||||
/** Inform the debugging information generation code that a new scope
|
||||
is starting in the source program. */
|
||||
void StartScope();
|
||||
|
||||
/** Inform the debugging information generation code that the current
|
||||
scope is ending in the source program. */
|
||||
void EndScope();
|
||||
|
||||
/** Returns the llvm::DIScope corresponding to the current program
|
||||
scope. */
|
||||
llvm::DIScope GetDIScope() const;
|
||||
|
||||
/** Emits debugging information for the variable represented by
|
||||
sym. */
|
||||
void EmitVariableDebugInfo(Symbol *sym);
|
||||
|
||||
/** Emits debugging information for the function parameter represented
|
||||
by sym. */
|
||||
void EmitFunctionParameterDebugInfo(Symbol *sym);
|
||||
/** @} */
|
||||
|
||||
/** @name IR instruction emission
|
||||
@brief These methods generally closely correspond to LLVM IR
|
||||
instructions. See the LLVM assembly language reference manual
|
||||
(http://llvm.org/docs/LangRef.html) and the LLVM doxygen documentation
|
||||
(http://llvm.org/doxygen) for more information. Here we will only
|
||||
document significant generalizations to the functionality of the
|
||||
corresponding basic LLVM instructions.
|
||||
|
||||
Beyond actually emitting the instruction, the implementations of
|
||||
these methods in FunctionEmitContext also handle adding debugging
|
||||
metadata if debugging symbols are enabled, adding the instructions
|
||||
to the current basic block, and handling generalizations like
|
||||
'varying' lvalues, arithmetic operations with VectorType operands,
|
||||
etc.
|
||||
@{
|
||||
*/
|
||||
/** Emit the binary operator given by the inst parameter. If
|
||||
llvm::Values corresponding to VectorTypes are given as operands,
|
||||
this also handles applying the given operation to the vector
|
||||
elements. */
|
||||
llvm::Value *BinaryOperator(llvm::Instruction::BinaryOps inst,
|
||||
llvm::Value *v0, llvm::Value *v1,
|
||||
const char *name = NULL);
|
||||
|
||||
/** Emit the "not" operator. Like BinaryOperator(), this also handles
|
||||
a VectorType-based operand. */
|
||||
llvm::Value *NotOperator(llvm::Value *v, const char *name = NULL);
|
||||
|
||||
/** Emit a comparison instruction. If the operands are VectorTypes,
|
||||
then a value for the corresponding boolean VectorType is
|
||||
returned. */
|
||||
llvm::Value *CmpInst(llvm::Instruction::OtherOps inst,
|
||||
llvm::CmpInst::Predicate pred,
|
||||
llvm::Value *v0, llvm::Value *v1, const char *name = NULL);
|
||||
|
||||
llvm::Value *BitCastInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *PtrToIntInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *IntToPtrInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *TruncInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *CastInst(llvm::Instruction::CastOps op, llvm::Value *value,
|
||||
const llvm::Type *type, const char *name = NULL);
|
||||
llvm::Instruction *FPCastInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *SExtInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
llvm::Instruction *ZExtInst(llvm::Value *value, const llvm::Type *type,
|
||||
const char *name = NULL);
|
||||
|
||||
/** This GEP method is a generalization of the standard one in LLVM; it
|
||||
supports both uniform and varying basePtr values (an array of
|
||||
pointers) as well as uniform and varying index values (arrays of
|
||||
indices). */
|
||||
llvm::Value *GetElementPtrInst(llvm::Value *basePtr, llvm::Value *index0,
|
||||
llvm::Value *index1, const char *name = NULL);
|
||||
|
||||
/** This is a convenience method to generate a GEP instruction with
|
||||
indices with values with known constant values as the ispc program
|
||||
is being compiled. */
|
||||
llvm::Value *GetElementPtrInst(llvm::Value *basePtr, int v0, int v1,
|
||||
const char *name = NULL);
|
||||
|
||||
/** Load from the memory location(s) given by lvalue. The lvalue may
|
||||
be varying, in which case this corresponds to a gather from the
|
||||
multiple memory locations given by the array of pointer values
|
||||
given by the lvalue. If the lvalue is not varying, then the type
|
||||
parameter may be NULL. */
|
||||
llvm::Value *LoadInst(llvm::Value *lvalue, const Type *type,
|
||||
const char *name = NULL);
|
||||
|
||||
/** Emits an alloca instruction to allocate stack storage for the given
|
||||
type. If a non-zero alignment is specified, the object is also
|
||||
allocated at the given alignment. By default, the alloca
|
||||
instruction is added at the start of the function in the entry
|
||||
basic block; if it should be added to the current basic block, then
|
||||
the atEntryBlock parameter should be false. */
|
||||
llvm::Value *AllocaInst(const llvm::Type *llvmType, const char *name = NULL,
|
||||
int align = 0, bool atEntryBlock = true);
|
||||
|
||||
/** Standard store instruction; for this variant, the lvalue must be a
|
||||
single pointer, not a varying lvalue. */
|
||||
void StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
const char *name = NULL);
|
||||
|
||||
/** In this variant of StoreInst(), the lvalue may be varying. If so,
|
||||
this corresponds to a scatter. Whether the lvalue is uniform or
|
||||
varying, the given storeMask is used to mask the stores so that
|
||||
they only execute for the active program instances. */
|
||||
void StoreInst(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
llvm::Value *storeMask, const Type *rvalueType,
|
||||
const char *name = NULL);
|
||||
|
||||
void BranchInst(llvm::BasicBlock *block);
|
||||
void BranchInst(llvm::BasicBlock *trueBlock, llvm::BasicBlock *falseBlock,
|
||||
llvm::Value *test);
|
||||
|
||||
/** This convenience method maps to an llvm::ExtractElementInst if the
|
||||
given value is a llvm::VectorType, and to an llvm::ExtractValueInst
|
||||
otherwise. */
|
||||
llvm::Value *ExtractInst(llvm::Value *v, int elt, const char *name = NULL);
|
||||
|
||||
/** This convenience method maps to an llvm::InsertElementInst if the
|
||||
given value is a llvm::VectorType, and to an llvm::InsertValueInst
|
||||
otherwise. */
|
||||
llvm::Value *InsertInst(llvm::Value *v, llvm::Value *eltVal, int elt,
|
||||
const char *name = NULL);
|
||||
|
||||
llvm::PHINode *PhiNode(const llvm::Type *type, int count, const char *name = NULL);
|
||||
llvm::Instruction *SelectInst(llvm::Value *test, llvm::Value *val0,
|
||||
llvm::Value *val1, const char *name = NULL);
|
||||
|
||||
llvm::Instruction *CallInst(llvm::Function *func,
|
||||
const std::vector<llvm::Value *> &args,
|
||||
const char *name = NULL);
|
||||
/** This is a convenience method that issues a call instruction to a
|
||||
function that takes just a single argument. */
|
||||
llvm::Instruction *CallInst(llvm::Function *func, llvm::Value *arg,
|
||||
const char *name = NULL);
|
||||
|
||||
/** This is a convenience method that issues a call instruction to a
|
||||
function that takes two arguments. */
|
||||
llvm::Instruction *CallInst(llvm::Function *func, llvm::Value *arg0,
|
||||
llvm::Value *arg1, const char *name = NULL);
|
||||
|
||||
/** Launch an asynchronous task to run the given function, passing it
|
||||
the given argument values. */
|
||||
llvm::Instruction *LaunchInst(llvm::Function *callee,
|
||||
std::vector<llvm::Value *> &argVals);
|
||||
|
||||
llvm::Instruction *ReturnInst();
|
||||
/** @} */
|
||||
|
||||
private:
|
||||
/** The basic block into which we add any alloca instructions that need
|
||||
to go at the very start of the function. */
|
||||
llvm::BasicBlock *allocaBlock;
|
||||
|
||||
/** The current basic block into which we're emitting new
|
||||
instructions */
|
||||
llvm::BasicBlock *bblock;
|
||||
|
||||
/** Pointer to stack-allocated memory that stores the current value of
|
||||
the program mask. */
|
||||
llvm::Value *maskPtr;
|
||||
|
||||
/** Current source file position; if debugging information is being
|
||||
generated, this position is used to set file/line information for
|
||||
instructions. */
|
||||
SourcePos currentPos;
|
||||
|
||||
/** Source file position where the function definition started. Used
|
||||
for error messages and debugging symbols. */
|
||||
SourcePos funcStartPos;
|
||||
|
||||
/** Type of result that the current function returns. */
|
||||
const Type *returnType;
|
||||
|
||||
/** Value of the program mask when the function starts execution. */
|
||||
llvm::Value *entryMask;
|
||||
|
||||
/** If currently in a loop body, the value of the mask at the start of
|
||||
the loop. */
|
||||
llvm::Value *loopMask;
|
||||
|
||||
/** If currently in a loop body, this is a pointer to memory to store a
|
||||
mask value that represents which of the lanes have executed a
|
||||
'break' statement. If we're not in a loop body, this should be
|
||||
NULL. */
|
||||
llvm::Value *breakLanesPtr;
|
||||
|
||||
/** Similar to breakLanesPtr, if we're inside a loop, this is a pointer
|
||||
to memory to record which of the program instances have executed a
|
||||
'continue' statement. */
|
||||
llvm::Value *continueLanesPtr;
|
||||
|
||||
/** If we're inside a loop, this gives the basic block immediately
|
||||
after the current loop, which we will jump to if all of the lanes
|
||||
have executed a break statement or are otherwise done with the
|
||||
loop. */
|
||||
llvm::BasicBlock *breakTarget;
|
||||
|
||||
/** If we're inside a loop, this gives the block to jump to if all of
|
||||
the running lanes have executed a 'continue' statement. */
|
||||
llvm::BasicBlock *continueTarget;
|
||||
|
||||
/** A pointer to memory that records which of the program instances
|
||||
have executed a 'return' statement (and are thus really truly done
|
||||
running any more instructions in this function). */
|
||||
llvm::Value *returnedLanesPtr;
|
||||
|
||||
/** A pointer to memory to store the return value for the function.
|
||||
Since different program instances may execute 'return' statements
|
||||
at different times, we need to accumulate the return values as they
|
||||
come in until we return for real. */
|
||||
llvm::Value *returnValuePtr;
|
||||
|
||||
/** The CFInfo structure records information about a nesting level of
|
||||
control flow. This vector lets us see what control flow is going
|
||||
around outside the current position in the function being
|
||||
emitted. */
|
||||
std::vector<CFInfo *> controlFlowInfo;
|
||||
|
||||
/** DIFile object corresponding to the source file where the current
|
||||
function was defined (used for debugging info). */
|
||||
llvm::DIFile diFile;
|
||||
|
||||
/** DISubprogram corresponding to this function (used for debugging
|
||||
info). */
|
||||
llvm::DISubprogram diFunction;
|
||||
|
||||
/** These correspond to the current set of nested scopes in the
|
||||
function. */
|
||||
std::vector<llvm::DILexicalBlock> debugScopes;
|
||||
|
||||
/** True if a 'launch' statement has been encountered in the function. */
|
||||
bool launchedTasks;
|
||||
|
||||
llvm::Value *pointerVectorToVoidPointers(llvm::Value *value);
|
||||
static void addGSMetadata(llvm::Instruction *inst, SourcePos pos);
|
||||
bool ifsInLoopAllUniform() const;
|
||||
void jumpIfAllLoopLanesAreDone(llvm::BasicBlock *target);
|
||||
llvm::Value *emitGatherCallback(llvm::Value *lvalue, llvm::Value *retPtr);
|
||||
|
||||
void restoreMaskGivenReturns(llvm::Value *oldMask);
|
||||
|
||||
void scatter(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
llvm::Value *maskPtr, const Type *rvalueType);
|
||||
llvm::Value *gather(llvm::Value *lvalue, const Type *type,
|
||||
const char *name);
|
||||
void maskedStore(llvm::Value *rvalue, llvm::Value *lvalue,
|
||||
const Type *rvalueType, llvm::Value *maskPtr);
|
||||
};
|
||||
|
||||
#endif // ISPC_CTX_H
|
||||
348
decl.cpp
Normal file
348
decl.cpp
Normal file
@@ -0,0 +1,348 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file decl.cpp
|
||||
@brief Implementations of classes related to turning declarations into
|
||||
symbols and types.
|
||||
*/
|
||||
|
||||
#include "decl.h"
|
||||
#include "util.h"
|
||||
#include "sym.h"
|
||||
#include "type.h"
|
||||
#include "expr.h"
|
||||
#include <stdio.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// DeclSpecs
|
||||
|
||||
/** Record the base type, storage class and type-qualifier bits supplied by
    the caller; the SOA width and vector size both start out at zero. */
DeclSpecs::DeclSpecs(const Type *t, StorageClass sc, int tq)
    : storageClass(sc), typeQualifier(tq), baseType(t), vectorSize(0),
      soaWidth(0) {
}
|
||||
|
||||
|
||||
void
|
||||
DeclSpecs::Print() const {
|
||||
if (storageClass == SC_EXTERN) printf("extern ");
|
||||
if (storageClass == SC_EXTERN_C) printf("extern \"C\" ");
|
||||
if (storageClass == SC_EXPORT) printf("export ");
|
||||
if (storageClass == SC_STATIC) printf("static ");
|
||||
if (storageClass == SC_TYPEDEF) printf("typedef ");
|
||||
|
||||
if (soaWidth > 0) printf("soa<%d> ", soaWidth);
|
||||
|
||||
if (typeQualifier & TYPEQUAL_INLINE) printf("inline ");
|
||||
if (typeQualifier & TYPEQUAL_CONST) printf("const ");
|
||||
if (typeQualifier & TYPEQUAL_UNIFORM) printf("uniform ");
|
||||
if (typeQualifier & TYPEQUAL_VARYING) printf("varying ");
|
||||
if (typeQualifier & TYPEQUAL_TASK) printf("task ");
|
||||
if (typeQualifier & TYPEQUAL_REFERENCE) printf("reference ");
|
||||
if (typeQualifier & TYPEQUAL_UNSIGNED) printf("unsigned ");
|
||||
|
||||
printf("%s", baseType->GetString().c_str());
|
||||
|
||||
if (vectorSize > 0) printf("<%d>", vectorSize);
|
||||
}
|
||||
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Declarator
|
||||
|
||||
/** Create a declarator for the given symbol at the given source position.
    It starts out as a non-function declarator with no argument list and
    no initializer expression. */
Declarator::Declarator(Symbol *s, SourcePos p)
    : pos(p), sym(s), initExpr(NULL), isFunction(false), functionArgs(NULL) {
}
|
||||
|
||||
|
||||
void
|
||||
Declarator::AddArrayDimension(int size) {
|
||||
assert(size > 0 || size == -1); // -1 -> unsized
|
||||
arraySize.push_back(size);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declarator::InitFromDeclSpecs(DeclSpecs *ds) {
|
||||
sym->type = GetType(ds);
|
||||
|
||||
if (ds->storageClass == SC_STATIC)
|
||||
sym->isStatic = true;
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declarator::Print() const {
|
||||
printf("%s", sym->name.c_str());
|
||||
if (initExpr != NULL) {
|
||||
printf(" = (");
|
||||
initExpr->Print();
|
||||
printf(")");
|
||||
}
|
||||
pos.Print();
|
||||
}
|
||||
|
||||
|
||||
/** Recursively build the type described by a Declarator / DeclSpecs pair.

    Each recursive call peels off one array dimension; once all dimensions
    have been consumed, the base type is formed from the declaration
    specifiers (unsigned / const qualifiers, vector size, uniform vs.
    varying).

    @param decl       Declarator being processed; supplies the array
                      dimensions plus the position and symbol name used in
                      error messages.
    @param ds         Declaration specifiers shared by the declaration.
    @param arrayIter  Position within decl->arraySize of the next dimension
                      to process; callers pass decl->arraySize.begin().
    @return The resulting type, or NULL if it couldn't be formed (the
            failure paths in this function report an error first).
 */
static const Type *
lGetType(const Declarator *decl, DeclSpecs *ds,
         std::vector<int>::const_iterator arrayIter) {
    if (arrayIter == decl->arraySize.end()) {
        // If we don't have an array (or have processed all of the array
        // dimensions in previous recursive calls), we can go ahead and
        // figure out the final non-array type we have here.
        const Type *type = ds->baseType;
        if (type == NULL) {
            Error(decl->pos, "Type not provided in variable declaration for variable \"%s\".",
                  decl->sym->name.c_str());
            return NULL;
        }

        // Account for 'unsigned' and 'const' qualifiers in the type
        if ((ds->typeQualifier & TYPEQUAL_UNSIGNED) != 0) {
            const Type *unsignedType = type->GetAsUnsignedType();
            if (unsignedType != NULL)
                type = unsignedType;
            else
                Error(decl->pos, "\"unsigned\" qualifier is illegal with \"%s\" type.",
                      type->GetString().c_str());
        }
        if ((ds->typeQualifier & TYPEQUAL_CONST) != 0)
            type = type->GetAsConstType();

        if (ds->vectorSize > 0) {
            const AtomicType *atomicType = dynamic_cast<const AtomicType *>(type);
            if (atomicType == NULL) {
                Error(decl->pos, "Only atomic types (int, float, ...) are legal for vector "
                      "types.");
                return NULL;
            }
            type = new VectorType(atomicType, ds->vectorSize);
        }

        // if uniform/varying is specified explicitly, then go with that
        if ((ds->typeQualifier & TYPEQUAL_UNIFORM) != 0)
            return type->GetAsUniformType();
        else if ((ds->typeQualifier & TYPEQUAL_VARYING) != 0)
            return type->GetAsVaryingType();
        else {
            // otherwise, structs are uniform by default and everything
            // else is varying by default
            if (dynamic_cast<const StructType *>(type) != NULL)
                return type->GetAsUniformType();
            else
                return type->GetAsVaryingType();
        }
    }
    else {
        // Peel off one dimension of the array
        int arraySize = *arrayIter;
        ++arrayIter;

        // Get the type, not including the arraySize dimension peeled off
        // above.
        const Type *childType = lGetType(decl, ds, arrayIter);
        if (childType == NULL)
            // The element type couldn't be determined.  Don't wrap NULL in
            // an array type, and don't dereference it in the soa<> error
            // message below.
            return NULL;

        // An unsized dimension (-1) is represented by an element count of 0.
        const int elementCount = (arraySize == -1) ? 0 : arraySize;

        int soaWidth = ds->soaWidth;
        if (soaWidth == 0)
            // If there's no "soa<n>" stuff going on, just return a regular
            // array with the appropriate size
            return new ArrayType(childType, elementCount);
        else {
            // Make sure we actually have an array of structs ..
            const StructType *childStructType =
                dynamic_cast<const StructType *>(childType);
            if (childStructType == NULL) {
                Error(decl->pos, "Illegal to provide soa<%d> qualifier with non-struct "
                      "type \"%s\".", soaWidth, childType->GetString().c_str());
                return new ArrayType(childType, elementCount);
            }
            else if ((soaWidth & (soaWidth - 1)) != 0) {
                Error(decl->pos, "soa<%d> width illegal. Value must be power of two.",
                      soaWidth);
                return NULL;
            }
            else if (arraySize != -1 && (arraySize % soaWidth) != 0) {
                Error(decl->pos, "soa<%d> width must evenly divide array size %d.",
                      soaWidth, arraySize);
                return NULL;
            }
            return new SOAArrayType(childStructType, elementCount, soaWidth);
        }
    }
}
|
||||
|
||||
|
||||
/** Compute the type of this declarator in combination with the given
    declaration specifiers.

    For a function declarator this is a FunctionType built from the return
    type described by ds and the types of the parameter declarations in
    functionArgs; unnamed parameters are given a synthesized
    "__anon_parameter_<n>" symbol, and array parameters are converted to
    references.  A missing return type is treated as void (with a warning),
    which updates ds->baseType.  For anything else it is the variable type,
    wrapped in a ReferenceType if the "reference" qualifier is present.

    @return The type, or NULL if an error was reported.
 */
const Type *
Declarator::GetType(DeclSpecs *ds) const {
    bool hasUniformQual = ((ds->typeQualifier & TYPEQUAL_UNIFORM) != 0);
    bool hasVaryingQual = ((ds->typeQualifier & TYPEQUAL_VARYING) != 0);
    bool isTask = ((ds->typeQualifier & TYPEQUAL_TASK) != 0);
    bool isReference = ((ds->typeQualifier & TYPEQUAL_REFERENCE) != 0);

    if (hasUniformQual && hasVaryingQual) {
        Error(pos, "Can't provide both \"uniform\" and \"varying\" qualifiers.");
        return NULL;
    }

    if (isFunction) {
        std::vector<const Type *> args;
        std::vector<std::string> argNames;
        if (functionArgs) {
            // Loop over the function arguments and get names and types for
            // each one in the args and argNames arrays
            for (unsigned int i = 0; i < functionArgs->size(); ++i) {
                Declaration *d = (*functionArgs)[i];
                Symbol *argSym;
                if (d->declarators.size() == 0) {
                    // function declaration like foo(float), w/o a name for
                    // the parameter
                    char buf[32];
                    sprintf(buf, "__anon_parameter_%u", i);
                    argSym = new Symbol(buf, pos);
                    Declarator *declarator = new Declarator(argSym, argSym->pos);
                    // The parameter's type comes from the parameter's own
                    // declaration specifiers, not from ds, which describes
                    // the function's return type.
                    argSym->type = declarator->GetType(d->declSpecs);
                    d->declarators.push_back(declarator);
                }
                else {
                    assert(d->declarators.size() == 1);
                    argSym = d->declarators[0]->sym;
                }

                // Arrays are passed by reference, so convert array
                // parameters to be references here.
                if (dynamic_cast<const ArrayType *>(argSym->type) != NULL)
                    argSym->type = new ReferenceType(argSym->type,
                                                     argSym->type->IsConstType());

                args.push_back(argSym->type);
                argNames.push_back(argSym->name);
            }
        }

        if (ds->baseType == NULL) {
            Warning(pos, "No return type provided in declaration of function \"%s\". "
                    "Treating as \"void\".", sym->name.c_str());
            ds->baseType = AtomicType::Void;
        }

        if (isReference) {
            Error(pos, "Function return types can't be reference types.");
            return NULL;
        }

        const Type *returnType = lGetType(this, ds, arraySize.begin());
        if (returnType == NULL)
            return NULL;

        bool isExported = (ds->storageClass == SC_EXPORT);
        bool isExternC = (ds->storageClass == SC_EXTERN_C);
        return new FunctionType(returnType, args, pos, &argNames, isTask,
                                isExported, isExternC);
    }
    else {
        if (isTask)
            Error(pos, "\"task\" qualifier illegal in variable declaration \"%s\".",
                  sym->name.c_str());

        const Type *type = lGetType(this, ds, arraySize.begin());

        if (type != NULL && isReference) {
            bool hasConstQual = ((ds->typeQualifier & TYPEQUAL_CONST) != 0);
            type = new ReferenceType(type, hasConstQual);
        }

        return type;
    }
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Declaration
|
||||
|
||||
void
|
||||
Declaration::AddSymbols(SymbolTable *st) const {
|
||||
assert(declSpecs->storageClass != SC_TYPEDEF);
|
||||
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i)
|
||||
if (declarators[i])
|
||||
st->AddVariable(declarators[i]->sym);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
Declaration::Print() const {
|
||||
printf("Declaration: specs [");
|
||||
declSpecs->Print();
|
||||
printf("], declarators [");
|
||||
for (unsigned int i = 0 ; i < declarators.size(); ++i) {
|
||||
declarators[i]->Print();
|
||||
printf("%s", (i == declarators.size() - 1) ? "]" : ", ");
|
||||
}
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
|
||||
void
|
||||
GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames) {
|
||||
for (unsigned int i = 0; i < sd.size(); ++i) {
|
||||
const Type *type = sd[i]->type;
|
||||
// FIXME: making this fake little DeclSpecs here is really
|
||||
// disgusting
|
||||
DeclSpecs ds(type);
|
||||
if (type->IsUniformType())
|
||||
ds.typeQualifier |= TYPEQUAL_UNIFORM;
|
||||
else
|
||||
ds.typeQualifier |= TYPEQUAL_VARYING;
|
||||
|
||||
for (unsigned int j = 0; j < sd[i]->declarators->size(); ++j) {
|
||||
Declarator *d = (*sd[i]->declarators)[j];
|
||||
d->InitFromDeclSpecs(&ds);
|
||||
|
||||
// if it's an unsized array, make it a reference to an unsized
|
||||
// array, so the caller can pass a pointer...
|
||||
const ArrayType *at = dynamic_cast<const ArrayType *>(d->sym->type);
|
||||
if (at && at->GetElementCount() == 0)
|
||||
d->sym->type = new ReferenceType(d->sym->type, type->IsConstType());
|
||||
|
||||
elementTypes->push_back(d->sym->type);
|
||||
elementNames->push_back(d->sym->name);
|
||||
}
|
||||
}
|
||||
}
|
||||
203
decl.h
Normal file
203
decl.h
Normal file
@@ -0,0 +1,203 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file decl.h
|
||||
@brief Declarations related to type declarations; the parser basically
|
||||
creates instances of these classes, which are then turned into actual
|
||||
Types.
|
||||
|
||||
Three classes work together to represent declarations. As an example,
|
||||
consider a declaration like:
|
||||
|
||||
static uniform int foo, bar[10];
|
||||
|
||||
An instance of the Declaration class represents this entire declaration
|
||||
of two variables, 'foo' and 'bar'. It holds a single instance of the
|
||||
DeclSpecs class, which represents the common specifiers for all of the
|
||||
variables--here, that the declaration has the 'static' and 'uniform'
|
||||
qualifiers, and that its basic type is 'int'. Then for each variable
|
||||
declaration, the Declaration class holds an instance of a Declarator,
|
||||
which in turn records the per-variable information like the symbol
|
||||
name, array size (if any), initializer expression, etc.
|
||||
*/
|
||||
|
||||
#ifndef ISPC_DECL_H
|
||||
#define ISPC_DECL_H
|
||||
|
||||
#include "ispc.h"
|
||||
|
||||
/** Storage class that can be attached to a declaration. */
enum StorageClass {
    SC_NONE,      // no storage class given (the DeclSpecs default)
    SC_EXTERN,    // "extern"
    SC_EXPORT,    // "export"
    SC_STATIC,    // "static"
    SC_TYPEDEF,   // "typedef"
    SC_EXTERN_C   // extern "C"
};
|
||||
|
||||
|
||||
/* Multiple qualifiers can be provided with types in declarations;
   therefore, each one is a distinct bit so that they can be ORed together
   into an int and tested individually with a bitwise AND. */
#define TYPEQUAL_NONE       0
#define TYPEQUAL_CONST      (1 << 0)
#define TYPEQUAL_UNIFORM    (1 << 1)
#define TYPEQUAL_VARYING    (1 << 2)
#define TYPEQUAL_TASK       (1 << 3)
#define TYPEQUAL_REFERENCE  (1 << 4)
#define TYPEQUAL_UNSIGNED   (1 << 5)
#define TYPEQUAL_INLINE     (1 << 6)
|
||||
|
||||
/** @brief Representation of the declaration specifiers in a declaration.
|
||||
|
||||
In other words, this represents all of the stuff that applies to all of
|
||||
the (possibly multiple) variables in a declaration.
|
||||
*/
|
||||
class DeclSpecs {
|
||||
public:
|
||||
DeclSpecs(const Type *t = NULL, StorageClass sc = SC_NONE, int tq = TYPEQUAL_NONE);
|
||||
|
||||
void Print() const;
|
||||
|
||||
StorageClass storageClass;
|
||||
|
||||
/** Zero or more of the TYPEQUAL_* values, ANDed together. */
|
||||
int typeQualifier;
|
||||
|
||||
/** The basic type provided in the declaration; this should be an
|
||||
AtomicType, a StructType, or a VectorType; other types (like
|
||||
ArrayTypes) will end up being created if a particular declaration
|
||||
has an array size, etc.
|
||||
*/
|
||||
const Type *baseType;
|
||||
|
||||
/** If this is a declaration with a vector type, this gives the vector
|
||||
width. For non-vector types, this is zero.
|
||||
*/
|
||||
int vectorSize;
|
||||
|
||||
/** If this is a declaration with an "soa<n>" qualifier, this gives the
|
||||
SOA width specified. Otherwise this is zero.
|
||||
*/
|
||||
int soaWidth;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Representation of the declaration of a single variable.
|
||||
|
||||
In conjunction with an instance of the DeclSpecs, this gives us
|
||||
everything we need for a full variable declaration.
|
||||
*/
|
||||
class Declarator {
|
||||
public:
|
||||
Declarator(Symbol *s, SourcePos p);
|
||||
|
||||
/** As the parser peels off array dimension declarations after the
|
||||
symbol name, it calls this method to provide them to the
|
||||
Declarator.
|
||||
*/
|
||||
void AddArrayDimension(int size);
|
||||
|
||||
/** Once a DeclSpecs instance is available, this method completes the
|
||||
initialization of the Symbol, setting its Type accordingly.
|
||||
*/
|
||||
void InitFromDeclSpecs(DeclSpecs *ds);
|
||||
|
||||
/** Get the actual type of the combination of Declarator and the given
|
||||
DeclSpecs */
|
||||
const Type *GetType(DeclSpecs *ds) const;
|
||||
|
||||
void Print() const;
|
||||
|
||||
const SourcePos pos;
|
||||
Symbol *sym;
|
||||
/** If this declarator includes an array specification, the sizes of
|
||||
the array dimensions are represented here.
|
||||
*/
|
||||
std::vector<int> arraySize;
|
||||
/** Initialization expression for the variable. May be NULL. */
|
||||
Expr *initExpr;
|
||||
bool isFunction;
|
||||
std::vector<Declaration *> *functionArgs;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Representation of a full declaration of one or more variables,
|
||||
including the shared DeclSpecs as well as the per-variable Declarators.
|
||||
*/
|
||||
class Declaration {
|
||||
public:
|
||||
Declaration(DeclSpecs *ds, std::vector<Declarator *> *dlist = NULL) {
|
||||
declSpecs = ds;
|
||||
if (dlist != NULL)
|
||||
declarators = *dlist;
|
||||
for (unsigned int i = 0; i < declarators.size(); ++i)
|
||||
if (declarators[i] != NULL)
|
||||
declarators[i]->InitFromDeclSpecs(declSpecs);
|
||||
}
|
||||
Declaration(DeclSpecs *ds, Declarator *d) {
|
||||
declSpecs = ds;
|
||||
if (d) {
|
||||
d->InitFromDeclSpecs(ds);
|
||||
declarators.push_back(d);
|
||||
}
|
||||
}
|
||||
|
||||
/** Adds the symbols for the variables in the declaration to the symbol
|
||||
table. */
|
||||
void AddSymbols(SymbolTable *st) const;
|
||||
void Print() const;
|
||||
|
||||
DeclSpecs *declSpecs;
|
||||
std::vector<Declarator *> declarators;
|
||||
};
|
||||
|
||||
|
||||
/** The parser creates instances of StructDeclaration for the members of
|
||||
structs as it's parsing their declarations. */
|
||||
struct StructDeclaration {
|
||||
StructDeclaration(const Type *t, std::vector<Declarator *> *d)
|
||||
: type(t), declarators(d) { }
|
||||
|
||||
const Type *type;
|
||||
std::vector<Declarator *> *declarators;
|
||||
};
|
||||
|
||||
|
||||
/** Given a set of StructDeclaration instances, this returns the types of
|
||||
the elements of the corresponding struct and their names. */
|
||||
extern void GetStructTypesAndNames(const std::vector<StructDeclaration *> &sd,
|
||||
std::vector<const Type *> *elementTypes,
|
||||
std::vector<std::string> *elementNames);
|
||||
|
||||
#endif // ISPC_DECL_H
|
||||
7
docs/build.sh
Executable file
7
docs/build.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/bin/bash
# Builds the HTML documentation (ispc.html) from ispc.txt in the current
# directory using rst2html.

# Render into a temporary file and rename it only on success, so that a
# failed or missing rst2html doesn't leave a truncated ispc.html behind.
tmpfile=ispc.html.tmp
if rst2html ispc.txt > "$tmpfile"; then
    mv "$tmpfile" ispc.html
else
    status=$?
    rm -f "$tmpfile"
    exit $status
fi

#rst2latex --section-numbering --documentclass=article --documentoptions=DIV=9,10pt,letterpaper ispc.txt > ispc.tex
#pdflatex ispc.tex
#/bin/rm -f ispc.aux ispc.log ispc.out ispc.tex
|
||||
2640
docs/ispc.txt
Normal file
2640
docs/ispc.txt
Normal file
File diff suppressed because it is too large
Load Diff
1685
doxygen.cfg
Normal file
1685
doxygen.cfg
Normal file
File diff suppressed because it is too large
Load Diff
88
examples/README.txt
Normal file
88
examples/README.txt
Normal file
@@ -0,0 +1,88 @@
|
||||
====================
|
||||
ISPC Examples README
|
||||
====================
|
||||
|
||||
This directory has a number of sample ispc programs. Before building them
|
||||
(on any system), install the appropriate ispc compiler binary into a
|
||||
directory in your path. Then, if you're running Windows, open the
|
||||
"examples.sln" file and build from there. For building under Linux/OSX,
|
||||
there are makefiles in each directory that build the examples individually.
|
||||
|
||||
Almost all of them benchmark ispc implementations of the given computation
|
||||
against regular serial C++ implementations, printing out a comparison of
|
||||
the runtimes and the speedup delivered by ispc. It may be instructive to
|
||||
do a side-by-side diff of the C++ and ispc implementations of these
|
||||
algorithms to learn more about writing ispc code.
|
||||
|
||||
AOBench
|
||||
=======
|
||||
|
||||
This is an ISPC implementation of the "AO bench" benchmark
|
||||
(http://syoyo.wordpress.com/2009/01/26/ao-bench-is-evolving/). The command
|
||||
line arguments are:
|
||||
|
||||
ao (num iterations) (xres) (yres)
|
||||
|
||||
It executes the program for the given number of iterations, rendering an
|
||||
(xres x yres) image each time and measuring the computation time with both
|
||||
serial and ispc implementations.
|
||||
|
||||
AOBench_Instrumented
|
||||
====================
|
||||
|
||||
This version of AO Bench is compiled with the --instrument ispc compiler
|
||||
flag. This causes the compiler to emit calls to a (user-supplied)
|
||||
ISPCInstrument() function at interesting places in the compiled code. An
|
||||
example implementation of this function that counts the number of times the
|
||||
callback is made and records some statistics about control flow coherence
|
||||
is provided in the instrument.cpp file.
|
||||
|
||||
*** Note: on Linux, this example currently hits an assertion in LLVM during
|
||||
*** compilation
|
||||
|
||||
Mandelbrot
|
||||
==========
|
||||
|
||||
Mandelbrot set generation. This example is extensively documented at the
|
||||
http://ispc.github.com/example.html page.
|
||||
|
||||
Mandelbrot_tasks
|
||||
================
|
||||
|
||||
Implementation of Mandelbrot set generation that also parallelizes across
|
||||
cores using tasks. Under Windows, a simple task system built on
|
||||
Microsoft's Concurrency Runtime is used (see tasks_concrt.cpp). On OSX, a
|
||||
task system based on Grand Central Dispatch is used (tasks_gcd.cpp), and on
|
||||
Linux, a pthreads-based task system is used (tasks_pthreads.cpp). When
|
||||
using tasks with ispc, no task system is mandated; the user is free to plug
|
||||
in any task system they want, for ease of interoperating with existing task
|
||||
systems.
|
||||
|
||||
Options
|
||||
=======
|
||||
|
||||
This program implements both the Black-Scholes and Binomial options pricing
|
||||
models in both ispc and regular serial C++ code.
|
||||
|
||||
RT
|
||||
==
|
||||
|
||||
This is a simple ray tracer; it reads in camera parameters and a bounding
|
||||
volume hierarchy and renders the scene from the given viewpoint. The
|
||||
command line arguments are:
|
||||
|
||||
rt <scene base name>
|
||||
|
||||
Where <scene base name> is one of "cornell", "teapot", or "sponza".
|
||||
|
||||
The implementation originally derives from the bounding volume hierarchy
|
||||
and triangle intersection code from pbrt; see the pbrt source code and/or
|
||||
"Physically Based Rendering" book for more about the basic algorithmic
|
||||
details.
|
||||
|
||||
Simple
|
||||
======
|
||||
|
||||
This is a simple "hello world" type program that shows a ~10 line
|
||||
application program calling out to a ~5 line ispc program to do a simple
|
||||
computation.
|
||||
26
examples/aobench/Makefile
Normal file
26
examples/aobench/Makefile
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
# Builds the aobench example: the .cpp sources are compiled with $(CXX) and
# ao.ispc with $(ISPC); all generated files go under objs/.
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
ISPCFLAGS=-O2 --fast-math

default: ao

.PHONY: default dirs clean

dirs:
	/bin/mkdir -p objs/

clean:
	/bin/rm -rf objs *~ ao

# "dirs" is phony, so listing it as a normal prerequisite of ao would force
# a relink on every run.  The rules that write into objs/ depend on it as an
# order-only prerequisite instead, which also guarantees the directory
# exists before anything is compiled under "make -j".
ao: objs/ao.o objs/ao_serial.o objs/ao_ispc.o
	$(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/ao_serial.o -lm -lpthread

objs/%.o: %.cpp | dirs
	$(CXX) $< $(CXXFLAGS) -c -o $@

# ao.cpp includes the header that ispc generates.
objs/ao.o: objs/ao_ispc.h

objs/%_ispc.h objs/%_ispc.o: %.ispc | dirs
	$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
182
examples/aobench/ao.cpp
Normal file
182
examples/aobench/ao.cpp
Normal file
@@ -0,0 +1,182 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#ifdef __linux__
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "ao_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
#include "../timing.h"
|
||||
|
||||
#define NSUBSAMPLES 2
|
||||
|
||||
extern void ao_serial(int w, int h, int nsubsamples, float image[]);
|
||||
|
||||
static unsigned int test_iterations;
|
||||
static unsigned int width, height;
|
||||
static unsigned char *img;
|
||||
static float *fimg;
|
||||
|
||||
|
||||
/** Convert a floating-point intensity to an 8-bit channel value: scale by
    255.5, truncate toward zero and limit the result to [0, 255].  NaN maps
    to 0. */
static unsigned char
clamp(float f)
{
    // Do the range check in floating point *before* converting: casting a
    // NaN or a value outside of int's range to int is undefined behavior.
    const double scaled = f * 255.5;

    if (!(scaled > 0.0))    // negative, zero, or NaN
        return 0;
    if (scaled >= 255.0)
        return 255;

    return (unsigned char)(int)scaled;
}
|
||||
|
||||
|
||||
static void
|
||||
savePPM(const char *fname, int w, int h)
|
||||
{
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
img[3 * (y * w + x) + 0] = clamp(fimg[3 *(y * w + x) + 0]);
|
||||
img[3 * (y * w + x) + 1] = clamp(fimg[3 *(y * w + x) + 1]);
|
||||
img[3 * (y * w + x) + 2] = clamp(fimg[3 *(y * w + x) + 2]);
|
||||
}
|
||||
}
|
||||
|
||||
FILE *fp = fopen(fname, "wb");
|
||||
if (!fp) {
|
||||
perror(fname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(fp, "P6\n");
|
||||
fprintf(fp, "%d %d\n", w, h);
|
||||
fprintf(fp, "255\n");
|
||||
fwrite(img, w * h * 3, 1, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
|
||||
// Allocate size bytes of memory with 64-byte alignment.  Returns NULL if
// size is negative or the allocation fails.
float *
AllocAligned(int size) {
#if defined(_WIN32) || defined(_WIN64)
    return (float *)_aligned_malloc(size, 64);
#else
    // posix_memalign() covers both Linux and OS X, so there's no need for a
    // hand-rolled over-allocate-and-adjust scheme (which is easy to get
    // wrong by one byte when the block is already aligned) or for the
    // obsolete memalign().
    void *mem = NULL;
    if (size < 0 || posix_memalign(&mem, 64, (size_t)size) != 0)
        return NULL;
    return (float *)mem;
#endif
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 4) {
|
||||
printf ("%s\n", argv[0]);
|
||||
printf ("Usage: ao [num test iterations] [width] [height]\n");
|
||||
getchar();
|
||||
exit(-1);
|
||||
}
|
||||
else {
|
||||
test_iterations = atoi(argv[1]);
|
||||
width = atoi (argv[2]);
|
||||
height = atoi (argv[3]);
|
||||
}
|
||||
|
||||
// Allocate space for output images
|
||||
img = (unsigned char *)AllocAligned(width * height * 3);
|
||||
fimg = (float *)AllocAligned(sizeof(float) * width * height * 3);
|
||||
|
||||
//
|
||||
// Run the ispc path, test_iterations times, and report the minimum
|
||||
// time for any of them.
|
||||
//
|
||||
double minTimeISPC = 1e30;
|
||||
for (unsigned int i = 0; i < test_iterations; i++) {
|
||||
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
assert(NSUBSAMPLES == 2);
|
||||
|
||||
reset_and_start_timer();
|
||||
ao_ispc(width, height, NSUBSAMPLES, fimg);
|
||||
double t = get_elapsed_mcycles();
|
||||
minTimeISPC = std::min(minTimeISPC, t);
|
||||
}
|
||||
|
||||
// Report results and save image
|
||||
printf("[aobench ispc]:\t\t\t[%.3f] M cycles (%d x %d image)\n", minTimeISPC,
|
||||
width, height);
|
||||
savePPM("ao-ispc.ppm", width, height);
|
||||
|
||||
//
|
||||
// Run the serial path, again test_iteration times, and report the
|
||||
// minimum time.
|
||||
//
|
||||
double minTimeSerial = 1e30;
|
||||
for (unsigned int i = 0; i < test_iterations; i++) {
|
||||
memset((void *)fimg, 0, sizeof(float) * width * height * 3);
|
||||
reset_and_start_timer();
|
||||
ao_serial(width, height, NSUBSAMPLES, fimg);
|
||||
double t = get_elapsed_mcycles();
|
||||
minTimeSerial = std::min(minTimeSerial, t);
|
||||
}
|
||||
|
||||
// Report more results, save another image...
|
||||
printf("[aobench serial]:\t\t[%.3f] M cycles (%d x %d image)\n", minTimeSerial,
|
||||
width, height);
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minTimeSerial / minTimeISPC);
|
||||
savePPM("ao-serial.ppm", width, height);
|
||||
|
||||
return 0;
|
||||
}
|
||||
317
examples/aobench/ao.ispc
Normal file
317
examples/aobench/ao.ispc
Normal file
@@ -0,0 +1,317 @@
|
||||
// -*- mode: c++ -*-
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
Based on Syoyo Fujita's aobench: http://code.google.com/p/aobench
|
||||
*/
|
||||
|
||||
#define NAO_SAMPLES 8
|
||||
#define M_PI 3.1415926535f
|
||||
|
||||
typedef float<3> vec;
|
||||
|
||||
// Result of an intersection query.  The intersection routines only
// overwrite it when they find a hit with 0 < t < the stored t.
struct Isect {
    float t;      // parametric distance along the ray of the closest hit so far
    vec   p;      // hit position
    vec   n;      // surface normal at the hit
    int   hit;    // set to 1 by the intersection routines on a hit
};

// Sphere given by its center and radius.
struct Sphere {
    vec   center;
    float radius;
};

// Plane given by a point on it and its normal.
struct Plane {
    vec p;
    vec n;
};

// Ray given by its origin and direction.
struct Ray {
    vec org;
    vec dir;
};
|
||||
|
||||
// Dot product of two 3-vectors.
static inline float dot(vec a, vec b) {
    float xx = a.x * b.x;
    float yy = a.y * b.y;
    float zz = a.z * b.z;
    return xx + yy + zz;
}
|
||||
|
||||
// Cross product v0 x v1.
static inline vec vcross(vec v0, vec v1) {
    float cx = v0.y * v1.z - v0.z * v1.y;
    float cy = v0.z * v1.x - v0.x * v1.z;
    float cz = v0.x * v1.y - v0.y * v1.x;

    vec result;
    result.x = cx;
    result.y = cy;
    result.z = cz;
    return result;
}
|
||||
|
||||
// Scales v in place by the reciprocal square root of its squared length.
static inline void vnormalize(reference vec v) {
    float scale = rsqrt(dot(v, v));
    v = v * scale;
}
|
||||
|
||||
|
||||
// Intersects ray with plane.  isect is updated only when the hit distance
// t satisfies 0 < t < isect.t; otherwise it is left untouched.
static inline void
ray_plane_intersect(reference Isect isect, reference Ray ray,
                    reference Plane plane) {
    float planeD = -dot(plane.p, plane.n);
    float denom = dot(ray.dir, plane.n);

    // Direction (nearly) perpendicular to the normal: no usable hit.
    cif (abs(denom) < 1.0e-17)
        return;

    float t = -(dot(ray.org, plane.n) + planeD) / denom;

    cif ((t > 0.0) && (t < isect.t)) {
        isect.t = t;
        isect.hit = 1;
        isect.p = ray.org + ray.dir * t;
        isect.n = plane.n;
    }
}
|
||||
|
||||
|
||||
// Intersects ray with sphere using the root -B - sqrt(D) of the quadratic.
// isect is updated only when that root t satisfies 0 < t < isect.t.
static inline void
ray_sphere_intersect(reference Isect isect, reference Ray ray,
                     reference Sphere sphere) {
    vec toOrigin = ray.org - sphere.center;

    float B = dot(toOrigin, ray.dir);
    float C = dot(toOrigin, toOrigin) - sphere.radius * sphere.radius;
    float discriminant = B * B - C;

    cif (discriminant > 0.) {
        float t = -B - sqrt(discriminant);

        cif ((t > 0.0) && (t < isect.t)) {
            vec hitPoint = ray.org + t * ray.dir;
            vec normal = hitPoint - sphere.center;
            vnormalize(normal);

            isect.t = t;
            isect.hit = 1;
            isect.p = hitPoint;
            isect.n = normal;
        }
    }
}
|
||||
|
||||
|
||||
// Fills basis[0..2] with a basis built around n: basis[2] is n itself and
// basis[0], basis[1] are normalized cross products derived from it.
static inline void
orthoBasis(reference vec basis[3], vec n) {
    basis[2] = n;
    basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;

    // Pick the first axis (x, then y, then z) whose component of n lies
    // strictly inside (-0.6, 0.6); fall back to x when none does.
    bool xSmall = (n.x < 0.6) && (n.x > -0.6);
    bool ySmall = (n.y < 0.6) && (n.y > -0.6);
    bool zSmall = (n.z < 0.6) && (n.z > -0.6);

    if (xSmall) {
        basis[1].x = 1.0;
    } else if (ySmall) {
        basis[1].y = 1.0;
    } else if (zSmall) {
        basis[1].z = 1.0;
    } else {
        basis[1].x = 1.0;
    }

    basis[0] = vcross(basis[1], basis[2]);
    vnormalize(basis[0]);

    basis[1] = vcross(basis[2], basis[0]);
    vnormalize(basis[1]);
}
|
||||
|
||||
|
||||
// Estimates the unoccluded fraction at the hit described by isect.
//
// NAO_SAMPLES * NAO_SAMPLES rays are started slightly above the surface
// (offset by eps along the normal), with directions drawn from rngstate in
// the basis built around the normal.  Each ray is tested against the three
// spheres and the plane.  The return value is
// (number of rays that hit nothing) / (number of rays), i.e. in [0, 1].
static inline float
ambient_occlusion(reference Isect isect, reference Plane plane,
                  reference Sphere spheres[3], reference RNGState rngstate) {
    float eps = 0.0001f;
    vec p;
    vec basis[3];
    float occlusion = 0.0;

    // Move the ray origin off the surface before tracing.
    p = isect.p + eps * isect.n;

    orthoBasis(basis, isect.n);

    static const uniform int ntheta = NAO_SAMPLES;
    static const uniform int nphi   = NAO_SAMPLES;
    for (uniform int j = 0; j < ntheta; j++) {
        for (uniform int i = 0; i < nphi; i++) {
            Ray ray;
            Isect occIsect;

            // Two random numbers per sample; the draw order (theta first,
            // then phi) determines the random sequence and must not change.
            float theta = sqrt(frandom(rngstate));
            float phi   = 2.0f * M_PI * frandom(rngstate);
            float x = cos(phi) * theta;
            float y = sin(phi) * theta;
            float z = sqrt(1.0 - theta * theta);

            // local -> global
            float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
            float ry = x * basis[0].y + y * basis[1].y + z * basis[2].y;
            float rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;

            ray.org = p;
            ray.dir.x = rx;
            ray.dir.y = ry;
            ray.dir.z = rz;

            occIsect.t   = 1.0e+17;
            occIsect.hit = 0;

            for (uniform int snum = 0; snum < 3; ++snum)
                ray_sphere_intersect(occIsect, ray, spheres[snum]);
            ray_plane_intersect (occIsect, ray, plane);

            if (occIsect.hit) occlusion += 1.0;
        }
    }

    // Convert the hit count into the fraction of rays that escaped.
    occlusion = (ntheta * nphi - occlusion) / (float)(ntheta * nphi);
    return occlusion;
}
|
||||
|
||||
|
||||
/* Compute the image for the scanlines from [y0,y1), for an overall image
   of width w and height h.

   Each group of four program instances computes the 2x2 subsamples of one
   pixel; with 8 or 16 instances, 2x1 or 2x2 pixels are processed per
   iteration.  The averaged result of every pixel is written to all three
   channels of that pixel in image (3 floats per pixel, row-major).
 */
void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
                  uniform int nsubsamples, reference uniform float image[]) {
    static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
    static Sphere spheres[3] = {
        { { -2.0f, 0.0f, -3.5f }, 0.5f },
        { { -0.5f, 0.0f, -3.0f }, 0.5f },
        { { 1.0f, 0.0f, -2.2f }, 0.5f } };
    RNGState rngstate;

    seed_rng(rngstate, y0);

    // Compute the mapping between the 'programCount'-wide program
    // instances running in parallel and samples in the image.
    //
    // For now, we'll always take four samples per pixel, so start by
    // initializing du and dv with offsets into subpixel samples.  We'll
    // take care of further updating du and dv for the case where we're
    // doing more than 4 program instances in parallel shortly.
    uniform float uSteps[4] = { 0, 1, 0, 1 };
    uniform float vSteps[4] = { 0, 0, 1, 1 };
    float du = uSteps[programIndex % 4] / nsubsamples;
    float dv = vSteps[programIndex % 4] / nsubsamples;

    // Now handle the case where we are able to do more than one pixel's
    // worth of work at once.  nx records the number of pixels in the x
    // direction we do per iteration and ny the number in y.
    uniform int nx = 1, ny = 1;

    if (programCount == 8) {
        // Do two pixels at once in the x direction
        nx = 2;
        if (programIndex >= 4)
            // And shift the offsets for the second pixel's worth of work
            ++du;
    }
    else if (programCount == 16) {
        // Two at once in both x and y
        nx = ny = 2;
        if ((programIndex >= 4 && programIndex < 8) || programIndex >= 12)
            ++du;
        if (programIndex >= 8)
            ++dv;
    }

    // Now loop over all of the pixels, stepping in x and y as calculated
    // above.
    for (uniform int y = y0; y < y1; y += ny) {
        for (uniform int x = 0; x < w; x += nx) {
            // Figure out x,y pixel in NDC
            float px =  (x + du - (w / 2.0f)) / (w / 2.0f);
            float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
            float ret = 0.f;
            Ray ray;
            Isect isect;

            ray.org = 0.f;

            // Poor man's perspective projection
            ray.dir.x = px;
            ray.dir.y = py;
            ray.dir.z = -1.0;
            vnormalize(ray.dir);

            isect.t   = 1.0e+17;
            isect.hit = 0;

            for (uniform int snum = 0; snum < 3; ++snum)
                ray_sphere_intersect(isect, ray, spheres[snum]);
            ray_plane_intersect(isect, ray, plane);

            // Note use of 'coherent' if statement; the set of rays we
            // trace will often all hit or all miss the scene
            cif (isect.hit)
                ret = ambient_occlusion(isect, plane, spheres, rngstate);

            // This is a little grungy; we have results for
            // programCount-worth of values.  Because we're doing 2x2
            // subsamples, we need to peel them off in groups of four,
            // average the four values for each pixel, and update the
            // output image.
            //
            // Store the varying value to a uniform array of the same size.
            // See the discussion about communication among program
            // instances in the ispc user's manual for more discussion on
            // this idiom.
            uniform float retArray[programCount];
            retArray[programIndex] = ret;

            for (uniform int p = 0; p < programCount; p += 4) {
                // Group p/4 holds the samples of pixel
                // (x + group % nx, y + group / nx): this mirrors how du and
                // dv were shifted per group of four instances above.  Each
                // pixel owns 3 consecutive floats, so the offset has to be
                // computed per pixel rather than advanced by one.
                uniform int group = p / 4;
                uniform int pixX = x + group % nx;
                uniform int pixY = y + group / nx;

                // Skip pixels past the right edge or past this scanline
                // range (only possible when nx does not divide w or ny does
                // not divide y1 - y0).
                if (pixX < w && pixY < y1) {
                    uniform int offset = 3 * (pixY * w + pixX);

                    // Get the four sample values for this pixel
                    uniform float sumret = retArray[p] + retArray[p+1] +
                        retArray[p+2] + retArray[p+3];

                    // Normalize by number of samples taken
                    sumret /= nsubsamples * nsubsamples;

                    // Store result in the image
                    image[offset+0] = sumret;
                    image[offset+1] = sumret;
                    image[offset+2] = sumret;
                }
            }
        }
    }
}
|
||||
|
||||
|
||||
// Exported entry point: renders the whole w x h image by running
// ao_scanlines over every scanline, [0, h).
export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
                    uniform float image[]) {
    ao_scanlines(0, h, w, h, nsubsamples, image);
}
|
||||
314
examples/aobench/ao_serial.cpp
Normal file
314
examples/aobench/ao_serial.cpp
Normal file
@@ -0,0 +1,314 @@
|
||||
// -*- mode: c++ -*-
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
Based on Syoyo Fujita's aobench: http://code.google.com/p/aobench
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdlib.h>
|
||||
#include <math.h>
|
||||
|
||||
#ifdef _MSC_VER
// Replacements for srand48()/drand48() for MSVC builds.
//
// The generator state is kept in an unsigned 64-bit integer so that the
// multiply/add wraps with well-defined behaviour; only the low 48 bits are
// used for the returned value.
static unsigned long long drand48_x = 0x1234ABCD330EULL;

// Seeds the generator from x.  The seed is x ^ (x << 16) evaluated on the
// 32-bit pattern of x and then sign-extended to 64 bits.
static inline void srand48(int x) {
    const unsigned int ux = (unsigned int)x;
    drand48_x = (unsigned long long)(long long)(int)(ux ^ (ux << 16));
}

// Advances the generator and returns a value in [0, 1) built from the low
// 48 bits of the state (281474976710656 == 2^48).
static inline double drand48() {
    drand48_x = drand48_x * 0x5DEECE66DULL + 0xBULL;
    return (drand48_x & 0xFFFFFFFFFFFFULL) * (1.0 / 281474976710656.0);
}
#endif // _MSC_VER
|
||||
|
||||
// 3-component float vector, padded to four floats and 16-byte aligned.
// pad carries no information; every constructor sets it to zero so that
// copying a vec never reads an uninitialized float.
#ifdef _MSC_VER
__declspec(align(16))
#endif
struct vec {
    vec() : x(0.f), y(0.f), z(0.f), pad(0.f) { }
    vec(float xx, float yy, float zz) : x(xx), y(yy), z(zz), pad(0.f) { }

    // Scale by a scalar.
    vec operator*(float f) const { return vec(x*f, y*f, z*f); }
    // Component-wise sum.
    vec operator+(const vec &f2) const {
        return vec(x+f2.x, y+f2.y, z+f2.z);
    }
    // Component-wise difference.
    vec operator-(const vec &f2) const {
        return vec(x-f2.x, y-f2.y, z-f2.z);
    }
    // Component-wise product (not a dot or cross product).
    vec operator*(const vec &f2) const {
        return vec(x*f2.x, y*f2.y, z*f2.z);
    }
    float x, y, z;
    float pad;
}
#ifndef _MSC_VER
__attribute__ ((aligned(16)))
#endif
;

// Scalar * vector, so that scaling works with the scalar on either side.
inline vec operator*(float f, const vec &v) { return vec(f*v.x, f*v.y, f*v.z); }
|
||||
|
||||
|
||||
#define NAO_SAMPLES 8
|
||||
|
||||
#ifdef M_PI
|
||||
#undef M_PI
|
||||
#endif
|
||||
#define M_PI 3.1415926535f
|
||||
|
||||
struct Isect {
|
||||
float t;
|
||||
vec p;
|
||||
vec n;
|
||||
int hit;
|
||||
};
|
||||
|
||||
struct Sphere {
|
||||
vec center;
|
||||
float radius;
|
||||
|
||||
};
|
||||
|
||||
struct Plane {
|
||||
vec p;
|
||||
vec n;
|
||||
};
|
||||
|
||||
struct Ray {
|
||||
vec org;
|
||||
vec dir;
|
||||
};
|
||||
|
||||
static inline float dot(const vec &a, const vec &b) {
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
// Cross product v0 x v1.
static inline vec vcross(const vec &v0, const vec &v1) {
    const float cx = v0.y * v1.z - v0.z * v1.y;
    const float cy = v0.z * v1.x - v0.x * v1.z;
    const float cz = v0.x * v1.y - v0.y * v1.x;

    // Default-construct and assign the components, as the result's pad is
    // then the zero written by the default constructor.
    vec result;
    result.x = cx;
    result.y = cy;
    result.z = cz;
    return result;
}
|
||||
|
||||
// Scales v in place by 1 / sqrt(dot(v, v)).
static inline void vnormalize(vec &v) {
    const float scale = 1.f / sqrtf(dot(v, v));
    v = v * scale;
}
|
||||
|
||||
|
||||
static inline void
|
||||
ray_plane_intersect(Isect &isect, Ray &ray,
|
||||
Plane &plane) {
|
||||
float d = -dot(plane.p, plane.n);
|
||||
float v = dot(ray.dir, plane.n);
|
||||
|
||||
if (fabsf(v) < 1.0e-17)
|
||||
return;
|
||||
else {
|
||||
float t = -(dot(ray.org, plane.n) + d) / v;
|
||||
|
||||
if ((t > 0.0) && (t < isect.t)) {
|
||||
isect.t = t;
|
||||
isect.hit = 1;
|
||||
isect.p = ray.org + ray.dir * t;
|
||||
isect.n = plane.n;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
ray_sphere_intersect(Isect &isect, Ray &ray,
|
||||
Sphere &sphere) {
|
||||
vec rs = ray.org - sphere.center;
|
||||
|
||||
float B = dot(rs, ray.dir);
|
||||
float C = dot(rs, rs) - sphere.radius * sphere.radius;
|
||||
float D = B * B - C;
|
||||
|
||||
if (D > 0.) {
|
||||
float t = -B - sqrtf(D);
|
||||
|
||||
if ((t > 0.0) && (t < isect.t)) {
|
||||
isect.t = t;
|
||||
isect.hit = 1;
|
||||
isect.p = ray.org + t * ray.dir;
|
||||
isect.n = isect.p - sphere.center;
|
||||
vnormalize(isect.n);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static inline void
|
||||
orthoBasis(vec basis[3], const vec &n) {
|
||||
basis[2] = n;
|
||||
basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
|
||||
|
||||
if ((n.x < 0.6) && (n.x > -0.6)) {
|
||||
basis[1].x = 1.0;
|
||||
} else if ((n.y < 0.6) && (n.y > -0.6)) {
|
||||
basis[1].y = 1.0;
|
||||
} else if ((n.z < 0.6) && (n.z > -0.6)) {
|
||||
basis[1].z = 1.0;
|
||||
} else {
|
||||
basis[1].x = 1.0;
|
||||
}
|
||||
|
||||
basis[0] = vcross(basis[1], basis[2]);
|
||||
vnormalize(basis[0]);
|
||||
|
||||
basis[1] = vcross(basis[2], basis[0]);
|
||||
vnormalize(basis[1]);
|
||||
}
|
||||
|
||||
|
||||
static float
|
||||
ambient_occlusion(Isect &isect, Plane &plane,
|
||||
Sphere spheres[3]) {
|
||||
float eps = 0.0001f;
|
||||
vec p, n;
|
||||
vec basis[3];
|
||||
float occlusion = 0.0;
|
||||
|
||||
p = isect.p + eps * isect.n;
|
||||
|
||||
orthoBasis(basis, isect.n);
|
||||
|
||||
static const int ntheta = NAO_SAMPLES;
|
||||
static const int nphi = NAO_SAMPLES;
|
||||
for (int j = 0; j < ntheta; j++) {
|
||||
for (int i = 0; i < nphi; i++) {
|
||||
Ray ray;
|
||||
Isect occIsect;
|
||||
|
||||
float theta = sqrtf(drand48());
|
||||
float phi = 2.0f * M_PI * drand48();
|
||||
float x = cosf(phi) * theta;
|
||||
float y = sinf(phi) * theta;
|
||||
float z = sqrtf(1.0 - theta * theta);
|
||||
|
||||
// local . global
|
||||
float rx = x * basis[0].x + y * basis[1].x + z * basis[2].x;
|
||||
float ry = x * basis[0].y + y * basis[1].y + z * basis[2].y;
|
||||
float rz = x * basis[0].z + y * basis[1].z + z * basis[2].z;
|
||||
|
||||
ray.org = p;
|
||||
ray.dir.x = rx;
|
||||
ray.dir.y = ry;
|
||||
ray.dir.z = rz;
|
||||
|
||||
occIsect.t = 1.0e+17;
|
||||
occIsect.hit = 0;
|
||||
|
||||
for (int snum = 0; snum < 3; ++snum)
|
||||
ray_sphere_intersect(occIsect, ray, spheres[snum]);
|
||||
ray_plane_intersect (occIsect, ray, plane);
|
||||
|
||||
if (occIsect.hit) occlusion += 1.0;
|
||||
}
|
||||
}
|
||||
|
||||
occlusion = (ntheta * nphi - occlusion) / (float)(ntheta * nphi);
|
||||
return occlusion;
|
||||
}
|
||||
|
||||
|
||||
/* Compute the image for the scanlines from [y0,y1), for an overall image
|
||||
of width w and height h.
|
||||
*/
|
||||
static void ao_scanlines(int y0, int y1, int w, int h, int nsubsamples,
|
||||
float image[]) {
|
||||
static Plane plane = { vec(0.0f, -0.5f, 0.0f), vec(0.f, 1.f, 0.f) };
|
||||
static Sphere spheres[3] = {
|
||||
{ vec(-2.0f, 0.0f, -3.5f), 0.5f },
|
||||
{ vec(-0.5f, 0.0f, -3.0f), 0.5f },
|
||||
{ vec(1.0f, 0.0f, -2.2f), 0.5f } };
|
||||
|
||||
srand48(y0);
|
||||
|
||||
for (int y = y0; y < y1; ++y) {
|
||||
for (int x = 0; x < w; ++x) {
|
||||
int offset = 3 * (y * w + x);
|
||||
for (int u = 0; u < nsubsamples; ++u) {
|
||||
for (int v = 0; v < nsubsamples; ++v) {
|
||||
float px = (x + (u / (float)nsubsamples) - (w / 2.0f)) / (w / 2.0f);
|
||||
float py = -(y + (v / (float)nsubsamples) - (h / 2.0f)) / (h / 2.0f);
|
||||
float ret = 0.f;
|
||||
Ray ray;
|
||||
Isect isect;
|
||||
|
||||
ray.org = vec(0.f, 0.f, 0.f);
|
||||
|
||||
ray.dir.x = px;
|
||||
ray.dir.y = py;
|
||||
ray.dir.z = -1.0;
|
||||
vnormalize(ray.dir);
|
||||
|
||||
isect.t = 1.0e+17;
|
||||
isect.hit = 0;
|
||||
|
||||
for (int snum = 0; snum < 3; ++snum)
|
||||
ray_sphere_intersect(isect, ray, spheres[snum]);
|
||||
ray_plane_intersect(isect, ray, plane);
|
||||
|
||||
if (isect.hit)
|
||||
ret = ambient_occlusion(isect, plane, spheres);
|
||||
|
||||
// Update image for AO for this ray
|
||||
image[offset+0] += ret;
|
||||
image[offset+1] += ret;
|
||||
image[offset+2] += ret;
|
||||
}
|
||||
}
|
||||
// Normalize image pixels by number of samples taken per pixel
|
||||
image[offset+0] /= nsubsamples * nsubsamples;
|
||||
image[offset+1] /= nsubsamples * nsubsamples;
|
||||
image[offset+2] /= nsubsamples * nsubsamples;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
// Serial entry point: renders the whole w x h image by running
// ao_scanlines over every scanline, [0, h).
void ao_serial(int w, int h, int nsubsamples,
               float image[]) {
    const int firstRow = 0;
    const int endRow = h;
    ao_scanlines(firstRow, endRow, w, h, nsubsamples, image);
}
|
||||
161
examples/aobench/aobench.vcxproj
Executable file
161
examples/aobench/aobench.vcxproj
Executable file
@@ -0,0 +1,161 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ao.cpp" />
|
||||
<ClCompile Include="ao_serial.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="ao.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>aobench</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
26
examples/aobench_instrumented/Makefile
Normal file
26
examples/aobench_instrumented/Makefile
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
CXX=g++
CXXFLAGS=-Iobjs/ -g3 -Wall
ISPC=ispc
ISPCFLAGS=-O2 --fast-math --instrument

default: ao

.PHONY: default dirs clean

dirs:
	/bin/mkdir -p objs/

clean:
	/bin/rm -rf objs *~ ao

# 'dirs' is phony, so it must not be a normal prerequisite here: that would
# force a relink on every invocation.  The rules that write into objs/ pull
# it in as an order-only prerequisite instead.
ao: objs/ao.o objs/instrument.o objs/ao_ispc.o
	$(CXX) $(CXXFLAGS) -o $@ objs/ao.o objs/ao_ispc.o objs/instrument.o -lm -lpthread

# '| dirs' (order-only): objs/ is created before compiling, also under
# 'make -j', without making the object files depend on its timestamp.
objs/%.o: %.cpp | dirs
	$(CXX) $< $(CXXFLAGS) -c -o $@

# ao.cpp includes the header that ispc generates.
objs/ao.o: objs/ao_ispc.h

objs/%_ispc.h objs/%_ispc.o: %.ispc | dirs
	$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
148
examples/aobench_instrumented/ao.cpp
Normal file
148
examples/aobench_instrumented/ao.cpp
Normal file
@@ -0,0 +1,148 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#ifdef __linux__
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#include <math.h>
|
||||
#include <map>
|
||||
#include <string>
|
||||
#include <algorithm>
|
||||
#include <sys/types.h>
|
||||
|
||||
#include "ao_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
#include "instrument.h"
|
||||
#include "../timing.h"
|
||||
|
||||
#define NSUBSAMPLES 2
|
||||
|
||||
static unsigned int test_iterations;
|
||||
static unsigned int width, height;
|
||||
static unsigned char *img;
|
||||
static float *fimg;
|
||||
|
||||
|
||||
// Convert a floating-point intensity to an 8-bit channel value.
//
// The input is scaled by 255.5 and truncated toward zero, and the result is
// saturated to [0, 255].  Saturation happens before the integer conversion
// because converting a NaN or out-of-range floating-point value to int is
// undefined behavior.  NaN maps to 0.
static unsigned char
clamp(float f)
{
    const double scaled = f * 255.5;

    // Written as a negated '>' so that NaN is caught here as well.
    if (!(scaled > 0.0))
        return 0;
    if (scaled >= 255.0)
        return 255;

    return (unsigned char)(int)scaled;
}
|
||||
|
||||
|
||||
static void
|
||||
savePPM(const char *fname, int w, int h)
|
||||
{
|
||||
for (int y = 0; y < h; y++) {
|
||||
for (int x = 0; x < w; x++) {
|
||||
img[3 * (y * w + x) + 0] = clamp(fimg[3 *(y * w + x) + 0]);
|
||||
img[3 * (y * w + x) + 1] = clamp(fimg[3 *(y * w + x) + 1]);
|
||||
img[3 * (y * w + x) + 2] = clamp(fimg[3 *(y * w + x) + 2]);
|
||||
}
|
||||
}
|
||||
|
||||
FILE *fp = fopen(fname, "wb");
|
||||
if (!fp) {
|
||||
perror(fname);
|
||||
exit(1);
|
||||
}
|
||||
|
||||
fprintf(fp, "P6\n");
|
||||
fprintf(fp, "%d %d\n", w, h);
|
||||
fprintf(fp, "255\n");
|
||||
fwrite(img, w * h * 3, 1, fp);
|
||||
fclose(fp);
|
||||
}
|
||||
|
||||
|
||||
// Allocate 'size' bytes of memory aligned to a 64-byte boundary.
//
// Returns NULL if 'size' is negative or the allocation fails.
//
// NOTE: the pointer returned by the OS X branch is not the pointer malloc()
// returned; the original pointer is stored in the sizeof(void*) bytes
// immediately before the returned block.
float *
AllocAligned(int size) {
    if (size < 0)
        return NULL;
#if defined(_WIN32) || defined(_WIN64)
    return (float *)_aligned_malloc(size, 64);
#elif defined (__APPLE__)
    // Allocate excess memory to ensure an aligned pointer can be returned,
    // plus room to stash the pointer that malloc() handed back.
    void *mem = malloc(size + (64-1) + sizeof(void*));
    if (mem == NULL)
        return NULL;
    char *amem = ((char*)mem) + sizeof(void*);
    // Round up to the next multiple of 64.  The final mask makes the
    // adjustment 0 (rather than 64) when amem is already aligned; advancing
    // by a full 64 would leave only size-1 usable bytes in the allocation.
    amem += (64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1))) & (64 - 1);
    ((void**)amem)[-1] = mem;
    return (float *)amem;
#else
    // posix_memalign() is declared in <stdlib.h>; memalign() is obsolete.
    void *mem = NULL;
    if (posix_memalign(&mem, 64, size) != 0)
        return NULL;
    return (float *)mem;
#endif
}
|
||||
|
||||
|
||||
int main(int argc, char **argv)
|
||||
{
|
||||
if (argc != 4) {
|
||||
printf ("%s\n", argv[0]);
|
||||
printf ("Usage: ao [num test iterations] [width] [height]\n");
|
||||
getchar();
|
||||
exit(-1);
|
||||
}
|
||||
else {
|
||||
test_iterations = atoi(argv[1]);
|
||||
width = atoi (argv[2]);
|
||||
height = atoi (argv[3]);
|
||||
}
|
||||
|
||||
// Allocate space for output images
|
||||
img = (unsigned char *)AllocAligned(width * height * 3);
|
||||
fimg = (float *)AllocAligned(sizeof(float) * width * height * 3);
|
||||
|
||||
ao_ispc(width, height, NSUBSAMPLES, fimg);
|
||||
|
||||
savePPM("ao-ispc.ppm", width, height);
|
||||
|
||||
ISPCPrintInstrument();
|
||||
|
||||
return 0;
|
||||
}
|
||||
317
examples/aobench_instrumented/ao.ispc
Normal file
317
examples/aobench_instrumented/ao.ispc
Normal file
@@ -0,0 +1,317 @@
|
||||
// -*- mode: c++ -*-
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
/*
|
||||
Based on Syoyo Fujita's aobench: http://code.google.com/p/aobench
|
||||
*/
|
||||
|
||||
#define NAO_SAMPLES 8
|
||||
#define M_PI 3.1415926535f
|
||||
|
||||
typedef float<3> vec;
|
||||
|
||||
// Closest intersection found so far along a ray.
struct Isect {
    float t;     // ray parameter of the recorded hit; tests only accept a smaller t
    vec p, n;    // hit position and surface normal at the hit
    int hit;     // set to 1 by the intersection routines when a hit is recorded
};
|
||||
|
||||
// Sphere described by its center point and radius.
struct Sphere {
    vec center;
    float radius;
};
|
||||
|
||||
// Plane described by a point on the plane (p) and its normal (n).
struct Plane {
    vec p, n;
};
|
||||
|
||||
// Ray described by its origin and direction.
struct Ray {
    vec org, dir;
};
|
||||
|
||||
// Dot product of two 3-vectors.
static inline float dot(vec a, vec b) {
    float sum = a.x * b.x;
    sum += a.y * b.y;
    sum += a.z * b.z;
    return sum;
}
|
||||
|
||||
// Cross product v0 x v1 of two 3-vectors.
static inline vec vcross(vec v0, vec v1) {
    vec cross;
    cross.x = v0.y * v1.z - v0.z * v1.y;
    cross.y = v0.z * v1.x - v0.x * v1.z;
    cross.z = v0.x * v1.y - v0.y * v1.x;
    return cross;
}
|
||||
|
||||
// Scale v in place by the reciprocal square root of its squared length.
static inline void vnormalize(reference vec v) {
    v *= rsqrt(dot(v, v));
}
|
||||
|
||||
|
||||
// Intersect 'ray' with 'plane'.  If the hit lies in front of the ray origin
// and is closer than the hit already stored in 'isect', 'isect' is updated
// with the new distance, position and normal; otherwise it is left untouched.
static inline void
ray_plane_intersect(reference Isect isect, reference Ray ray,
                    reference Plane plane) {
    float denom = dot(ray.dir, plane.n);

    // Ray is (nearly) parallel to the plane: nothing to record.
    cif (abs(denom) < 1.0e-17)
        return;

    float planeD = -dot(plane.p, plane.n);
    float tHit = -(dot(ray.org, plane.n) + planeD) / denom;

    cif ((tHit > 0.0) && (tHit < isect.t)) {
        isect.t = tHit;
        isect.hit = 1;
        isect.p = ray.org + ray.dir * tHit;
        isect.n = plane.n;
    }
}
|
||||
|
||||
|
||||
// Intersect 'ray' with 'sphere'.  If the nearer root lies in front of the
// ray origin and is closer than the hit already stored in 'isect', 'isect'
// is updated with the new distance, position and (normalized) normal.
static inline void
ray_sphere_intersect(reference Isect isect, reference Ray ray,
                     reference Sphere sphere) {
    vec toOrigin = ray.org - sphere.center;

    float b = dot(toOrigin, ray.dir);
    float c = dot(toOrigin, toOrigin) - sphere.radius * sphere.radius;
    float disc = b * b - c;

    // Only a positive discriminant is treated as an intersection.
    cif (disc > 0.) {
        float tNear = -b - sqrt(disc);

        cif ((tNear > 0.0) && (tNear < isect.t)) {
            isect.t = tNear;
            isect.hit = 1;
            isect.p = ray.org + tNear * ray.dir;
            isect.n = isect.p - sphere.center;
            vnormalize(isect.n);
        }
    }
}
|
||||
|
||||
|
||||
// Build three basis vectors around 'n': basis[2] is n itself, and basis[0]
// and basis[1] are derived from it with normalized cross products.
static inline void
orthoBasis(reference vec basis[3], vec n) {
    basis[2] = n;

    // Seed the second axis with the first coordinate axis along which n has
    // a component of magnitude below 0.6; fall back to x if none qualifies.
    basis[1].x = 0.0; basis[1].y = 0.0; basis[1].z = 0.0;
    if (abs(n.x) < 0.6)
        basis[1].x = 1.0;
    else if (abs(n.y) < 0.6)
        basis[1].y = 1.0;
    else if (abs(n.z) < 0.6)
        basis[1].z = 1.0;
    else
        basis[1].x = 1.0;

    basis[0] = vcross(basis[1], basis[2]);
    vnormalize(basis[0]);

    // Recompute the second axis from the other two.
    basis[1] = vcross(basis[2], basis[0]);
    vnormalize(basis[1]);
}
|
||||
|
||||
|
||||
// Estimate the unoccluded fraction at the hit point described by 'isect'.
//
// NAO_SAMPLES * NAO_SAMPLES random rays are traced from a point nudged off
// the surface along the normal, and each is tested against the three
// spheres and the plane.  Returns (samples - hits) / samples.
static inline float
ambient_occlusion(reference Isect isect, reference Plane plane,
                  reference Sphere spheres[3], reference RNGState rngstate) {
    float eps = 0.0001f;
    float occlusion = 0.0;

    // Start the occlusion rays slightly above the surface.
    vec origin = isect.p + eps * isect.n;

    vec basis[3];
    orthoBasis(basis, isect.n);

    static const uniform int ntheta = NAO_SAMPLES;
    static const uniform int nphi = NAO_SAMPLES;
    for (uniform int s = 0; s < ntheta * nphi; s++) {
        Ray ray;
        Isect occIsect;

        float theta = sqrt(frandom(rngstate));
        float phi = 2.0f * M_PI * frandom(rngstate);
        float x = cos(phi) * theta;
        float y = sin(phi) * theta;
        float z = sqrt(1.0 - theta * theta);

        // local -> global
        ray.org = origin;
        ray.dir.x = x * basis[0].x + y * basis[1].x + z * basis[2].x;
        ray.dir.y = x * basis[0].y + y * basis[1].y + z * basis[2].y;
        ray.dir.z = x * basis[0].z + y * basis[1].z + z * basis[2].z;

        occIsect.t = 1.0e+17;
        occIsect.hit = 0;

        for (uniform int snum = 0; snum < 3; ++snum)
            ray_sphere_intersect(occIsect, ray, spheres[snum]);
        ray_plane_intersect (occIsect, ray, plane);

        if (occIsect.hit) occlusion += 1.0;
    }

    occlusion = (ntheta * nphi - occlusion) / (float)(ntheta * nphi);
    return occlusion;
}
|
||||
|
||||
|
||||
/* Compute the image for the scanlines from [y0,y1), for an overall image
   of width w and height h.

   'image' holds three floats per pixel in row-major order; the same
   occlusion value is stored in all three.  Each pixel is the sum of four
   sub-pixel samples divided by nsubsamples * nsubsamples.

   NOTE(review): the lane-to-sample mapping below always takes four samples
   per pixel, so that normalization looks correct only for nsubsamples == 2.
 */
void ao_scanlines(uniform int y0, uniform int y1, uniform int w, uniform int h,
                  uniform int nsubsamples, reference uniform float image[]) {
    static Plane plane = { { 0.0f, -0.5f, 0.0f }, { 0.f, 1.f, 0.f } };
    static Sphere spheres[3] = {
        { { -2.0f, 0.0f, -3.5f }, 0.5f },
        { { -0.5f, 0.0f, -3.0f }, 0.5f },
        { { 1.0f, 0.0f, -2.2f }, 0.5f } };
    RNGState rngstate;

    seed_rng(rngstate, y0);

    // Compute the mapping between the 'programCount'-wide program
    // instances running in parallel and samples in the image.
    //
    // For now, we'll always take four samples per pixel, so start by
    // initializing du and dv with offsets into subpixel samples.  We'll
    // take care of further updating du and dv for the case where we're
    // doing more than 4 program instances in parallel shortly.
    uniform float uSteps[4] = { 0, 1, 0, 1 };
    uniform float vSteps[4] = { 0, 0, 1, 1 };
    float du = uSteps[programIndex % 4] / nsubsamples;
    float dv = vSteps[programIndex % 4] / nsubsamples;

    // Now handle the case where we are able to do more than one pixel's
    // worth of work at once.  nx records the number of pixels in the x
    // direction we do per iteration and ny the number in y.
    uniform int nx = 1, ny = 1;

    if (programCount == 8) {
        // Do two pixels at once in the x direction
        nx = 2;
        if (programIndex >= 4)
            // And shift the offsets for the second pixel's worth of work
            ++du;
    }
    else if (programCount == 16) {
        // Two at once in both x and y
        nx = ny = 2;
        if ((programIndex >= 4 && programIndex < 8) || programIndex >= 12)
            ++du;
        if (programIndex >= 8)
            ++dv;
    }

    // Now loop over all of the pixels, stepping in x and y as calculated
    // above.
    for (uniform int y = y0; y < y1; y += ny) {
        for (uniform int x = 0; x < w; x += nx) {
            // Figure out x,y pixel in NDC
            float px = (x + du - (w / 2.0f)) / (w / 2.0f);
            float py = -(y + dv - (h / 2.0f)) / (h / 2.0f);
            float ret = 0.f;
            Ray ray;
            Isect isect;

            ray.org = 0.f;

            // Poor man's perspective projection
            ray.dir.x = px;
            ray.dir.y = py;
            ray.dir.z = -1.0;
            vnormalize(ray.dir);

            isect.t = 1.0e+17;
            isect.hit = 0;

            for (uniform int snum = 0; snum < 3; ++snum)
                ray_sphere_intersect(isect, ray, spheres[snum]);
            ray_plane_intersect(isect, ray, plane);

            // Note use of 'coherent' if statement; the set of rays we
            // trace will often all hit or all miss the scene
            cif (isect.hit)
                ret = ambient_occlusion(isect, plane, spheres, rngstate);

            // This is a little grungy; we have results for
            // programCount-worth of values.  Because we're doing 2x2
            // subsamples, we need to peel them off in groups of four,
            // average the four values for each pixel, and update the
            // output image.
            //
            // Store the varying value to a uniform array of the same size.
            // See the discussion about communication among program
            // instances in the ispc user's manual for more discussion on
            // this idiom.
            uniform float retArray[programCount];
            retArray[programIndex] = ret;

            // Group k of four lanes (lanes 4k..4k+3) holds the samples for
            // the pixel at column x + k % nx, row y + k / nx; this matches
            // the du/dv shifts applied above.  Each pixel occupies three
            // consecutive floats, so its offset is computed per pixel.
            for (uniform int k = 0; k < nx * ny; ++k) {
                uniform int pixX = x + k % nx;
                uniform int pixY = y + k / nx;

                // Skip pixels that fall outside the image row or outside
                // the scanline range when w or (y1 - y0) is not a multiple
                // of nx or ny.
                if (pixX < w && pixY < y1) {
                    uniform int p = 4 * k;

                    // Get the four sample values for this pixel
                    uniform float sumret = retArray[p] + retArray[p+1] +
                        retArray[p+2] + retArray[p+3];

                    // Normalize by number of samples taken
                    sumret /= nsubsamples * nsubsamples;

                    // Store result in the image
                    uniform int offset = 3 * (pixY * w + pixX);
                    image[offset+0] = sumret;
                    image[offset+1] = sumret;
                    image[offset+2] = sumret;
                }
            }
        }
    }
}
|
||||
|
||||
|
||||
// Exported entry point: renders the whole w x h image into 'image' with a
// single ao_scanlines() call covering scanlines [0, h).
export void ao_ispc(uniform int w, uniform int h, uniform int nsubsamples,
                    uniform float image[]) {
    uniform int firstRow = 0;
    uniform int endRow = h;
    ao_scanlines(firstRow, endRow, w, h, nsubsamples, image);
}
|
||||
161
examples/aobench_instrumented/aobench_instrumented.vcxproj
Executable file
161
examples/aobench_instrumented/aobench_instrumented.vcxproj
Executable file
@@ -0,0 +1,161 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ao.cpp" />
|
||||
<ClCompile Include="instrument.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="ao.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --instrument
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --instrument
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --instrument
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --instrument
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>aobench_instrumented</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
94
examples/aobench_instrumented/instrument.cpp
Normal file
94
examples/aobench_instrumented/instrument.cpp
Normal file
@@ -0,0 +1,94 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "instrument.h"
|
||||
#include <stdio.h>
|
||||
#include <assert.h>
|
||||
#include <string>
|
||||
#include <map>
|
||||
|
||||
// Counters kept for one instrumented call site; all start at zero.
struct CallInfo {
    CallInfo() : count(0), laneCount(0), allOff(0) {}

    int count;
    int laneCount;
    int allOff;
};
|
||||
|
||||
static std::map<std::string, CallInfo> callInfo;
|
||||
|
||||
// Return the number of set bits in 'i'.
int countbits(int i) {
    // Count on the unsigned bit pattern.  Right-shifting a negative int
    // typically sign-extends, so a loop of the form "while (i) i >>= 1"
    // would never terminate for a value with the top bit set.
    unsigned int bits = (unsigned int)i;
    int ret = 0;
    while (bits) {
        bits &= bits - 1;   // clears the lowest set bit
        ++ret;
    }
    return ret;
}
|
||||
|
||||
|
||||
// Callback function that ispc compiler emits calls to when --instrument
|
||||
// command-line flag is given while compiling.
|
||||
void
|
||||
ISPCInstrument(const char *fn, const char *note, int line, int mask) {
|
||||
char sline[16];
|
||||
sprintf(sline, "%04d", line);
|
||||
std::string s = std::string(fn) + std::string("(") + std::string(sline) +
|
||||
std::string(") - ") + std::string(note);
|
||||
|
||||
// Find or create a CallInfo instance for this callsite.
|
||||
CallInfo &ci = callInfo[s];
|
||||
|
||||
// And update its statistics...
|
||||
++ci.count;
|
||||
if (mask == 0)
|
||||
++ci.allOff;
|
||||
ci.laneCount += countbits(mask);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ISPCPrintInstrument() {
|
||||
// When program execution is done, go through the stats and print them
|
||||
// out. (This function is called by ao.cpp).
|
||||
std::map<std::string, CallInfo>::iterator citer = callInfo.begin();
|
||||
while (citer != callInfo.end()) {
|
||||
CallInfo &ci = citer->second;
|
||||
float activePct = 100.f * ci.laneCount / (4.f * ci.count);
|
||||
float allOffPct = 100.f * ci.allOff / ci.count;
|
||||
printf("%s: %d calls (%d / %.2f%% all off!), %.2f%% active lanes\n",
|
||||
citer->first.c_str(), ci.count, ci.allOff, allOffPct,
|
||||
activePct);
|
||||
++citer;
|
||||
}
|
||||
}
|
||||
45
examples/aobench_instrumented/instrument.h
Normal file
45
examples/aobench_instrumented/instrument.h
Normal file
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef INSTRUMENT_H
|
||||
#define INSTRUMENT_H 1
|
||||
|
||||
#include <stdint.h>
|
||||
|
||||
// Callback that ispc-generated code calls when compiled with --instrument.
// Declared with C linkage.
extern "C" void ISPCInstrument(const char *fn, const char *note, int line,
                               int mask);

// Prints the statistics collected through ISPCInstrument().
void ISPCPrintInstrument();
|
||||
|
||||
#endif // INSTRUMENT_H
|
||||
86
examples/examples.sln
Executable file
86
examples/examples.sln
Executable file
@@ -0,0 +1,86 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "simple", "simple\simple.vcxproj", "{947C5311-8B78-4D05-BEE4-BCF342D4B367}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "rt", "rt\rt.vcxproj", "{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "aobench", "aobench\aobench.vcxproj", "{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mandelbrot", "mandelbrot\mandelbrot.vcxproj", "{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "options", "options\options.vcxproj", "{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "mandelbrot_tasks", "mandelbrot_tasks\mandelbrot_tasks.vcxproj", "{E80DA7D4-AB22-4648-A068-327307156BE6}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "aobench_instrumented", "aobench_instrumented\aobench_instrumented.vcxproj", "{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Debug|x64 = Debug|x64
|
||||
Release|Win32 = Release|Win32
|
||||
Release|x64 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Debug|x64.Build.0 = Debug|x64
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Release|Win32.Build.0 = Release|Win32
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Release|x64.ActiveCfg = Release|x64
|
||||
{947C5311-8B78-4D05-BEE4-BCF342D4B367}.Release|x64.Build.0 = Release|x64
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Debug|x64.Build.0 = Debug|x64
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Release|Win32.Build.0 = Release|Win32
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Release|x64.ActiveCfg = Release|x64
|
||||
{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}.Release|x64.Build.0 = Release|x64
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Debug|x64.Build.0 = Debug|x64
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Release|Win32.Build.0 = Release|Win32
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Release|x64.ActiveCfg = Release|x64
|
||||
{F29204CA-19DF-4F3C-87D5-03F4EEDAAFEB}.Release|x64.Build.0 = Release|x64
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Debug|x64.Build.0 = Debug|x64
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Release|Win32.Build.0 = Release|Win32
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Release|x64.ActiveCfg = Release|x64
|
||||
{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}.Release|x64.Build.0 = Release|x64
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Debug|x64.Build.0 = Debug|x64
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Release|Win32.Build.0 = Release|Win32
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Release|x64.ActiveCfg = Release|x64
|
||||
{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}.Release|x64.Build.0 = Release|x64
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Debug|x64.Build.0 = Debug|x64
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Release|Win32.Build.0 = Release|Win32
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Release|x64.ActiveCfg = Release|x64
|
||||
{E80DA7D4-AB22-4648-A068-327307156BE6}.Release|x64.Build.0 = Release|x64
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Debug|x64.ActiveCfg = Debug|x64
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Debug|x64.Build.0 = Debug|x64
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Release|Win32.Build.0 = Release|Win32
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Release|x64.ActiveCfg = Release|x64
|
||||
{B3B4AE3D-6D5A-4CF9-AF5B-43CF2131B958}.Release|x64.Build.0 = Release|x64
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
26
examples/mandelbrot/Makefile
Normal file
26
examples/mandelbrot/Makefile
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
# Build the mandelbrot example: the C++ driver, the serial reference
# implementation and the ispc-compiled kernel are linked into ./mandelbrot.
# All intermediate files (objects and the ispc-generated header) go to objs/.

CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
ISPCFLAGS=-O2 --target=sse4x2

default: mandelbrot

.PHONY: default dirs clean

dirs:
	/bin/mkdir -p objs/

clean:
	/bin/rm -rf objs *~ mandelbrot

# The output directory is an order-only prerequisite (after the '|') of every
# rule that writes into objs/: it is created before any compile step, even
# under "make -j", but the always-out-of-date phony target no longer forces a
# relink on every invocation.
mandelbrot: objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc.o | dirs
	$(CXX) $(CXXFLAGS) -o $@ objs/mandelbrot.o objs/mandelbrot_ispc.o objs/mandelbrot_serial.o -lm

objs/%.o: %.cpp | dirs
	$(CXX) $< $(CXXFLAGS) -c -o $@

# mandelbrot.cpp includes the header that ispc generates.
objs/mandelbrot.o: objs/mandelbrot_ispc.h

# One ispc invocation produces both the object file and the C++ header.
objs/%_ispc.h objs/%_ispc.o: %.ispc | dirs
	$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
117
examples/mandelbrot/mandelbrot.cpp
Normal file
117
examples/mandelbrot/mandelbrot.cpp
Normal file
@@ -0,0 +1,117 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include "../timing.h"
|
||||
#include "mandelbrot_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||
int width, int height, int maxIterations,
|
||||
int output[]);
|
||||
|
||||
/* Write a binary PPM (P6) image file visualizing the Mandelbrot set.

   buf    - width*height iteration counts, one per pixel, row after row
   width  - image width in pixels
   height - image height in pixels
   fn     - name of the file to create

   Each pixel is written as a grey whose brightness depends only on whether
   its iteration count is odd or even.  If the file cannot be opened, an
   error is reported on stderr and nothing is written.
*/
static void
writePPM(int *buf, int width, int height, const char *fn) {
    FILE *fp = fopen(fn, "wb");
    if (!fp) {
        // Without this check the fprintf() calls below would be handed a
        // null stream.
        perror(fn);
        return;
    }

    fprintf(fp, "P6\n");
    fprintf(fp, "%d %d\n", width, height);
    fprintf(fp, "255\n");
    for (int i = 0; i < width*height; ++i) {
        // Map the iteration count to colors by just alternating between
        // two greys.  unsigned char holds 240 without overflowing.
        unsigned char c = (buf[i] & 0x1) ? 240 : 20;
        for (int j = 0; j < 3; ++j)
            fputc(c, fp);
    }
    fclose(fp);
}
|
||||
|
||||
|
||||
int main() {
|
||||
unsigned int width = 768;
|
||||
unsigned int height = 512;
|
||||
float x0 = -2;
|
||||
float x1 = 1;
|
||||
float y0 = -1;
|
||||
float y1 = 1;
|
||||
|
||||
int maxIterations = 256;
|
||||
int *buf = new int[width*height];
|
||||
|
||||
//
|
||||
// Compute the image using the ispc implementation; report the minimum
|
||||
// time of three runs.
|
||||
//
|
||||
double minISPC = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minISPC = std::min(minISPC, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot ispc]:\t\t[%.3f] million cycles\n", minISPC);
|
||||
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
||||
|
||||
// Clear out the buffer
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
|
||||
//
|
||||
// And run the serial implementation 3 times, again reporting the
|
||||
// minimum time.
|
||||
//
|
||||
double minSerial = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minSerial = std::min(minSerial, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot serial]:\t\t[%.3f] millon cycles\n", minSerial);
|
||||
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minSerial/minISPC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
76
examples/mandelbrot/mandelbrot.ispc
Normal file
76
examples/mandelbrot/mandelbrot.ispc
Normal file
@@ -0,0 +1,76 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Returns how many iterations of z = z*z + c (starting from z = c) are
   performed before |z|^2 exceeds 4, capped at 'count'. */
static inline int mandel(float c_re, float c_im, int count) {
    float re = c_re;
    float im = c_im;
    int iter = 0;

    // Keep going while there is iteration budget left and the point has
    // not escaped yet.
    while (iter < count && !(re * re + im * im > 4.)) {
        float sq_re = re * re - im * im;
        float sq_im = 2.f * re * im;
        re = c_re + sq_re;
        im = c_im + sq_im;
        ++iter;
    }

    return iter;
}
|
||||
|
||||
/* Computes the Mandelbrot iteration count for every pixel of a
   width x height image covering [x0,x1] x [y0,y1] on the complex plane.

   output receives width*height counts, row after row; each count is at
   most maxIterations.
*/
export void mandelbrot_ispc(uniform float x0, uniform float y0,
                            uniform float x1, uniform float y1,
                            uniform int width, uniform int height,
                            uniform int maxIterations,
                            reference uniform int output[])
{
    // The pixel spacing is the same for every program instance, so it is
    // kept uniform.
    uniform float dx = (x1 - x0) / width;
    uniform float dy = (y1 - y0) / height;

    for (uniform int j = 0; j < height; j++) {
        // Note that we'll be doing programCount computations in parallel,
        // so increment i by that much.  When programCount does not evenly
        // divide width, the last step of a row has program instances past
        // the end of the row; they are masked off below.
        for (uniform int i = 0; i < width; i += programCount) {
            // Figure out the position on the complex plane to compute the
            // number of iterations at.  Note that the x values are
            // different across different program instances, since its
            // initializer incorporates the value of the programIndex
            // variable.
            float x = x0 + (programIndex + i) * dx;
            float y = y0 + j * dy;

            int index = j * width + i + programIndex;
            // Only program instances that map to a pixel inside this row
            // compute and store a result.
            if (i + programIndex < width)
                output[index] = mandel(x, y, maxIterations);
        }
    }
}
|
||||
161
examples/mandelbrot/mandelbrot.vcxproj
Executable file
161
examples/mandelbrot/mandelbrot.vcxproj
Executable file
@@ -0,0 +1,161 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{6D3EF8C5-AE26-407B-9ECE-C27CB988D9C1}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>mandelbrot</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="mandelbrot.cpp" />
|
||||
<ClCompile Include="mandelbrot_serial.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="mandelbrot.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
68
examples/mandelbrot/mandelbrot_serial.cpp
Normal file
68
examples/mandelbrot/mandelbrot_serial.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* Returns how many iterations of z = z*z + c (starting from z = c) are
   performed before |z|^2 exceeds 4, capped at 'count'. */
static int mandel(float c_re, float c_im, int count) {
    float z_re = c_re, z_im = c_im;
    int i;
    for (i = 0; i < count; ++i) {
        // Single-precision literal: keeps the escape test in float, like
        // the 2.f below, instead of promoting it to double.
        if (z_re * z_re + z_im * z_im > 4.f)
            break;

        float new_re = z_re*z_re - z_im*z_im;
        float new_im = 2.f * z_re * z_im;
        z_re = c_re + new_re;
        z_im = c_im + new_im;
    }

    return i;
}
|
||||
|
||||
void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||
int width, int height, int maxIterations,
|
||||
int output[])
|
||||
{
|
||||
float dx = (x1 - x0) / width;
|
||||
float dy = (y1 - y0) / height;
|
||||
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int i = 0; i < width; ++i) {
|
||||
float x = x0 + i * dx;
|
||||
float y = y0 + j * dy;
|
||||
|
||||
int index = (j * width + i);
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
38
examples/mandelbrot_tasks/Makefile
Normal file
38
examples/mandelbrot_tasks/Makefile
Normal file
@@ -0,0 +1,38 @@
|
||||
|
||||
# Build the task-parallel mandelbrot example: the C++ driver, the serial
# reference implementation, the ispc-compiled kernel and a task system
# implementation are linked into ./mandelbrot.  All intermediate files go to
# objs/.

ARCH = $(shell uname)

# Task system implementation: pthreads by default, tasks_gcd.cpp on Darwin.
TASK_CXX=tasks_pthreads.cpp
TASK_LIB=-lpthread

ifeq ($(ARCH), Darwin)
  TASK_CXX=tasks_gcd.cpp
  TASK_LIB=
endif

TASK_OBJ=$(addprefix objs/, $(TASK_CXX:.cpp=.o))

CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
ISPCFLAGS=-O2 --target=sse4x2

default: mandelbrot

.PHONY: default dirs clean

dirs:
	/bin/mkdir -p objs/

clean:
	/bin/rm -rf objs *~ mandelbrot

# The output directory is an order-only prerequisite (after the '|') of every
# rule that writes into objs/: it is created before any compile step, even
# under "make -j", but the always-out-of-date phony target no longer forces a
# relink on every invocation.
mandelbrot: objs/mandelbrot.o objs/mandelbrot_serial.o objs/mandelbrot_ispc.o $(TASK_OBJ) | dirs
	$(CXX) $(CXXFLAGS) -o $@ objs/mandelbrot.o objs/mandelbrot_ispc.o objs/mandelbrot_serial.o $(TASK_OBJ) -lm $(TASK_LIB)

objs/%.o: %.cpp | dirs
	$(CXX) $< $(CXXFLAGS) -c -o $@

# mandelbrot.cpp includes the header that ispc generates.
objs/mandelbrot.o: objs/mandelbrot_ispc.h

# One ispc invocation produces both the object file and the C++ header.
objs/%_ispc.h objs/%_ispc.o: %.ispc | dirs
	$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
120
examples/mandelbrot_tasks/mandelbrot.cpp
Normal file
120
examples/mandelbrot_tasks/mandelbrot.cpp
Normal file
@@ -0,0 +1,120 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <algorithm>
|
||||
#include "../timing.h"
|
||||
#include "mandelbrot_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
extern void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||
int width, int height, int maxIterations,
|
||||
int output[]);
|
||||
|
||||
/* Write a binary PPM (P6) image file visualizing the Mandelbrot set.

   buf    - width*height iteration counts, one per pixel, row after row
   width  - image width in pixels
   height - image height in pixels
   fn     - name of the file to create

   Each pixel is written as a grey whose brightness depends only on whether
   its iteration count is odd or even.  If the file cannot be opened, an
   error is reported on stderr and nothing is written.
*/
static void
writePPM(int *buf, int width, int height, const char *fn) {
    FILE *fp = fopen(fn, "wb");
    if (!fp) {
        // Without this check the fprintf() calls below would be handed a
        // null stream.
        perror(fn);
        return;
    }

    fprintf(fp, "P6\n");
    fprintf(fp, "%d %d\n", width, height);
    fprintf(fp, "255\n");
    for (int i = 0; i < width*height; ++i) {
        // Map the iteration count to colors by just alternating between
        // two greys.  unsigned char holds 240 without overflowing.
        unsigned char c = (buf[i] & 0x1) ? 240 : 20;
        for (int j = 0; j < 3; ++j)
            fputc(c, fp);
    }
    fclose(fp);
}
|
||||
|
||||
|
||||
int main() {
|
||||
unsigned int width = 1536;
|
||||
unsigned int height = 1024;
|
||||
float x0 = -2;
|
||||
float x1 = 1;
|
||||
float y0 = -1;
|
||||
float y1 = 1;
|
||||
|
||||
extern void TasksInit();
|
||||
TasksInit();
|
||||
|
||||
int maxIterations = 512;
|
||||
int *buf = new int[width*height];
|
||||
|
||||
//
|
||||
// Compute the image using the ispc implementation; report the minimum
|
||||
// time of three runs.
|
||||
//
|
||||
double minISPC = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
mandelbrot_ispc(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minISPC = std::min(minISPC, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot ispc+tasks]:\t[%.3f] million cycles\n", minISPC);
|
||||
writePPM(buf, width, height, "mandelbrot-ispc.ppm");
|
||||
|
||||
// Clear out the buffer
|
||||
for (unsigned int i = 0; i < width * height; ++i)
|
||||
buf[i] = 0;
|
||||
|
||||
//
|
||||
// And run the serial implementation 3 times, again reporting the
|
||||
// minimum time.
|
||||
//
|
||||
double minSerial = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
mandelbrot_serial(x0, y0, x1, y1, width, height, maxIterations, buf);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minSerial = std::min(minSerial, dt);
|
||||
}
|
||||
|
||||
printf("[mandelbrot serial]:\t\t[%.3f] millon cycles\n", minSerial);
|
||||
writePPM(buf, width, height, "mandelbrot-serial.ppm");
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minSerial/minISPC);
|
||||
|
||||
return 0;
|
||||
}
|
||||
86
examples/mandelbrot_tasks/mandelbrot.ispc
Normal file
86
examples/mandelbrot_tasks/mandelbrot.ispc
Normal file
@@ -0,0 +1,86 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Returns how many iterations of z = z*z + c (starting from z = c) are
   performed before |z|^2 exceeds 4, capped at 'count'. */
static inline int
mandel(float c_re, float c_im, int count) {
    float re = c_re;
    float im = c_im;
    int iter = 0;

    // Keep going while there is iteration budget left and the point has
    // not escaped yet.
    while (iter < count && !(re * re + im * im > 4.)) {
        float sq_re = re * re - im * im;
        float sq_im = 2.f * re * im;
        re = c_re + sq_re;
        im = c_im + sq_im;
        ++iter;
    }

    return iter;
}
|
||||
|
||||
|
||||
/* Task to compute the Mandelbrot iterations for a span of scanlines from
   [ystart,yend).

   x0/y0 are the coordinates of the first pixel and dx/dy the spacing
   between neighbouring pixels.  Results are stored in output at index
   j * width + i for scanline j and column i.
*/
task void
mandelbrot_scanlines(uniform int ystart, uniform int yend,
                     uniform float x0, uniform float dx,
                     uniform float y0, uniform float dy,
                     uniform int width, uniform int maxIterations,
                     reference uniform int output[]) {
    for (uniform int j = ystart; j < yend; ++j) {
        // programCount pixels are processed per step; each program
        // instance handles column i + programIndex.
        for (uniform int i = 0; i < width; i += programCount) {
            float x = x0 + (programIndex + i) * dx;
            float y = y0 + j * dy;

            int index = j * width + i + programIndex;
            // When programCount does not evenly divide width, the last
            // step of a scanline has program instances past its end.
            // They must not store anything: their index would fall into
            // the following scanline or past the end of the buffer.
            if (i + programIndex < width)
                output[index] = mandel(x, y, maxIterations);
        }
    }
}
|
||||
|
||||
|
||||
/* Computes the Mandelbrot iteration count for every pixel of a
   width x height image covering [x0,x1] x [y0,y1] on the complex plane by
   launching one mandelbrot_scanlines task per group of scanlines.

   output receives width*height counts, row after row.
   NOTE(review): no explicit sync follows the launches; this presumably
   relies on ispc waiting for launched tasks before the function returns --
   confirm against the ispc documentation.
*/
export void
mandelbrot_ispc(uniform float x0, uniform float y0,
                uniform float x1, uniform float y1,
                uniform int width, uniform int height,
                uniform int maxIterations, reference uniform int output[]) {
    uniform float dx = (x1 - x0) / width;
    uniform float dy = (y1 - y0) / height;

    /* Launch task to compute results for spans of 'span' scanlines. */
    uniform int span = 2;
    for (uniform int j = 0; j < height; j += span) {
        // Clamp the end of the last span so that a height that is not a
        // multiple of 'span' does not make a task write past the image.
        uniform int yend = j + span;
        if (yend > height)
            yend = height;

        launch < mandelbrot_scanlines(j, yend, x0, dx, y0, dy, width,
                                      maxIterations, output) >;
    }
}
|
||||
68
examples/mandelbrot_tasks/mandelbrot_serial.cpp
Normal file
68
examples/mandelbrot_tasks/mandelbrot_serial.cpp
Normal file
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
/* Returns how many iterations of z = z*z + c (starting from z = c) are
   performed before |z|^2 exceeds 4, capped at 'count'. */
static int mandel(float c_re, float c_im, int count) {
    float z_re = c_re, z_im = c_im;
    int i;
    for (i = 0; i < count; ++i) {
        // Single-precision literal: keeps the escape test in float, like
        // the 2.f below, instead of promoting it to double.
        if (z_re * z_re + z_im * z_im > 4.f)
            break;

        float new_re = z_re*z_re - z_im*z_im;
        float new_im = 2.f * z_re * z_im;
        z_re = c_re + new_re;
        z_im = c_im + new_im;
    }

    return i;
}
|
||||
|
||||
void mandelbrot_serial(float x0, float y0, float x1, float y1,
|
||||
int width, int height, int maxIterations,
|
||||
int output[])
|
||||
{
|
||||
float dx = (x1 - x0) / width;
|
||||
float dy = (y1 - y0) / height;
|
||||
|
||||
for (int j = 0; j < height; j++) {
|
||||
for (int i = 0; i < width; ++i) {
|
||||
float x = x0 + i * dx;
|
||||
float y = y0 + j * dy;
|
||||
|
||||
int index = (j * width + i);
|
||||
output[index] = mandel(x, y, maxIterations);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
162
examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj
Executable file
162
examples/mandelbrot_tasks/mandelbrot_tasks.vcxproj
Executable file
@@ -0,0 +1,162 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{E80DA7D4-AB22-4648-A068-327307156BE6}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>mandelbrot</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="mandelbrot.cpp" />
|
||||
<ClCompile Include="mandelbrot_serial.cpp" />
|
||||
<ClCompile Include="tasks_concrt.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="mandelbrot.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
115
examples/mandelbrot_tasks/tasks_concrt.cpp
Normal file
115
examples/mandelbrot_tasks/tasks_concrt.cpp
Normal file
@@ -0,0 +1,115 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* Simple task system implementation for ispc based on Microsoft's
|
||||
Concurrency Runtime. */
|
||||
|
||||
#include <windows.h>
|
||||
#include <concrt.h>
|
||||
using namespace Concurrency;
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *f, void *data);
|
||||
void ISPCSync();
|
||||
}
|
||||
|
||||
// Signature of a task entry point: it is invoked as
// func(data, threadIndex, threadCount).
typedef void (*TaskFuncType)(void *, int, int);

// Record describing one launched task.
struct TaskInfo {
    TaskFuncType ispcFunc;   // entry point to call
    void *ispcData;          // opaque argument passed as the first parameter
};
|
||||
|
||||
// This is a simple implementation that just aborts if more than MAX_TASKS
|
||||
// are launched. It could easily be extended to be more general...
|
||||
|
||||
#define MAX_TASKS 4096
|
||||
static int taskOffset;
|
||||
static TaskInfo taskInfo[MAX_TASKS];
|
||||
static event *events[MAX_TASKS];
|
||||
static CRITICAL_SECTION criticalSection;
|
||||
|
||||
void
|
||||
TasksInit() {
|
||||
InitializeCriticalSection(&criticalSection);
|
||||
for (int i = 0; i < MAX_TASKS; ++i)
|
||||
events[i] = new event;
|
||||
}
|
||||
|
||||
|
||||
void __cdecl
|
||||
lRunTask(LPVOID param) {
|
||||
TaskInfo *ti = (TaskInfo *)param;
|
||||
|
||||
// Actually run the task.
|
||||
// FIXME: like the tasks_gcd.cpp implementation, this is passing bogus
|
||||
// values for the threadIndex and threadCount builtins, which in turn
|
||||
// will cause bugs in code that uses those. FWIW this example doesn't
|
||||
// use them...
|
||||
int threadIndex = 0;
|
||||
int threadCount = 1;
|
||||
ti->ispcFunc(ti->ispcData, threadIndex, threadCount);
|
||||
|
||||
// Signal the event that this task is done
|
||||
int taskNum = ti - &taskInfo[0];
|
||||
events[taskNum]->set();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ISPCLaunch(void *func, void *data) {
|
||||
// Get a TaskInfo struct for this task
|
||||
EnterCriticalSection(&criticalSection);
|
||||
TaskInfo *ti = &taskInfo[taskOffset++];
|
||||
assert(taskOffset < MAX_TASKS);
|
||||
LeaveCriticalSection(&criticalSection);
|
||||
|
||||
// And pass it on to the Concurrency Runtime...
|
||||
ti->ispcFunc = (TaskFuncType)func;
|
||||
ti->ispcData = data;
|
||||
CurrentScheduler::ScheduleTask(lRunTask, ti);
|
||||
}
|
||||
|
||||
|
||||
void ISPCSync() {
|
||||
event::wait_for_multiple(&events[0], taskOffset, true,
|
||||
COOPERATIVE_TIMEOUT_INFINITE);
|
||||
|
||||
for (int i = 0; i < taskOffset; ++i)
|
||||
events[i]->reset();
|
||||
|
||||
taskOffset = 0;
|
||||
}
|
||||
90
examples/mandelbrot_tasks/tasks_gcd.cpp
Normal file
90
examples/mandelbrot_tasks/tasks_gcd.cpp
Normal file
@@ -0,0 +1,90 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/* A simple task system for ispc programs based on Apple's Grand Central
|
||||
Dispatch. */
|
||||
|
||||
#include <dispatch/dispatch.h>
|
||||
|
||||
static dispatch_queue_t gcdQueue;
|
||||
static dispatch_group_t gcdGroup;
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *f, void *data);
|
||||
void ISPCSync();
|
||||
}
|
||||
|
||||
// Record describing one launched task; both fields are stored exactly as
// they were passed to ISPCLaunch().
struct TaskInfo {
    void *func;   // task entry point, kept as an untyped pointer
    void *data;   // opaque argument for the entry point
};
|
||||
|
||||
|
||||
void
|
||||
TasksInit() {
|
||||
gcdQueue = dispatch_get_global_queue(DISPATCH_QUEUE_PRIORITY_DEFAULT, 0);
|
||||
gcdGroup = dispatch_group_create();
|
||||
}
|
||||
|
||||
|
||||
static void
|
||||
lRunTask(void *ti) {
|
||||
typedef void (*TaskFuncType)(void *, int, int);
|
||||
TaskInfo *taskInfo = (TaskInfo *)ti;
|
||||
|
||||
TaskFuncType func = (TaskFuncType)(taskInfo->func);
|
||||
|
||||
// FIXME: these are bogus values; may cause bugs in code that depends
|
||||
// on them having unique values in different threads.
|
||||
int threadIndex = 0;
|
||||
int threadCount = 1;
|
||||
// Actually run the task
|
||||
func(taskInfo->data, threadIndex, threadCount);
|
||||
|
||||
// FIXME: taskInfo leaks...
|
||||
}
|
||||
|
||||
|
||||
void ISPCLaunch(void *func, void *data) {
|
||||
TaskInfo *ti = new TaskInfo;
|
||||
ti->func = func;
|
||||
ti->data = data;
|
||||
dispatch_group_async_f(gcdGroup, gcdQueue, ti, lRunTask);
|
||||
}
|
||||
|
||||
|
||||
// Blocks the caller, with no timeout, until every task that was submitted
// to gcdGroup has finished.
void ISPCSync() {
    dispatch_group_wait(gcdGroup, DISPATCH_TIME_FOREVER);
}
|
||||
285
examples/mandelbrot_tasks/tasks_pthreads.cpp
Normal file
285
examples/mandelbrot_tasks/tasks_pthreads.cpp
Normal file
@@ -0,0 +1,285 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <pthread.h>
|
||||
#include <semaphore.h>
|
||||
#include <string.h>
|
||||
#include <unistd.h>
|
||||
#include <assert.h>
|
||||
#include <stdio.h>
|
||||
#include <fcntl.h>
|
||||
#include <sys/types.h>
|
||||
#include <sys/stat.h>
|
||||
#include <sys/param.h>
|
||||
#include <sys/sysctl.h>
|
||||
#include <stdint.h>
|
||||
#include <stdlib.h>
|
||||
#include <errno.h>
|
||||
#include <vector>
|
||||
|
||||
// ispc expects these functions to have C linkage / not be mangled
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *f, void *data);
|
||||
void ISPCSync();
|
||||
}
|
||||
|
||||
|
||||
static int nThreads;
|
||||
static pthread_t *threads;
|
||||
static pthread_mutex_t taskQueueMutex;
|
||||
static std::vector<std::pair<void *, void *> > taskQueue;
|
||||
static sem_t *workerSemaphore;
|
||||
static uint32_t numUnfinishedTasks;
|
||||
static pthread_mutex_t tasksRunningConditionMutex;
|
||||
static pthread_cond_t tasksRunningCondition;
|
||||
|
||||
static void *lTaskEntry(void *arg);
|
||||
|
||||
/** Figure out how many CPU cores there are in the system.

    Always returns a value >= 1.  If the operating system query fails, or
    reports a count below 1, a warning is printed to stderr and 2 is
    returned as a guess.
 */
static int
lNumCPUCores() {
    int nCores = 0;

#if defined(__linux__)
    // sysconf() returns -1 on error; never hand that back as a core count.
    nCores = (int)sysconf(_SC_NPROCESSORS_ONLN);
    if (nCores < 1) {
        fprintf(stderr, "sysconf() failed to report the number of cores. Guessing 2.\n");
        return 2;
    }
#else
    // Mac: translate the sysctl name into a MIB, then query it.
    int mib[2];
    size_t length = 2;
    if (sysctlnametomib("hw.logicalcpu", mib, &length) == -1) {
        fprintf(stderr, "sysctlnametomib() failed. Guessing 2 cores.\n");
        return 2;
    }
    assert(length == 2);

    size_t size = sizeof(nCores);
    if (sysctl(mib, 2, &nCores, &size, NULL, 0) == -1 || nCores < 1) {
        fprintf(stderr, "sysctl() to find number of cores present failed. Guessing 2.\n");
        return 2;
    }
#endif

    return nCores;
}
|
||||
|
||||
void
|
||||
TasksInit() {
|
||||
nThreads = lNumCPUCores();
|
||||
|
||||
threads = new pthread_t[nThreads];
|
||||
|
||||
int err;
|
||||
if ((err = pthread_mutex_init(&taskQueueMutex, NULL)) != 0) {
|
||||
fprintf(stderr, "Error creating mutex: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
char name[32];
|
||||
sprintf(name, "mandelbrot.%d", (int)getpid());
|
||||
workerSemaphore = sem_open(name, O_CREAT, S_IRUSR|S_IWUSR, 0);
|
||||
if (!workerSemaphore) {
|
||||
fprintf(stderr, "Error creating semaphore: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((err = pthread_cond_init(&tasksRunningCondition, NULL)) != 0) {
|
||||
fprintf(stderr, "Error creating condition variable: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
if ((err = pthread_mutex_init(&tasksRunningConditionMutex, NULL)) != 0) {
|
||||
fprintf(stderr, "Error creating mutex: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
for (int i = 0; i < nThreads; ++i) {
|
||||
err = pthread_create(&threads[i], NULL, &lTaskEntry, reinterpret_cast<void *>(i));
|
||||
if (err != 0) {
|
||||
fprintf(stderr, "Error creating pthread %d: %s\n", i, strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
ISPCLaunch(void *f, void *d) {
|
||||
//
|
||||
// Acquire mutex, add task
|
||||
//
|
||||
int err;
|
||||
if ((err = pthread_mutex_lock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
taskQueue.push_back(std::make_pair(f, d));
|
||||
|
||||
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Update count of number of tasks left to run
|
||||
//
|
||||
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
++numUnfinishedTasks;
|
||||
|
||||
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Post to the worker semaphore to wake up worker threads that are
|
||||
// sleeping waiting for tasks to show up
|
||||
//
|
||||
if ((err = sem_post(workerSemaphore)) != 0) {
|
||||
fprintf(stderr, "Error from sem_post: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
static void *
|
||||
lTaskEntry(void *arg) {
|
||||
int threadIndex = int(reinterpret_cast<int64_t>(arg));
|
||||
int threadCount = nThreads;
|
||||
|
||||
while (true) {
|
||||
int err;
|
||||
if ((err = sem_wait(workerSemaphore)) != 0) {
|
||||
fprintf(stderr, "Error from sem_wait: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
std::pair<void *, void *> myTask;
|
||||
//
|
||||
// Acquire mutex, get task
|
||||
//
|
||||
if ((err = pthread_mutex_lock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
if (taskQueue.size() == 0) {
|
||||
//
|
||||
// Task queue is empty, go back and wait on the semaphore
|
||||
//
|
||||
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
continue;
|
||||
}
|
||||
|
||||
myTask = taskQueue.back();
|
||||
taskQueue.pop_back();
|
||||
|
||||
if ((err = pthread_mutex_unlock(&taskQueueMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_unlock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
//
|
||||
// Do work for _myTask_
|
||||
//
|
||||
typedef void (*TaskFunType)(void *, int, int);
|
||||
TaskFunType func = (TaskFunType)myTask.first;
|
||||
func(myTask.second, threadIndex, threadCount);
|
||||
|
||||
//
|
||||
// Decrement the number of unfinished tasks counter
|
||||
//
|
||||
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
int unfinished = --numUnfinishedTasks;
|
||||
if (unfinished == 0) {
|
||||
//
|
||||
// Signal the "no more tasks are running" condition if all of
|
||||
// them are done.
|
||||
//
|
||||
int err;
|
||||
if ((err = pthread_cond_signal(&tasksRunningCondition)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_cond_signal: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
pthread_exit(NULL);
|
||||
return 0;
|
||||
}
|
||||
|
||||
|
||||
void ISPCSync() {
|
||||
int err;
|
||||
if ((err = pthread_mutex_lock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
|
||||
// As long as there are tasks running, wait on the condition variable;
|
||||
// doing so causes this thread to go to sleep until someone signals on
|
||||
// the tasksRunningCondition condition variable.
|
||||
while (numUnfinishedTasks > 0) {
|
||||
if ((err = pthread_cond_wait(&tasksRunningCondition,
|
||||
&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_cond_wait: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
|
||||
// We acquire ownership of the condition variable mutex when the above
|
||||
// pthread_cond_wait returns.
|
||||
// FIXME: is there a lurking issue here if numUnfinishedTasks gets back
|
||||
// to zero by the time we get to ISPCSync() and thence we're trying to
|
||||
// unlock a mutex we don't have a lock on?
|
||||
if ((err = pthread_mutex_unlock(&tasksRunningConditionMutex)) != 0) {
|
||||
fprintf(stderr, "Error from pthread_mutex_lock: %s\n", strerror(err));
|
||||
exit(1);
|
||||
}
|
||||
}
|
||||
26
examples/options/Makefile
Normal file
26
examples/options/Makefile
Normal file
@@ -0,0 +1,26 @@
|
||||
|
||||
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -g -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse4x2
|
||||
|
||||
default: options
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
clean:
|
||||
/bin/rm -rf objs *~ options
|
||||
|
||||
options: dirs objs/options.o objs/options_serial.o objs/options_ispc.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ objs/options.o objs/options_ispc.o objs/options_serial.o -lm
|
||||
|
||||
objs/%.o: %.cpp
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
objs/options.o: objs/options_ispc.h options_defs.h
|
||||
|
||||
objs/%_ispc.h objs/%_ispc.o: %.ispc options_defs.h
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
151
examples/options/options.cpp
Normal file
151
examples/options/options.cpp
Normal file
@@ -0,0 +1,151 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdlib.h>
|
||||
#include <string.h>
|
||||
#include <assert.h>
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#ifndef __APPLE__
|
||||
#include <malloc.h>
|
||||
#endif // !__APPLE__
|
||||
using std::max;
|
||||
|
||||
#include "options_defs.h"
|
||||
#include "../timing.h"
|
||||
|
||||
#include "options_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
// Allocate storage for count floats with 64-byte alignment.  Returns NULL
// if the underlying allocator fails.
float *AllocFloats(int count) {
    // Compute the byte count in size_t so a large count cannot overflow
    // an int.
    size_t size = (size_t)count * sizeof(float);
#if defined(_WIN32) || defined(_WIN64)
    return (float *)_aligned_malloc(size, 64);
#elif defined (__APPLE__)
    // Allocate excess memory to ensure an aligned pointer can be returned:
    // up to 63 bytes of padding plus room for the original malloc() pointer
    // stored just before the aligned block.
    void *mem = malloc(size + (64-1) + sizeof(void*));
    if (mem == NULL)
        return NULL;
    char *amem = ((char*)mem) + sizeof(void*);
    // Advance to the next 64-byte boundary.  The outer mask turns a
    // would-be step of 64 (pointer already aligned) into 0; a full 64-byte
    // step would push the end of the block one byte past the allocation.
    amem += (64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1))) & (64 - 1);
    ((void**)amem)[-1] = mem;
    return (float *)amem;
#else
    return (float *)memalign(64, size);
#endif
}
|
||||
|
||||
extern void black_scholes_serial(float Sa[], float Xa[], float Ta[],
|
||||
float ra[], float va[],
|
||||
float result[], int count);
|
||||
|
||||
extern void binomial_put_serial(float Sa[], float Xa[], float Ta[],
|
||||
float ra[], float va[],
|
||||
float result[], int count);
|
||||
|
||||
int main() {
|
||||
// Pointers passed to ispc code must have alignment of the target's
|
||||
// vector width at minimum.
|
||||
float *S = AllocFloats(N_OPTIONS);
|
||||
float *X = AllocFloats(N_OPTIONS);
|
||||
float *T = AllocFloats(N_OPTIONS);
|
||||
float *r = AllocFloats(N_OPTIONS);
|
||||
float *v = AllocFloats(N_OPTIONS);
|
||||
float *result = AllocFloats(N_OPTIONS);
|
||||
|
||||
for (int i = 0; i < N_OPTIONS; ++i) {
|
||||
S[i] = 100; // stock price
|
||||
X[i] = 98; // option strike price
|
||||
T[i] = 2; // time (years)
|
||||
r[i] = .02; // risk-free interest rate
|
||||
v[i] = 5; // volatility
|
||||
}
|
||||
|
||||
//
|
||||
// Binomial options pricing model, ispc implementation
|
||||
//
|
||||
reset_and_start_timer();
|
||||
binomial_put_ispc(S, X, T, r, v, result, N_OPTIONS);
|
||||
double binomial_ispc = get_elapsed_mcycles();
|
||||
float sum = 0.f;
|
||||
for (int i = 0; i < N_OPTIONS; ++i)
|
||||
sum += result[i];
|
||||
printf("[binomial ispc]:\t\t[%.3f] million cycles (avg %f)\n",
|
||||
binomial_ispc, sum / N_OPTIONS);
|
||||
|
||||
//
|
||||
// Binomial options, serial implementation
|
||||
//
|
||||
reset_and_start_timer();
|
||||
binomial_put_serial(S, X, T, r, v, result, N_OPTIONS);
|
||||
double binomial_serial = get_elapsed_mcycles();
|
||||
sum = 0.f;
|
||||
for (int i = 0; i < N_OPTIONS; ++i)
|
||||
sum += result[i];
|
||||
printf("[binomial serial]:\t\t[%.3f] million cycles (avg %f)\n",
|
||||
binomial_serial, sum / N_OPTIONS);
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", binomial_serial / binomial_ispc);
|
||||
|
||||
//
|
||||
// Black-Scholes options pricing model, ispc implementation
|
||||
//
|
||||
sum = 0.f;
|
||||
reset_and_start_timer();
|
||||
for (int a = 0; a < N_BLACK_SCHOLES_ROUNDS; ++a) {
|
||||
black_scholes_ispc(S, X, T, r, v, result, N_OPTIONS);
|
||||
for (int i = 0; i < N_OPTIONS; ++i)
|
||||
sum += result[i];
|
||||
}
|
||||
double bs_ispc = get_elapsed_mcycles();
|
||||
printf("[black-scholes ispc]:\t\t[%.3f] million cycles (avg %f)\n",
|
||||
bs_ispc, sum / (N_BLACK_SCHOLES_ROUNDS * N_OPTIONS));
|
||||
|
||||
//
|
||||
// Black-Scholes options pricing model, serial implementation
|
||||
//
|
||||
sum = 0.f;
|
||||
reset_and_start_timer();
|
||||
for (int a = 0; a < N_BLACK_SCHOLES_ROUNDS; ++a) {
|
||||
black_scholes_serial(S, X, T, r, v, result, N_OPTIONS);
|
||||
for (int i = 0; i < N_OPTIONS; ++i)
|
||||
sum += result[i];
|
||||
}
|
||||
double bs_serial = get_elapsed_mcycles();
|
||||
printf("[black-scholes serial]:\t\t[%.3f] million cycles (avg %f)\n", bs_serial,
|
||||
sum / (N_BLACK_SCHOLES_ROUNDS * N_OPTIONS));
|
||||
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", bs_serial / bs_ispc);
|
||||
|
||||
return 0;
|
||||
}
|
||||
103
examples/options/options.ispc
Normal file
103
examples/options/options.ispc
Normal file
@@ -0,0 +1,103 @@
|
||||
// -*- mode: c++ -*-
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include "options_defs.h"
|
||||
|
||||
// Cumulative normal distribution function, computed with a fifth-degree
// polynomial in t = 1 / (1 + 0.2316419 |X|).  The polynomial is evaluated for
// |X| and the result is reflected (1 - value) when X is positive.
static inline float
CND(float X) {
    const float invSqrt2Pi = 0.39894228040f;
    float absX = abs(X);

    // Powers of t, built up by repeated multiplication.
    float t = 1.0 / (1.0 + 0.2316419 * absX);
    float t2 = t*t;
    float t3 = t2*t;
    float t4 = t2*t2;
    float t5 = t3*t2;

    float poly = (0.31938153f * t - 0.356563782f * t2 + 1.781477937f * t3 +
                  -1.821255978f * t4 + 1.330274429f * t5);
    // Scale the polynomial by the normal density at |X|.
    float tail = poly * (invSqrt2Pi * exp(-absX * absX * .5f));

    float cnd = tail;
    if (X > 0.f)
        cnd = 1.0 - tail;
    return cnd;
}
|
||||
|
||||
// Black-Scholes pricing of `count` options.  Every pass of the loop handles
// programCount consecutive options, one per program instance.  No bounds check
// is applied to the per-instance index, so `count` is expected to be a
// multiple of programCount.
export void
black_scholes_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
                   uniform float ra[], uniform float va[],
                   uniform float result[], uniform int count) {
    for (uniform int base = 0; base < count; base += programCount) {
        // Element handled by this program instance.
        int idx = base + programIndex;

        float S = Sa[idx];
        float X = Xa[idx];
        float T = Ta[idx];
        float r = ra[idx];
        float v = va[idx];

        // v * sqrt(T) appears in both d1 and d2; compute it once.
        float vSqrtT = v * sqrt(T);
        float d1 = (log(S/X) + (r + v * v * .5f) * T) / vSqrtT;
        float d2 = d1 - vSqrtT;

        result[idx] = S * CND(d1) - X * exp(-r * T) * CND(d2);
    }
}
|
||||
|
||||
|
||||
// Binomial-lattice pricing of `count` options with BINOMIAL_NUM lattice
// entries per option.  Every pass of the outer loop handles programCount
// consecutive options, one per program instance; `count` is expected to be a
// multiple of programCount since the per-instance index is not bounds-checked.
export void
binomial_put_ispc(uniform float Sa[], uniform float Xa[], uniform float Ta[],
                  uniform float ra[], uniform float va[],
                  uniform float result[], uniform int count) {
    // Per-instance lattice values, reused for every batch of options.
    float V[BINOMIAL_NUM];

    for (uniform int base = 0; base < count; base += programCount) {
        // Element handled by this program instance.
        int idx = base + programIndex;

        float S = Sa[idx];
        float X = Xa[idx];
        float T = Ta[idx];
        float r = ra[idx];
        float v = va[idx];

        float dt = T / BINOMIAL_NUM;
        float up = exp(v * sqrt(dt));
        float down = 1. / up;
        float disc = exp(r * dt);
        float pUp = (disc - down) / (up - down);
        float pDown = 1 - pUp;

        // Payoff max(0, X - S * up^(2j - BINOMIAL_NUM)) at each terminal node.
        for (uniform int j = 0; j < BINOMIAL_NUM; ++j) {
            float upow = pow(up, (float)(2*j-BINOMIAL_NUM));
            V[j] = max(0., X - S * upow);
        }

        // Fold the lattice back one level at a time; V[0] ends up holding
        // the price.
        for (uniform int width = BINOMIAL_NUM-1; width > 0; --width)
            for (uniform int k = 0; k < width; ++k)
                V[k] = (pDown * V[k] + pUp * V[k + 1]) / disc;

        result[idx] = V[0];
    }
}
|
||||
168
examples/options/options.vcxproj
Executable file
168
examples/options/options.vcxproj
Executable file
@@ -0,0 +1,168 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{8C7B5D29-1E76-44E6-BBB8-09830E5DEEAE}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>options</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<DisableSpecificWarnings>4305</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="options.cpp" />
|
||||
<ClCompile Include="options_serial.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="options.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86 --target=sse4x2
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --target=sse4x2
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj;%(Filename)_ispc.h</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="options_defs.h" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
42
examples/options/options_defs.h
Normal file
42
examples/options/options_defs.h
Normal file
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Problem-size constants for the options example.  They are plain macros
// because this header is included both from C++ and from options.ispc.
#ifndef OPTIONS_DEFS_H
|
||||
#define OPTIONS_DEFS_H 1
|
||||
|
||||
// Number of entries in the binomial lattice array (V[BINOMIAL_NUM]) and the
// divisor used to derive the time step (dt = T / BINOMIAL_NUM).
#define BINOMIAL_NUM 64
|
||||
// Number of options allocated and priced by main().
#define N_OPTIONS 65536
|
||||
// Number of times the Black-Scholes kernels are invoked per timing run.
#define N_BLACK_SCHOLES_ROUNDS 20
|
||||
|
||||
|
||||
#endif // OPTIONS_DEFS_H
|
||||
114
examples/options/options_serial.cpp
Normal file
114
examples/options/options_serial.cpp
Normal file
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include "options_defs.h"
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
|
||||
// Cumulative normal distribution function, computed with a fifth-degree
// polynomial in t = 1 / (1 + 0.2316419 |X|).  The polynomial is evaluated for
// |X| and the result is reflected (1 - value) when X is positive.
static inline float
CND(float X) {
    const float invSqrt2Pi = 0.39894228040f;
    const float absX = fabsf(X);

    // Powers of t, built up by repeated multiplication.  The unsuffixed
    // literals make this first expression evaluate in double precision.
    const float t = 1.0 / (1.0 + 0.2316419 * absX);
    const float t2 = t*t;
    const float t3 = t2*t;
    const float t4 = t2*t2;
    const float t5 = t3*t2;

    const float poly = (0.31938153f * t - 0.356563782f * t2 + 1.781477937f * t3 +
                        -1.821255978f * t4 + 1.330274429f * t5);
    // Scale the polynomial by the normal density at |X|.
    const float tail = poly * (invSqrt2Pi * expf(-absX * absX * .5f));

    if (X > 0.f)
        return 1.0 - tail;
    return tail;
}
|
||||
|
||||
|
||||
void
|
||||
black_scholes_serial(float Sa[], float Xa[], float Ta[],
|
||||
float ra[], float va[],
|
||||
float result[], int count) {
|
||||
for (int i = 0; i < count; ++i) {
|
||||
float S = Sa[i], X = Xa[i];
|
||||
float T = Ta[i], r = ra[i];
|
||||
float v = va[i];
|
||||
|
||||
float d1 = (logf(S/X) + (r + v * v * .5f) * T) / (v * sqrtf(T));
|
||||
float d2 = d1 - v * sqrtf(T);
|
||||
|
||||
result[i] = S * CND(d1) - X * expf(-r * T) * CND(d2);
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
binomial_put_serial(float Sa[], float Xa[], float Ta[],
|
||||
float ra[], float va[],
|
||||
float result[], int count) {
|
||||
float V[BINOMIAL_NUM];
|
||||
|
||||
for (int i = 0; i < count; ++i) {
|
||||
float S = Sa[i], X = Xa[i];
|
||||
float T = Ta[i], r = ra[i];
|
||||
float v = va[i];
|
||||
|
||||
float dt = T / BINOMIAL_NUM;
|
||||
float u = expf(v * sqrtf(dt));
|
||||
float d = 1. / u;
|
||||
float disc = expf(r * dt);
|
||||
float Pu = (disc - d) / (u - d);
|
||||
|
||||
for (int j = 0; j < BINOMIAL_NUM; ++j) {
|
||||
float upow = powf(u, (float)(2*j-BINOMIAL_NUM));
|
||||
V[j] = std::max(0.f, X - S * upow);
|
||||
}
|
||||
|
||||
for (int j = BINOMIAL_NUM-1; j >= 0; --j)
|
||||
for (int k = 0; k < j; ++k)
|
||||
V[k] = ((1 - Pu) * V[k] + Pu * V[k + 1]) / disc;
|
||||
|
||||
result[i] = V[0];
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
24
examples/rt/Makefile
Normal file
24
examples/rt/Makefile
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
# Build settings: C++ compiler and flags (generated headers are looked up in
# objs/), and the ispc compiler and its flags.
CXX=g++
|
||||
CXXFLAGS=-Iobjs/ -O3 -Wall
|
||||
ISPC=ispc
|
||||
ISPCFLAGS=-O2 --target=sse4x2
|
||||
|
||||
# Default goal: the rt executable.
default: rt
|
||||
|
||||
.PHONY: dirs clean
|
||||
|
||||
# Create the output directory for objects and generated headers.
dirs:
|
||||
/bin/mkdir -p objs/
|
||||
|
||||
# Remove all build products and editor backup files.
clean:
|
||||
/bin/rm -rf objs *~ rt
|
||||
|
||||
# Link the driver, the serial implementation and the ispc-generated object.
rt: dirs objs/rt.o objs/rt_serial.o objs/rt_ispc.o
|
||||
$(CXX) $(CXXFLAGS) -o $@ objs/rt.o objs/rt_ispc.o objs/rt_serial.o -lm
|
||||
|
||||
# Compile a C++ source; depends on the ispc-generated header so that the
# header exists before any C++ file is compiled.
objs/%.o: %.cpp objs/rt_ispc.h
|
||||
$(CXX) $< $(CXXFLAGS) -c -o $@
|
||||
|
||||
# Compile an ispc source into an object file plus a C++ header.
objs/%_ispc.h objs/%_ispc.o: %.ispc
|
||||
$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
BIN
examples/rt/cornell.bvh
Normal file
BIN
examples/rt/cornell.bvh
Normal file
Binary file not shown.
BIN
examples/rt/cornell.camera
Normal file
BIN
examples/rt/cornell.camera
Normal file
Binary file not shown.
244
examples/rt/rt.cpp
Normal file
244
examples/rt/rt.cpp
Normal file
@@ -0,0 +1,244 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <stdio.h>
|
||||
#include <math.h>
|
||||
#include <algorithm>
|
||||
#include <assert.h>
|
||||
#include <sys/types.h>
|
||||
#ifndef __APPLE__
|
||||
#include <malloc.h>
|
||||
#endif
|
||||
#include "../timing.h"
|
||||
#include "rt_ispc.h"
|
||||
|
||||
using namespace ispc;
|
||||
|
||||
typedef unsigned int uint;
|
||||
|
||||
// Allocate uninitialized storage for `count` objects of type T at a 64-byte
// aligned address.  No constructors are run.  Returns NULL if `count` is
// negative or the allocation fails.
//
// The returned pointer cannot portably be passed to free(): on Windows it
// comes from _aligned_malloc(), and on OS X it points into the interior of a
// larger malloc() block whose real base address is stored in the pointer-sized
// slot immediately before the returned address.
template <typename T>
T *AllocAligned(int count) {
    if (count < 0)
        return NULL;
    // Compute the byte count in size_t so a large count cannot overflow int.
    size_t size = (size_t)count * sizeof(T);
#if defined(_WIN32) || defined(_WIN64)
    return (T *)_aligned_malloc(size, 64);
#elif defined (__APPLE__)
    // Allocate excess memory to ensure an aligned pointer can be returned:
    // room for the saved base pointer plus up to 63 bytes of padding.
    void *mem = malloc(size + (64-1) + sizeof(void*));
    if (mem == NULL)
        return NULL;
    char *amem = ((char*)mem) + sizeof(void*);
    // Round up to the next 64-byte boundary, advancing by 0..63 bytes.  An
    // address that is already aligned must not be advanced by another 64
    // bytes, otherwise the array would end one byte past the allocation.
    amem += (64 - (reinterpret_cast<uint64_t>(amem) & (64 - 1))) & (64 - 1);
    ((void**)amem)[-1] = mem;
    return (T *)amem;
#else
    return (T *)memalign(64, size);
#endif
}
|
||||
|
||||
// Serial (non-ispc) ray tracer used as the timing baseline in main().  It is
// called with the same arguments as the ispc raytrace() entry point: image
// resolution, the two camera matrices, the output buffers (image[] and id[],
// each width*height entries) and the BVH nodes and triangles read from disk.
extern void raytrace_serial(int width, int height, const float raster2camera[4][4],
|
||||
const float camera2world[4][4], float image[],
|
||||
int id[], const LinearBVHNode nodes[],
|
||||
const Triangle triangles[]);
|
||||
|
||||
|
||||
// Writes a binary PPM ("P6") image to `filename` in which each pixel's color
// is derived from the id stored for it in idImage (width*height entries, row
// major).  depthImage is accepted but not used.  If the file cannot be opened
// the error is reported with perror() and the process exits with status 1.
static void writeImage(int *idImage, float *depthImage, int width, int height,
                       const char *filename) {
    FILE *out = fopen(filename, "wb");
    if (out == NULL) {
        perror(filename);
        exit(1);
    }

    fprintf(out, "P6\n%d %d\n255\n", width, height);
    for (int row = 0; row < height; ++row) {
        const int *rowIds = idImage + row * width;
        for (int col = 0; col < width; ++col) {
            // Spread the low 24 bits of the object id over the three color
            // channels to get a pseudo-random color per object.
            const int id = rowIds[col];
            unsigned char rgb[3] = { 0, 0, 0 };

            for (int bit = 0; bit < 8; ++bit) {
                // Id bits 3*bit, 3*bit+1 and 3*bit+2 feed red, green and
                // blue respectively, filling each channel from its high bit
                // downwards.
                const int src = 3 * bit;
                const int dst = 7 - bit;
                rgb[0] |= ((id >> src) & 1) << dst;
                rgb[1] |= ((id >> (src + 1)) & 1) << dst;
                rgb[2] |= ((id >> (src + 2)) & 1) << dst;
            }
            fwrite(rgb, 1, 3, out);
        }
    }
    fclose(out);
}
|
||||
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
if (argc != 2) {
|
||||
fprintf(stderr, "usage: rt <filename base>\n");
|
||||
exit(1);
|
||||
}
|
||||
|
||||
#define READ(var, n) \
|
||||
if (fread(&(var), sizeof(var), n, f) != (unsigned int)n) { \
|
||||
fprintf(stderr, "Unexpected EOF reading scene file\n"); \
|
||||
return 1; \
|
||||
} else /* eat ; */
|
||||
|
||||
//
|
||||
// Read the camera specification information from the camera file
|
||||
//
|
||||
char fnbuf[1024];
|
||||
sprintf(fnbuf, "%s.camera", argv[1]);
|
||||
FILE *f = fopen(fnbuf, "rb");
|
||||
if (!f) {
|
||||
perror(argv[1]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
//
|
||||
// Nothing fancy, and trouble if we run on a big-endian system, just
|
||||
// fread in the bits
|
||||
//
|
||||
int width, height;
|
||||
float camera2world[4][4], raster2camera[4][4];
|
||||
READ(width, 1);
|
||||
READ(height, 1);
|
||||
READ(camera2world[0][0], 16);
|
||||
READ(raster2camera[0][0], 16);
|
||||
|
||||
//
|
||||
// Read in the serialized BVH
|
||||
//
|
||||
sprintf(fnbuf, "%s.bvh", argv[1]);
|
||||
f = fopen(fnbuf, "rb");
|
||||
if (!f) {
|
||||
perror(argv[2]);
|
||||
return 1;
|
||||
}
|
||||
|
||||
// The BVH file starts with an int that gives the total number of BVH
|
||||
// nodes
|
||||
uint nNodes;
|
||||
READ(nNodes, 1);
|
||||
|
||||
LinearBVHNode *nodes = AllocAligned<LinearBVHNode>(nNodes);
|
||||
for (unsigned int i = 0; i < nNodes; ++i) {
|
||||
// Each node is 6x floats for a boox, then an integer for an offset
|
||||
// to the second child node, then an integer that encodes the type
|
||||
// of node, the total number of int it if a leaf node, etc.
|
||||
float b[6];
|
||||
READ(b[0], 6);
|
||||
nodes[i].bounds[0].v[0] = b[0];
|
||||
nodes[i].bounds[0].v[1] = b[1];
|
||||
nodes[i].bounds[0].v[2] = b[2];
|
||||
nodes[i].bounds[1].v[0] = b[3];
|
||||
nodes[i].bounds[1].v[1] = b[4];
|
||||
nodes[i].bounds[1].v[2] = b[5];
|
||||
READ(nodes[i].offset, 1);
|
||||
READ(nodes[i].primsAxis, 1);
|
||||
}
|
||||
|
||||
// And then read the triangles
|
||||
uint nTris;
|
||||
READ(nTris, 1);
|
||||
Triangle *triangles = AllocAligned<Triangle>(nTris);
|
||||
for (uint i = 0; i < nTris; ++i) {
|
||||
// 9x floats for the 3 vertices
|
||||
float v[9];
|
||||
READ(v[0], 9);
|
||||
float *vp = v;
|
||||
for (int j = 0; j < 3; ++j) {
|
||||
triangles[i].p[j].v[0] = *vp++;
|
||||
triangles[i].p[j].v[1] = *vp++;
|
||||
triangles[i].p[j].v[2] = *vp++;
|
||||
}
|
||||
// And create an object id
|
||||
triangles[i].id = i+1;
|
||||
}
|
||||
fclose(f);
|
||||
|
||||
// round image resolution up to multiple of 4 to makethings easy for
|
||||
// the code that assigns pixels to ispc program instances
|
||||
height = (height + 3) & ~3;
|
||||
width = (width + 3) & ~3;
|
||||
|
||||
// allocate images; one to hold hit object ids, one to hold depth to
|
||||
// the first interseciton
|
||||
int *id = new int[width*height];
|
||||
float *image = new float[width*height];
|
||||
|
||||
//
|
||||
// Run 3 iterations with ispc, record the minimum time
|
||||
//
|
||||
double minTimeISPC = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
raytrace(width, height, raster2camera, camera2world,
|
||||
image, id, nodes, triangles);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minTimeISPC = std::min(dt, minTimeISPC);
|
||||
}
|
||||
printf("[rt ispc]:\t\t\t[%.3f] million cycles for %d x %d image\n", minTimeISPC, width, height);
|
||||
|
||||
writeImage(id, image, width, height, "rt-ispc.ppm");
|
||||
|
||||
//
|
||||
// And 3 iterations with the serial implementation, reporting the
|
||||
// minimum time.
|
||||
//
|
||||
double minTimeSerial = 1e30;
|
||||
for (int i = 0; i < 3; ++i) {
|
||||
reset_and_start_timer();
|
||||
raytrace_serial(width, height, raster2camera, camera2world,
|
||||
image, id, nodes, triangles);
|
||||
double dt = get_elapsed_mcycles();
|
||||
minTimeSerial = std::min(dt, minTimeSerial);
|
||||
}
|
||||
printf("[rt serial]:\t\t\t[%.3f] million cycles for %d x %d image\n",
|
||||
minTimeSerial, width, height);
|
||||
printf("\t\t\t\t(%.2fx speedup from ISPC)\n", minTimeSerial / minTimeISPC);
|
||||
|
||||
writeImage(id, image, width, height, "rt-serial.ppm");
|
||||
|
||||
return 0;
|
||||
}
|
||||
273
examples/rt/rt.ispc
Normal file
273
examples/rt/rt.ispc
Normal file
@@ -0,0 +1,273 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
// Textually replaces every later use of `bool` in this file with `int`.
// NOTE(review): presumably a workaround for how the compiler handles bool
// values -- confirm before removing.
#define bool int
|
||||
|
||||
// float3: a three-element short vector of float, accessed below through
// its .x/.y/.z members.
typedef float<3> float3;
|
||||
|
||||
// A ray with its parametric extent and the id of the closest triangle
// found so far.
struct Ray {
    float3 origin;                      // starting point
    float3 dir;                         // direction (not normalized by generateRay)
    float3 invDir;                      // component-wise 1/dir
    uniform unsigned int dirIsNeg[3];   // per-axis flag, 1 if invDir is negative on that axis
    float mint;                         // closest accepted hit distance
    float maxt;                         // farthest accepted hit distance; shrinks on each hit
    int hitId;                          // id of the closest triangle hit; generateRay sets it to 0
};
|
||||
|
||||
// A triangle: three vertices and the id recorded in Ray::hitId on a hit.
struct Triangle {
    uniform float3 p[3];    // vertex positions
    uniform int id;         // identifier written to the ray when it hits
};
|
||||
|
||||
// One node of the flattened BVH consumed by BVHIntersect().
struct LinearBVHNode {
    // bounds[0] / bounds[1] are the two corners of the node's bounding box.
    uniform float3 bounds[2];
    // Leaf: index of the node's first triangle.  Interior: index of the
    // second child (the first child is stored at the next node index).
    uniform unsigned int offset;
    // Bits 0:7 nPrimitives, bits 8:15 split axis, bits 16:31 padding.
    uniform unsigned int primsAxis;
};
|
||||
|
||||
// Number of primitives stored in the node: the low 8 bits of primsAxis.
static inline uniform int nPrims(const reference LinearBVHNode node) {
    uniform unsigned int count = node.primsAxis & 0xff;
    return count;
}
|
||||
|
||||
// Split axis of the node: bits 8..15 of primsAxis.
static inline uniform int axis(const reference LinearBVHNode node) {
    uniform unsigned int shifted = node.primsAxis >> 8;
    return (shifted & 0xff);
}
|
||||
|
||||
// A node is interior when its primitive count (low 8 bits of primsAxis)
// is zero.
static inline uniform bool isInterior(const reference LinearBVHNode node) {
    uniform int primCount = (node.primsAxis & 0xff);
    return primCount == 0;
}
|
||||
|
||||
// Cross product v1 x v2.
static inline float3 Cross(const float3 v1, const float3 v2) {
    float3 result;
    result.x = (v1.y * v2.z) - (v1.z * v2.y);
    result.y = (v1.z * v2.x) - (v1.x * v2.z);
    result.z = (v1.x * v2.y) - (v1.y * v2.x);
    return result;
}
|
||||
|
||||
// Dot product of a and b, summed in x, y, z order.
static inline float Dot(const float3 a, const float3 b) {
    return ((a.x * b.x) + (a.y * b.y)) + (a.z * b.z);
}
|
||||
|
||||
|
||||
// Builds the camera ray through raster position (x, y).
//
// raster2camera maps the raster point (x, y, 0) into camera space;
// camera2world then supplies the world-space direction (upper-left 3x3)
// and origin (last column, divided by element [3][3]).  Also fills in
// invDir, dirIsNeg, the [mint, maxt] interval and a hitId of 0.
static void generateRay(uniform const float raster2camera[4][4],
                        uniform const float camera2world[4][4],
                        float x, float y, reference Ray ray) {
    // transform raster coordinate (x, y, 0) to camera space, including
    // the homogeneous divide
    float camw = raster2camera[3][3];
    float camx = (raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3]) / camw;
    float camy = (raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3]) / camw;
    float camz = raster2camera[2][3] / camw;

    // Direction: camera-space point rotated into world space.
    ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy + camera2world[0][2] * camz;
    ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy + camera2world[1][2] * camz;
    ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy + camera2world[2][2] * camz;

    // Origin: translation column of camera2world.
    uniform float worldW = camera2world[3][3];
    ray.origin.x = camera2world[0][3] / worldW;
    ray.origin.y = camera2world[1][3] / worldW;
    ray.origin.z = camera2world[2][3] / worldW;

    ray.invDir = 1.f / ray.dir;

    // dirIsNeg is uniform: an axis is flagged as soon as any program
    // instance has a negative inverse direction on it.
    ray.dirIsNeg[0] = any(ray.invDir.x < 0) ? 1 : 0;
    ray.dirIsNeg[1] = any(ray.invDir.y < 0) ? 1 : 0;
    ray.dirIsNeg[2] = any(ray.invDir.z < 0) ? 1 : 0;

    ray.mint = 0.f;
    ray.maxt = 1e30f;
    ray.hitId = 0;
}
|
||||
|
||||
|
||||
// Slab test of the ray against the box bounds[0]..bounds[1].  Returns
// true when the ray's [mint, maxt] interval overlaps the box.
static inline bool BBoxIntersect(const reference uniform float3 bounds[2],
                                 const reference Ray ray) {
    // Parametric distances to the two planes of each slab.
    float3 tLo = (bounds[0] - ray.origin) * ray.invDir;
    float3 tHi = (bounds[1] - ray.origin) * ray.invDir;

    float tEnter = ray.mint, tExit = ray.maxt;

    // All three slabs are always processed; there is no early out.

    // x slab: order the pair, then narrow the interval
    float nearX = tLo.x, farX = tHi.x;
    if (nearX > farX) {
        nearX = tHi.x;
        farX = tLo.x;
    }
    tEnter = max(nearX, tEnter);
    tExit = min(farX, tExit);

    // y slab
    float nearY = tLo.y, farY = tHi.y;
    if (nearY > farY) {
        nearY = tHi.y;
        farY = tLo.y;
    }
    tEnter = max(nearY, tEnter);
    tExit = min(farY, tExit);

    // z slab
    float nearZ = tLo.z, farZ = tHi.z;
    if (nearZ > farZ) {
        nearZ = tHi.z;
        farZ = tLo.z;
    }
    tEnter = max(nearZ, tEnter);
    tExit = min(farZ, tExit);

    return (tEnter <= tExit);
}
|
||||
|
||||
|
||||
|
||||
// Ray/triangle intersection using barycentric coordinates.  On a hit
// within [ray.mint, ray.maxt] the ray's maxt is shortened to the hit
// distance and hitId is set to the triangle's id.  Every quantity is
// computed unconditionally and the rejection tests are applied together
// at the end, so there is no early return.
static inline bool TriIntersect(const reference Triangle tri, reference Ray ray) {
    uniform float3 e1 = tri.p[1] - tri.p[0];
    uniform float3 e2 = tri.p[2] - tri.p[0];

    float3 s1 = Cross(ray.dir, e2);
    float divisor = Dot(s1, e1);
    float invDivisor = 1.f / divisor;

    // First barycentric coordinate
    float3 d = ray.origin - tri.p[0];
    float b1 = Dot(d, s1) * invDivisor;

    // Second barycentric coordinate
    float3 s2 = Cross(d, e1);
    float b2 = Dot(ray.dir, s2) * invDivisor;

    // Distance _t_ along the ray to the intersection point
    float t = Dot(e2, s2) * invDivisor;

    // Reject degenerate configurations, points outside the triangle and
    // distances outside the ray's current interval.
    bool hit = true;
    if (divisor == 0. ||
        b1 < 0. || b1 > 1. ||
        b2 < 0. || b1 + b2 > 1. ||
        t < ray.mint || t > ray.maxt)
        hit = false;

    if (hit) {
        ray.maxt = t;
        ray.hitId = tri.id;
    }
    return hit;
}
|
||||
|
||||
|
||||
// Traverses the flattened BVH with an explicit stack and intersects the
// ray with the triangles of every leaf whose bounds any program instance
// hits.  On return r.maxt / r.hitId describe the closest hit found; the
// return value says whether anything was hit.
bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
                  reference Ray r) {
    // Work on a local copy; only maxt and hitId are written back.
    Ray ray = r;
    bool hit = false;

    // Stack of node indices still to visit, and the node being visited.
    uniform int pending[64];
    uniform int pendingCount = 0, current = 0;

    while (true) {
        LinearBVHNode node = nodes[current];
        // Set when an interior node has already chosen the next node.
        uniform int descended = 0;

        if (any(BBoxIntersect(node.bounds, ray))) {
            uniform unsigned int nPrimitives = nPrims(node);
            if (nPrimitives > 0) {
                // Leaf: intersect the ray with each of its triangles
                uniform unsigned int primitivesOffset = node.offset;
                for (uniform unsigned int i = 0; i < nPrimitives; ++i) {
                    if (TriIntersect(tris[primitivesOffset+i], ray))
                        hit = true;
                }
            }
            else {
                // Interior: push the far child, continue with the near one
                if (r.dirIsNeg[axis(node)]) {
                    pending[pendingCount++] = current + 1;
                    current = node.offset;
                }
                else {
                    pending[pendingCount++] = node.offset;
                    current = current + 1;
                }
                descended = 1;
            }
        }

        if (descended == 0) {
            // Leaf processed or box missed: pop the next node, or finish
            if (pendingCount == 0)
                break;
            current = pending[--pendingCount];
        }
    }

    r.maxt = ray.maxt;
    r.hitId = ray.hitId;

    return hit;
}
|
||||
|
||||
|
||||
// Traces one ray per pixel of a width x height image, writing the hit
// distance (ray.maxt) to image[] and the id of the triangle hit to id[],
// both indexed as y * width + x.
//
// Pixels are processed in 4x4 blocks.  Blocks that overhang the right or
// bottom edge (width or height not a multiple of 4) are still traced in
// full, but results for pixels outside the image are discarded rather
// than stored.
//
// NOTE(review): the inner loop runs 16 / programCount times, so it
// assumes programCount divides 16 -- confirm for wider targets.
export void raytrace(uniform int width, uniform int height,
                     const uniform float raster2camera[4][4],
                     const uniform float camera2world[4][4],
                     uniform float image[], uniform int id[],
                     const LinearBVHNode nodes[],
                     const Triangle triangles[]) {
    // Pixel offsets inside a 4x4 block, one entry per sample.
    static const uniform float udx[16] = { 0, 1, 0, 1, 2, 3, 2, 3,
                                           0, 1, 0, 1, 2, 3, 2, 3 };
    static const uniform float udy[16] = { 0, 0, 1, 1, 0, 0, 1, 1,
                                           2, 2, 3, 3, 2, 2, 3, 3 };

    // The outer loops are always over blocks of 4x4 pixels
    for (uniform int y = 0; y < height; y += 4) {
        for (uniform int x = 0; x < width; x += 4) {
            // Now we have a block of 4x4=16 pixels to process; it will
            // take 16/programCount iterations of this loop to process
            // them.
            for (uniform int o = 0; o < 16 / programCount; ++o) {
                // Map program instances to samples in the udx/udy arrays
                // to figure out which pixel each program instance is
                // responsible for
                const float dx = udx[o * programCount + programIndex];
                const float dy = udy[o * programCount + programIndex];

                Ray ray;
                generateRay(raster2camera, camera2world, x+dx, y+dy, ray);
                BVHIntersect(nodes, triangles, ray);

                // Only store results for pixels inside the image.  Without
                // this, a block overhanging the right edge would overwrite
                // pixels of the next row and one overhanging the bottom
                // edge would write past the end of the buffers.
                int px = x + (int)dx;
                int py = y + (int)dy;
                if (px < width && py < height) {
                    int offset = py * width + px;
                    image[offset] = ray.maxt;
                    id[offset] = ray.hitId;
                }
            }
        }
    }
}
|
||||
165
examples/rt/rt.vcxproj
Executable file
165
examples/rt/rt.vcxproj
Executable file
@@ -0,0 +1,165 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{E787BC3F-2D2E-425E-A64D-4721E2FF3DC9}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>rt</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="rt.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
|
||||
</Command>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
|
||||
</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj</Outputs>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="rt.cpp" />
|
||||
<ClCompile Include="rt_serial.cpp" />
|
||||
</ItemGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
288
examples/rt/rt_serial.cpp
Normal file
288
examples/rt/rt_serial.cpp
Normal file
@@ -0,0 +1,288 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifdef _MSC_VER
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
#define NOMINMAX
|
||||
#pragma warning (disable: 4244)
|
||||
#pragma warning (disable: 4305)
|
||||
#endif
|
||||
|
||||
#include <algorithm>
|
||||
|
||||
// Just enough of a float3 class to do what we need in this file.
//
// The struct is 16-byte aligned and carries an explicit pad member so its
// size and layout match the ispc version.  All members, including pad,
// are always initialized so copying a float3 never reads an indeterminate
// value.
#ifdef _MSC_VER
__declspec(align(16))
#endif
struct float3 {
    // Zero vector.
    float3() : x(0.f), y(0.f), z(0.f), pad(0.f) { }
    // Vector with the given components.
    float3(float xx, float yy, float zz) : x(xx), y(yy), z(zz), pad(0.f) { }

    // Uniform scale by f.
    float3 operator*(float f) const { return float3(x*f, y*f, z*f); }
    // Component-wise difference.
    float3 operator-(const float3 &f2) const {
        return float3(x-f2.x, y-f2.y, z-f2.z);
    }
    // Component-wise product.
    float3 operator*(const float3 &f2) const {
        return float3(x*f2.x, y*f2.y, z*f2.z);
    }

    float x, y, z;
    float pad;  // match padding/alignment of ispc version
}
#ifndef _MSC_VER
__attribute__ ((aligned(16)))
#endif
;
|
||||
|
||||
struct Ray {
|
||||
float3 origin, dir, invDir;
|
||||
unsigned int dirIsNeg[3];
|
||||
float mint, maxt;
|
||||
int hitId;
|
||||
};
|
||||
|
||||
|
||||
// Declare these in a namespace so the mangling matches
|
||||
namespace ispc {
|
||||
struct Triangle {
|
||||
float3 p[3];
|
||||
int id;
|
||||
};
|
||||
|
||||
struct LinearBVHNode {
|
||||
float3 bounds[2];
|
||||
unsigned int offset; // primitives for leaf, second child for interior
|
||||
unsigned int primsAxis; // 0:7 nPrimitives, 8:15 split axis, 16:31 padding
|
||||
};
|
||||
}
|
||||
|
||||
using namespace ispc;
|
||||
|
||||
inline int nPrims(const LinearBVHNode &node) {
|
||||
return (node.primsAxis & 0xff);
|
||||
}
|
||||
|
||||
inline int axis(const LinearBVHNode &node) {
|
||||
return ((node.primsAxis >> 8) & 0xff);
|
||||
}
|
||||
|
||||
inline bool isInterior(const LinearBVHNode &node) {
|
||||
return nPrims(node) == 0;
|
||||
}
|
||||
|
||||
// Cross product v1 x v2.
inline float3 Cross(const float3 &v1, const float3 &v2) {
    return float3((v1.y * v2.z) - (v1.z * v2.y),
                  (v1.z * v2.x) - (v1.x * v2.z),
                  (v1.x * v2.y) - (v1.y * v2.x));
}
|
||||
|
||||
inline float Dot(const float3 &a, const float3 &b) {
|
||||
return a.x * b.x + a.y * b.y + a.z * b.z;
|
||||
}
|
||||
|
||||
|
||||
static void generateRay(const float raster2camera[4][4],
|
||||
const float camera2world[4][4],
|
||||
float x, float y, Ray &ray) {
|
||||
ray.mint = 0.f;
|
||||
ray.maxt = 1e30f;
|
||||
|
||||
ray.hitId = 0;
|
||||
|
||||
// transform raster coordinate (x, y, 0) to camera space
|
||||
float camx = raster2camera[0][0] * x + raster2camera[0][1] * y + raster2camera[0][3];
|
||||
float camy = raster2camera[1][0] * x + raster2camera[1][1] * y + raster2camera[1][3];
|
||||
float camz = raster2camera[2][3];
|
||||
float camw = raster2camera[3][3];
|
||||
camx /= camw;
|
||||
camy /= camw;
|
||||
camz /= camw;
|
||||
|
||||
ray.dir.x = camera2world[0][0] * camx + camera2world[0][1] * camy + camera2world[0][2] * camz;
|
||||
ray.dir.y = camera2world[1][0] * camx + camera2world[1][1] * camy + camera2world[1][2] * camz;
|
||||
ray.dir.z = camera2world[2][0] * camx + camera2world[2][1] * camy + camera2world[2][2] * camz;
|
||||
|
||||
ray.origin.x = camera2world[0][3] / camera2world[3][3];
|
||||
ray.origin.y = camera2world[1][3] / camera2world[3][3];
|
||||
ray.origin.z = camera2world[2][3] / camera2world[3][3];
|
||||
|
||||
ray.invDir.x = 1.f / ray.dir.x;
|
||||
ray.invDir.y = 1.f / ray.dir.y;
|
||||
ray.invDir.z = 1.f / ray.dir.z;
|
||||
|
||||
ray.dirIsNeg[0] = (ray.invDir.x < 0) ? 1 : 0;
|
||||
ray.dirIsNeg[1] = (ray.invDir.y < 0) ? 1 : 0;
|
||||
ray.dirIsNeg[2] = (ray.invDir.z < 0) ? 1 : 0;
|
||||
}
|
||||
|
||||
|
||||
static inline bool BBoxIntersect(const float3 bounds[2],
|
||||
const Ray &ray) {
|
||||
float t0 = ray.mint, t1 = ray.maxt;
|
||||
|
||||
float3 tNear = (bounds[0] - ray.origin) * ray.invDir;
|
||||
float3 tFar = (bounds[1] - ray.origin) * ray.invDir;
|
||||
if (tNear.x > tFar.x) {
|
||||
float tmp = tNear.x;
|
||||
tNear.x = tFar.x;
|
||||
tFar.x = tmp;
|
||||
}
|
||||
t0 = std::max(tNear.x, t0);
|
||||
t1 = std::min(tFar.x, t1);
|
||||
|
||||
if (tNear.y > tFar.y) {
|
||||
float tmp = tNear.y;
|
||||
tNear.y = tFar.y;
|
||||
tFar.y = tmp;
|
||||
}
|
||||
t0 = std::max(tNear.y, t0);
|
||||
t1 = std::min(tFar.y, t1);
|
||||
|
||||
if (tNear.z > tFar.z) {
|
||||
float tmp = tNear.z;
|
||||
tNear.z = tFar.z;
|
||||
tFar.z = tmp;
|
||||
}
|
||||
t0 = std::max(tNear.z, t0);
|
||||
t1 = std::min(tFar.z, t1);
|
||||
|
||||
return (t0 <= t1);
|
||||
}
|
||||
|
||||
|
||||
|
||||
inline bool TriIntersect(const Triangle &tri, Ray &ray) {
|
||||
float3 e1 = tri.p[1] - tri.p[0];
|
||||
float3 e2 = tri.p[2] - tri.p[0];
|
||||
|
||||
float3 s1 = Cross(ray.dir, e2);
|
||||
float divisor = Dot(s1, e1);
|
||||
|
||||
if (divisor == 0.)
|
||||
return false;
|
||||
float invDivisor = 1.f / divisor;
|
||||
|
||||
// Compute first barycentric coordinate
|
||||
float3 d = ray.origin - tri.p[0];
|
||||
float b1 = Dot(d, s1) * invDivisor;
|
||||
if (b1 < 0. || b1 > 1.)
|
||||
return false;
|
||||
|
||||
// Compute second barycentric coordinate
|
||||
float3 s2 = Cross(d, e1);
|
||||
float b2 = Dot(ray.dir, s2) * invDivisor;
|
||||
if (b2 < 0. || b1 + b2 > 1.)
|
||||
return false;
|
||||
|
||||
// Compute _t_ to intersection point
|
||||
float t = Dot(e2, s2) * invDivisor;
|
||||
if (t < ray.mint || t > ray.maxt)
|
||||
return false;
|
||||
|
||||
ray.maxt = t;
|
||||
ray.hitId = tri.id;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
bool BVHIntersect(const LinearBVHNode nodes[], const Triangle tris[],
|
||||
Ray &r) {
|
||||
Ray ray = r;
|
||||
bool hit = false;
|
||||
// Follow ray through BVH nodes to find primitive intersections
|
||||
int todoOffset = 0, nodeNum = 0;
|
||||
int todo[64];
|
||||
|
||||
while (true) {
|
||||
// Check ray against BVH node
|
||||
const LinearBVHNode &node = nodes[nodeNum];
|
||||
if (BBoxIntersect(node.bounds, ray)) {
|
||||
unsigned int nPrimitives = nPrims(node);
|
||||
if (nPrimitives > 0) {
|
||||
// Intersect ray with primitives in leaf BVH node
|
||||
unsigned int primitivesOffset = node.offset;
|
||||
for (unsigned int i = 0; i < nPrimitives; ++i) {
|
||||
if (TriIntersect(tris[primitivesOffset+i], ray))
|
||||
hit = true;
|
||||
}
|
||||
if (todoOffset == 0)
|
||||
break;
|
||||
nodeNum = todo[--todoOffset];
|
||||
}
|
||||
else {
|
||||
// Put far BVH node on _todo_ stack, advance to near node
|
||||
if (r.dirIsNeg[axis(node)]) {
|
||||
todo[todoOffset++] = nodeNum + 1;
|
||||
nodeNum = node.offset;
|
||||
}
|
||||
else {
|
||||
todo[todoOffset++] = node.offset;
|
||||
nodeNum = nodeNum + 1;
|
||||
}
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (todoOffset == 0)
|
||||
break;
|
||||
nodeNum = todo[--todoOffset];
|
||||
}
|
||||
}
|
||||
r.maxt = ray.maxt;
|
||||
r.hitId = ray.hitId;
|
||||
|
||||
return hit;
|
||||
}
|
||||
|
||||
|
||||
void raytrace_serial(int width, int height,
|
||||
const float raster2camera[4][4],
|
||||
const float camera2world[4][4],
|
||||
float image[],
|
||||
int id[],
|
||||
const LinearBVHNode nodes[],
|
||||
const Triangle triangles[]) {
|
||||
for (int y = 0; y < height; ++y) {
|
||||
for (int x = 0; x < width; ++x) {
|
||||
Ray ray;
|
||||
generateRay(raster2camera, camera2world, x, y, ray);
|
||||
BVHIntersect(nodes, triangles, ray);
|
||||
|
||||
int offset = y * width + x;
|
||||
image[offset] = ray.maxt;
|
||||
id[offset] = ray.hitId;
|
||||
}
|
||||
}
|
||||
}
|
||||
BIN
examples/rt/sponza.bvh
Normal file
BIN
examples/rt/sponza.bvh
Normal file
Binary file not shown.
BIN
examples/rt/sponza.camera
Normal file
BIN
examples/rt/sponza.camera
Normal file
Binary file not shown.
BIN
examples/rt/teapot.bvh
Normal file
BIN
examples/rt/teapot.bvh
Normal file
Binary file not shown.
BIN
examples/rt/teapot.camera
Normal file
BIN
examples/rt/teapot.camera
Normal file
Binary file not shown.
25
examples/simple/Makefile
Normal file
25
examples/simple/Makefile
Normal file
@@ -0,0 +1,25 @@
|
||||
|
||||
CXX=g++
CXXFLAGS=-Iobjs/ -O3 -Wall
ISPC=ispc
ISPCFLAGS=-O2

default: simple

.PHONY: default dirs clean
# Keep the ispc-generated header (it is produced as an intermediate file
# of the pattern rule below).
.PRECIOUS: objs/simple_ispc.h

dirs:
	/bin/mkdir -p objs/

clean:
	/bin/rm -rf objs *~ simple

# "dirs" is an order-only prerequisite (after the "|") everywhere: the
# objs/ directory must exist before anything is written into it, but the
# phony target must not force a rebuild on every invocation.
simple: objs/simple.o objs/simple_ispc.o | dirs
	$(CXX) $(CXXFLAGS) -o $@ objs/simple.o objs/simple_ispc.o

objs/simple.o: simple.cpp objs/simple_ispc.h | dirs
	$(CXX) $(CXXFLAGS) -c -o $@ $<

# One ispc invocation produces both the object file and the header.
objs/%_ispc.h objs/%_ispc.o: %.ispc | dirs
	$(ISPC) $(ISPCFLAGS) $< -o objs/$*_ispc.o -h objs/$*_ispc.h
|
||||
63
examples/simple/simple.cpp
Normal file
63
examples/simple/simple.cpp
Normal file
@@ -0,0 +1,63 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#include <stdio.h>
|
||||
|
||||
// Include the header file that the ispc compiler generates
|
||||
#include "simple_ispc.h"
|
||||
using namespace ispc;
|
||||
|
||||
// Runs the ispc-compiled simple() kernel over 16 floats and prints each
// input together with its result.
int main() {
    const int kCount = 16;

    // Pointers passed to ispc-compiled code are currently required to have
    // alignment equal to the target's native vector size.  Here we align
    // to 32 bytes to be safe for both SSE and AVX targets.
#ifdef _MSC_VER
    __declspec(align(32)) float vin[kCount], vout[kCount];
#else
    float vin[kCount] __attribute__((aligned(32)));
    float vout[kCount] __attribute__((aligned(32)));
#endif

    // Fill the input buffer with 0, 1, ..., 15
    for (int i = 0; i != kCount; ++i)
        vin[i] = static_cast<float>(i);

    // Call simple() function from simple.ispc file
    simple(vin, vout, kCount);

    // Report every input/output pair
    for (int i = 0; i != kCount; ++i)
        printf("%d: simple(%f) = %f\n", i, vin[i], vout[i]);

    return 0;
}
|
||||
53
examples/simple/simple.ispc
Normal file
53
examples/simple/simple.ispc
Normal file
@@ -0,0 +1,53 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
|
||||
// For each of the first 'count' elements of vin, writes v*v to vout when
// the value v is below 3 and sqrt(v) otherwise.  'count' need not be a
// multiple of programCount: program instances whose index falls past the
// end of the arrays do no loads or stores.
export void simple(uniform float vin[], uniform float vout[],
                   uniform int count) {
    // Compute the result for 'programCount' values in parallel
    for (uniform int i = 0; i < count; i += programCount) {
        int index = i + programIndex;

        // On the last iteration some program instances may point past the
        // end of the arrays; keep them from touching memory.
        if (index < count) {
            // Load the appropriate input value for this program instance.
            float v = vin[index];

            // Do an arbitrary little computation, but at least make the
            // computation dependent on the value being processed
            if (v < 3.)
                v = v * v;
            else
                v = sqrt(v);

            // And write the result to the output array.
            vout[index] = v;
        }
    }
}
|
||||
164
examples/simple/simple.vcxproj
Executable file
164
examples/simple/simple.vcxproj
Executable file
@@ -0,0 +1,164 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Debug|x64">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|x64">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>x64</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="simple.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
  <!-- Custom build rule for the ispc source file.  In every configuration
       the file is run through the C preprocessor (cl /E /TP) and the output
       is piped to ispc, which is given "-" as its input argument and writes
       the object file (-o) and the generated header (-h).  The Win32
       configurations additionally pass "arch=x86" (as a double-dash option).
       All four commands are kept in the same form; the Debug|x64 command
       previously left out the "-" input argument. -->
  <CustomBuild Include="simple.ispc">
    <FileType>Document</FileType>
    <Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
</Command>
    <Command Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
</Command>
    <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%(Filename).obj</Outputs>
    <Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">%(Filename).obj</Outputs>
    <Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h --arch=x86
</Command>
    <Command Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
cl /E /TP %(Filename).ispc | ispc -O2 - -o %(Filename).obj -h %(Filename)_ispc.h
</Command>
    <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%(Filename).obj</Outputs>
    <Outputs Condition="'$(Configuration)|$(Platform)'=='Release|x64'">%(Filename).obj</Outputs>
  </CustomBuild>
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{947C5311-8B78-4D05-BEE4-BCF342D4B367}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>simple</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
67
examples/timing.h
Normal file
67
examples/timing.h
Normal file
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#ifndef ISPC_EXAMPLES_TIMING_H
#define ISPC_EXAMPLES_TIMING_H 1

// The include guard matters here: this header defines (not just declares)
// rdtsc() and the file-static start/end variables, so including it twice
// in one translation unit would otherwise be a redefinition error.

#include <stdint.h>

// _WIN32 is predefined by Windows compilers; WIN32 is only present when the
// project settings define it, so accept either one.
#if defined(_WIN32) || defined(WIN32)
#include <windows.h>
#include <intrin.h>
#define rdtsc __rdtsc
#else
extern "C" {
    /** Reads the processor's time stamp counter.

        Executes cpuid (with eax zeroed) before rdtsc, then combines the
        two 32-bit halves returned by rdtsc in edx:eax into one 64-bit
        value. */
    __inline__ uint64_t rdtsc() {
        uint32_t low, high;
        __asm__ __volatile__ (
            "xorl %%eax,%%eax \n cpuid"
            ::: "%rax", "%rbx", "%rcx", "%rdx" );
        __asm__ __volatile__ (
            "rdtsc" : "=a" (low), "=d" (high));
        return (uint64_t)high << 32 | low;
    }
}
#endif

/** Counter values recorded by reset_and_start_timer() (start) and by the
    most recent get_elapsed_mcycles() call (end). */
static uint64_t start, end;

/** Records the current cycle counter value as the start of the timed
    interval. */
static inline void reset_and_start_timer()
{
    start = rdtsc();
}

/** Returns the number of processor cycles elapsed since the last
    reset_and_start_timer() call, divided by 1024*1024 -- i.e. in units of
    2^20 cycles, which is only approximately "millions" of cycles. */
static inline double get_elapsed_mcycles()
{
    end = rdtsc();
    return (end-start) / (1024. * 1024.);
}

#endif // ISPC_EXAMPLES_TIMING_H
|
||||
543
expr.h
Normal file
543
expr.h
Normal file
@@ -0,0 +1,543 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file expr.h
|
||||
@brief Expr abstract base class and expression implementations
|
||||
*/
|
||||
|
||||
#ifndef ISPC_EXPR_H
|
||||
#define ISPC_EXPR_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
|
||||
class FunctionSymbolExpr;
|
||||
|
||||
/** @brief Expr is the abstract base class that defines the interface that
|
||||
all expression types must implement.
|
||||
*/
|
||||
class Expr : public ASTNode {
|
||||
public:
|
||||
Expr(SourcePos p) : ASTNode(p) { }
|
||||
|
||||
/** This is the main method for Expr implementations to implement. It
|
||||
should call methods in the FunctionEmitContext to emit LLVM IR
|
||||
instructions to the current basic block in order to generate an
|
||||
llvm::Value that represents the expression's value. */
|
||||
virtual llvm::Value *GetValue(FunctionEmitContext *ctx) const = 0;
|
||||
|
||||
/** For expressions that can provide an lvalue (e.g. array indexing),
|
||||
this function should emit IR that computes the expression's lvalue
|
||||
and returns the corresponding llvm::Value. Expressions that can't
|
||||
provide an lvalue should leave this unimplemented; the default
|
||||
implementation returns NULL. */
|
||||
virtual llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
|
||||
/** Returns the Type of the expression. */
|
||||
virtual const Type *GetType() const = 0;
|
||||
|
||||
/** For expressions that have values based on a symbol (e.g. regular
|
||||
symbol references, array indexing, etc.), this returns a pointer to
|
||||
that symbol. */
|
||||
virtual Symbol *GetBaseSymbol() const;
|
||||
|
||||
/** If this is a constant expression that can be converted to a
|
||||
constant of the given type, this method should return the
|
||||
corresponding llvm::Constant value. Otherwise it should return
|
||||
NULL. */
|
||||
virtual llvm::Constant *GetConstant(const Type *type) const;
|
||||
|
||||
/** This method should perform early optimizations of the expression
|
||||
(constant folding, etc.) and return a pointer to the resulting
|
||||
expression. If an error is encountered during optimization, NULL
|
||||
should be returned. */
|
||||
virtual Expr *Optimize() = 0;
|
||||
|
||||
/** This method should perform type checking of the expression and
|
||||
return a pointer to the resulting expression. If an error is
|
||||
encountered, NULL should be returned. */
|
||||
virtual Expr *TypeCheck() = 0;
|
||||
|
||||
/** Prints the expression to standard output (used for debugging). */
|
||||
virtual void Print() const = 0;
|
||||
|
||||
/** This method tries to convert the expression to the given type. In
|
||||
the event of failure, if the failureOk parameter is true, then no
|
||||
error is issued. If failureOk is false, then an error is printed
|
||||
that incorporates the given error message string. In either
|
||||
failure case, NULL is returned. */
|
||||
Expr *TypeConv(const Type *type, const char *errorMsgBase = NULL,
|
||||
bool failureOk = false);
|
||||
};
|
||||
|
||||
|
||||
/** @brief Unary expression */
|
||||
class UnaryExpr : public Expr {
|
||||
public:
|
||||
enum Op {
|
||||
PreInc, ///< Pre-increment
|
||||
PreDec, ///< Pre-decrement
|
||||
PostInc, ///< Post-increment
|
||||
PostDec, ///< Post-decrement
|
||||
Negate, ///< Negation
|
||||
LogicalNot, ///< Logical not
|
||||
BitNot, ///< Bit not
|
||||
};
|
||||
|
||||
UnaryExpr(Op op, Expr *expr, SourcePos pos);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
Expr *Optimize();
|
||||
Expr *TypeCheck();
|
||||
|
||||
private:
|
||||
const Op op;
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Binary expression */
|
||||
class BinaryExpr : public Expr {
|
||||
public:
|
||||
enum Op {
|
||||
Add, ///< Addition
|
||||
Sub, ///< Subtraction
|
||||
Mul, ///< Multiplication
|
||||
Div, ///< Division
|
||||
Mod, ///< Modulus
|
||||
Shl, ///< Shift left
|
||||
Shr, ///< Shift right
|
||||
|
||||
Lt, ///< Less than
|
||||
Gt, ///< Greater than
|
||||
Le, ///< Less than or equal
|
||||
Ge, ///< Greater than or equal
|
||||
Equal, ///< Equal
|
||||
NotEqual, ///< Not equal
|
||||
|
||||
BitAnd, ///< Bitwise AND
|
||||
BitXor, ///< Bitwise XOR
|
||||
BitOr, ///< Bitwise OR
|
||||
LogicalAnd, ///< Logical AND
|
||||
LogicalOr, ///< Logical OR
|
||||
|
||||
Comma, ///< Comma operator
|
||||
};
|
||||
|
||||
BinaryExpr(Op o, Expr *a, Expr *b, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
|
||||
Expr *Optimize();
|
||||
Expr *TypeCheck();
|
||||
|
||||
private:
|
||||
const Op op;
|
||||
Expr *arg0, *arg1;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Assignment expression */
|
||||
class AssignExpr : public Expr {
|
||||
public:
|
||||
enum Op {
|
||||
Assign, ///< Regular assignment
|
||||
MulAssign, ///< *= assignment
|
||||
DivAssign, ///< /= assignment
|
||||
ModAssign, ///< %= assignment
|
||||
AddAssign, ///< += assignment
|
||||
SubAssign, ///< -= assignment
|
||||
ShlAssign, ///< <<= assignment
|
||||
ShrAssign, ///< >>= assignment
|
||||
AndAssign, ///< &= assignment
|
||||
XorAssign, ///< ^= assignment
|
||||
OrAssign, ///< |= assignment
|
||||
};
|
||||
|
||||
AssignExpr(Op o, Expr *a, Expr *b, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
|
||||
Expr *Optimize();
|
||||
Expr *TypeCheck();
|
||||
|
||||
private:
|
||||
const Op op;
|
||||
Expr *lvalue, *rvalue;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Selection expression, corresponding to "test ? a : b".
|
||||
|
||||
Returns the value of "a" or "b", depending on the value of "test".
|
||||
*/
|
||||
class SelectExpr : public Expr {
|
||||
public:
|
||||
SelectExpr(Expr *test, Expr *a, Expr *b, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
|
||||
Expr *Optimize();
|
||||
Expr *TypeCheck();
|
||||
|
||||
private:
|
||||
Expr *test, *expr1, *expr2;
|
||||
};
|
||||
|
||||
|
||||
/** @brief A list of expressions.
|
||||
|
||||
These are mostly used for representing curly-brace delimited
|
||||
initializers for initializers for complex types and for representing
|
||||
the arguments passed to a function call.
|
||||
*/
|
||||
class ExprList : public Expr {
|
||||
public:
|
||||
ExprList(SourcePos p) : Expr(p) { }
|
||||
ExprList(Expr *e, SourcePos p) : Expr(p) { exprs.push_back(e); }
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
llvm::Constant *GetConstant(const Type *type) const;
|
||||
ExprList *Optimize();
|
||||
ExprList *TypeCheck();
|
||||
|
||||
std::vector<Expr *> exprs;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression representing a function call.
|
||||
*/
|
||||
class FunctionCallExpr : public Expr {
|
||||
public:
|
||||
FunctionCallExpr(Expr *func, ExprList *args, SourcePos p, bool isLaunch);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
|
||||
Expr *Optimize();
|
||||
Expr *TypeCheck();
|
||||
|
||||
private:
|
||||
Expr *func;
|
||||
ExprList *args;
|
||||
bool isLaunch;
|
||||
|
||||
void resolveFunctionOverloads();
|
||||
bool tryResolve(bool (*matchFunc)(Expr *, const Type *));
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression representing indexing into something with an integer
|
||||
offset.
|
||||
|
||||
This is used for both array indexing and indexing into VectorTypes.
|
||||
*/
|
||||
class IndexExpr : public Expr {
|
||||
public:
|
||||
IndexExpr(Expr *arrayOrVector, Expr *index, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
void Print() const;
|
||||
|
||||
Expr *Optimize();
|
||||
Expr *TypeCheck();
|
||||
|
||||
private:
|
||||
Expr *arrayOrVector, *index;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression representing member selection ("foo.bar").
|
||||
*/
|
||||
class MemberExpr : public Expr {
|
||||
public:
|
||||
MemberExpr(Expr *expr, const char *identifier, SourcePos pos,
|
||||
SourcePos identifierPos);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
void Print() const;
|
||||
Expr *Optimize();
|
||||
Expr *TypeCheck();
|
||||
|
||||
private:
|
||||
std::string getCandidateNearMatches() const;
|
||||
int getElementNumber() const;
|
||||
|
||||
Expr *expr;
|
||||
std::string identifier;
|
||||
const SourcePos identifierPos;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression representing a compile-time constant value.
|
||||
|
||||
This class can currently represent compile-time constants of anything
|
||||
that is an AtomicType; for anything more complex, we don't currently
|
||||
have a representation of a compile-time constant that can be further
|
||||
reasoned about.
|
||||
*/
|
||||
class ConstExpr : public Expr {
|
||||
public:
|
||||
/** Create a ConstExpr from a uniform int32 value */
|
||||
ConstExpr(const Type *t, int32_t i, SourcePos p);
|
||||
/** Create a ConstExpr from a varying int32 value */
|
||||
ConstExpr(const Type *t, int32_t *i, SourcePos p);
|
||||
/** Create a ConstExpr from a uniform uint32 value */
|
||||
ConstExpr(const Type *t, uint32_t u, SourcePos p);
|
||||
/** Create a ConstExpr from a varying uint32 value */
|
||||
ConstExpr(const Type *t, uint32_t *u, SourcePos p);
|
||||
/** Create a ConstExpr from a uniform float value */
|
||||
ConstExpr(const Type *t, float f, SourcePos p);
|
||||
/** Create a ConstExpr from a varying float value */
|
||||
ConstExpr(const Type *t, float *f, SourcePos p);
|
||||
/** Create a ConstExpr from a uniform double value */
|
||||
ConstExpr(const Type *t, double d, SourcePos p);
|
||||
/** Create a ConstExpr from a varying double value */
|
||||
ConstExpr(const Type *t, double *d, SourcePos p);
|
||||
/** Create a ConstExpr from a uniform int64 value */
|
||||
ConstExpr(const Type *t, int64_t i, SourcePos p);
|
||||
/** Create a ConstExpr from a varying int64 value */
|
||||
ConstExpr(const Type *t, int64_t *i, SourcePos p);
|
||||
/** Create a ConstExpr from a uniform uint64 value */
|
||||
ConstExpr(const Type *t, uint64_t i, SourcePos p);
|
||||
/** Create a ConstExpr from a varying uint64 value */
|
||||
ConstExpr(const Type *t, uint64_t *i, SourcePos p);
|
||||
/** Create a ConstExpr from a uniform bool value */
|
||||
ConstExpr(const Type *t, bool b, SourcePos p);
|
||||
/** Create a ConstExpr from a varying bool value */
|
||||
ConstExpr(const Type *t, bool *b, SourcePos p);
|
||||
/** Create a ConstExpr of the same type as the given old ConstExpr,
|
||||
with values given by the "vales" parameter. */
|
||||
ConstExpr(ConstExpr *old, double *values);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
llvm::Constant *GetConstant(const Type *type) const;
|
||||
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
|
||||
/** Return the ConstExpr's values as booleans, doing type conversion
|
||||
from the actual type if needed. If forceVarying is true, then type
|
||||
convert to 'varying' so as to always return a number of values
|
||||
equal to the target vector width into the given pointer. */
|
||||
int AsBool(bool *, bool forceVarying = false) const;
|
||||
|
||||
/** Return the ConstExpr's values as int32s, doing type conversion
|
||||
from the actual type if needed. If forceVarying is true, then type
|
||||
convert to 'varying' so as to always return a number of values
|
||||
equal to the target vector width into the given pointer. */
|
||||
int AsInt32(int32_t *, bool forceVarying = false) const;
|
||||
|
||||
/** Return the ConstExpr's values as uint32s, doing type conversion
|
||||
from the actual type if needed. If forceVarying is true, then type
|
||||
convert to 'varying' so as to always return a number of values
|
||||
equal to the target vector width into the given pointer. */
|
||||
int AsUInt32(uint32_t *, bool forceVarying = false) const;
|
||||
|
||||
/** Return the ConstExpr's values as floats, doing type conversion
|
||||
from the actual type if needed. If forceVarying is true, then type
|
||||
convert to 'varying' so as to always return a number of values
|
||||
equal to the target vector width into the given pointer. */
|
||||
int AsFloat(float *, bool forceVarying = false) const;
|
||||
|
||||
/** Return the ConstExpr's values as int64s, doing type conversion
|
||||
from the actual type if needed. If forceVarying is true, then type
|
||||
convert to 'varying' so as to always return a number of values
|
||||
equal to the target vector width into the given pointer. */
|
||||
int AsInt64(int64_t *, bool forceVarying = false) const;
|
||||
|
||||
/** Return the ConstExpr's values as uint64s, doing type conversion
|
||||
from the actual type if needed. If forceVarying is true, then type
|
||||
convert to 'varying' so as to always return a number of values
|
||||
equal to the target vector width into the given pointer. */
|
||||
int AsUInt64(uint64_t *, bool forceVarying = false) const;
|
||||
|
||||
/** Return the ConstExpr's values as doubles, doing type conversion
|
||||
from the actual type if needed. If forceVarying is true, then type
|
||||
convert to 'varying' so as to always return a number of values
|
||||
equal to the target vector width into the given pointer. */
|
||||
int AsDouble(double *, bool forceVarying = false) const;
|
||||
|
||||
/** Return the number of values in the ConstExpr; should be either 1,
|
||||
if it has uniform type, or the target's vector width if it's
|
||||
varying. */
|
||||
int Count() const;
|
||||
|
||||
private:
|
||||
const AtomicType *type;
|
||||
union {
|
||||
int32_t int32Val[ISPC_MAX_NVEC];
|
||||
uint32_t uint32Val[ISPC_MAX_NVEC];
|
||||
bool boolVal[ISPC_MAX_NVEC];
|
||||
float floatVal[ISPC_MAX_NVEC];
|
||||
double doubleVal[ISPC_MAX_NVEC];
|
||||
int64_t int64Val[ISPC_MAX_NVEC];
|
||||
uint64_t uint64Val[ISPC_MAX_NVEC];
|
||||
};
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression representing a type cast of the given expression to a
|
||||
probably-different type. */
|
||||
class TypeCastExpr : public Expr {
|
||||
public:
|
||||
TypeCastExpr(const Type *t, Expr *e, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
void Print() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
|
||||
private:
|
||||
const Type *type;
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression that represents taking a reference of a (non-reference)
|
||||
variable. */
|
||||
class ReferenceExpr : public Expr {
|
||||
public:
|
||||
ReferenceExpr(Expr *e, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
void Print() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
|
||||
private:
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression that represents dereferencing a reference to get its
|
||||
value. */
|
||||
class DereferenceExpr : public Expr {
|
||||
public:
|
||||
DereferenceExpr(Expr *e, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
void Print() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
|
||||
private:
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression representing a symbol reference in the program */
|
||||
class SymbolExpr : public Expr {
|
||||
public:
|
||||
SymbolExpr(Symbol *s, SourcePos p);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
llvm::Value *GetLValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
void Print() const;
|
||||
|
||||
private:
|
||||
Symbol *symbol;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Expression representing a function symbol in the program (generally
|
||||
used for a function call).
|
||||
*/
|
||||
class FunctionSymbolExpr : public Expr {
|
||||
public:
|
||||
FunctionSymbolExpr(std::vector<Symbol *> *candidateFunctions,
|
||||
SourcePos pos);
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Symbol *GetBaseSymbol() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
void Print() const;
|
||||
|
||||
private:
|
||||
friend class FunctionCallExpr;
|
||||
|
||||
/** All of the functions with the name given in the function call;
|
||||
there may be more then one, in which case we need to resolve which
|
||||
overload is the best match. */
|
||||
std::vector<Symbol *> *candidateFunctions;
|
||||
|
||||
/** The actual matching function found after overload resolution; this
|
||||
value is set by FunctionCallExpr::resolveFunctionOverloads() */
|
||||
Symbol *matchingFunc;
|
||||
};
|
||||
|
||||
|
||||
/** @brief A sync statement in the program (waits for all launched tasks before
|
||||
proceeding). */
|
||||
class SyncExpr : public Expr {
|
||||
public:
|
||||
SyncExpr(SourcePos p) : Expr(p) { }
|
||||
|
||||
llvm::Value *GetValue(FunctionEmitContext *ctx) const;
|
||||
const Type *GetType() const;
|
||||
Expr *TypeCheck();
|
||||
Expr *Optimize();
|
||||
void Print() const;
|
||||
};
|
||||
|
||||
#endif // ISPC_EXPR_H
|
||||
19
failing_tests/max-uint-1.ispc
Normal file
19
failing_tests/max-uint-1.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
// Helper used by result(): builds a varying float in which each consecutive
// group of four lanes is filled from a, b, c, d through insert().
// NOTE(review): presumably insert(v, lane, x) yields v with that lane set
// to x -- confirm against the standard library.
// NOTE(review): the loop advances by 4 and touches lanes i..i+3 with no
// bounds check, so it assumes programCount is a multiple of 4 -- confirm.
static float float4(uniform float a, uniform float b, uniform float c,
|
||||
uniform float d) {
|
||||
float ret = 0;
|
||||
for (uniform int i = 0; i < programCount; i += 4) {
|
||||
ret = insert(ret, i + 0, a);
|
||||
ret = insert(ret, i + 1, b);
|
||||
ret = insert(ret, i + 2, c);
|
||||
ret = insert(ret, i + 3, d);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Function under test: converts a to unsigned int and returns max() of the
// unsigned constant 2 and that value.
// NOTE(review): this file lives in failing_tests/, so presumably the
// unsigned max() path is what currently misbehaves -- confirm.
export float f_f(float a) {
|
||||
unsigned int i = (unsigned int)a;
|
||||
return max((unsigned int)2, i);
|
||||
}
|
||||
|
||||
// Reference value, float4(2,2,3,4).  Presumably compared by the test
// harness (not shown here) against what f_f() returns -- confirm.
export float result() { return float4(2,2,3,4); }
|
||||
|
||||
8
failing_tests/max-uint.ispc
Normal file
8
failing_tests/max-uint.ispc
Normal file
@@ -0,0 +1,8 @@
|
||||
|
||||
// Function under test: converts a to unsigned int and returns max() of the
// unsigned constant 10 and that value.
// NOTE(review): this file lives in failing_tests/, so presumably the
// unsigned max() path is what currently misbehaves -- confirm.
export float f_f(float a) {
|
||||
unsigned int i = (unsigned int)a;
|
||||
return max((unsigned int)10, i);
|
||||
}
|
||||
|
||||
// Reference value, the constant 10.  Presumably compared by the test
// harness (not shown here) against what f_f() returns -- confirm.
export float result() { return 10; }
|
||||
|
||||
19
failing_tests/min-uint-1.ispc
Normal file
19
failing_tests/min-uint-1.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
// Builds the value that result() returns: for every group of four indices
// (i advances by 4 until it reaches programCount) a, b, c and d are passed
// to insert() at positions i+0 .. i+3 of ret.
// NOTE(review): insert() is not defined in this file -- presumably the ispc
// standard-library routine that sets one lane of a varying value; confirm.
static float float4(uniform float a, uniform float b, uniform float c,
|
||||
uniform float d) {
|
||||
float ret = 0;
|
||||
for (uniform int i = 0; i < programCount; i += 4) {
|
||||
ret = insert(ret, i + 0, a);
|
||||
ret = insert(ret, i + 1, b);
|
||||
ret = insert(ret, i + 2, c);
|
||||
ret = insert(ret, i + 3, d);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Function under test: converts a to unsigned int and returns min() of the
// unsigned constant 2 and that value.
// NOTE(review): this file lives in failing_tests/, so presumably the
// unsigned min() path is what currently misbehaves -- confirm.
export float f_f(float a) {
|
||||
unsigned int i = (unsigned int)a;
|
||||
return min((unsigned int)2, i);
|
||||
}
|
||||
|
||||
// Reference value, float4(1,2,2,2).  Presumably compared by the test
// harness (not shown here) against what f_f() returns -- confirm.
export float result() { return float4(1,2,2,2); }
|
||||
|
||||
19
failing_tests/min-uint-2.ispc
Normal file
19
failing_tests/min-uint-2.ispc
Normal file
@@ -0,0 +1,19 @@
|
||||
// Builds the value that result() returns: for every group of four indices
// (i advances by 4 until it reaches programCount) a, b, c and d are passed
// to insert() at positions i+0 .. i+3 of ret.
// NOTE(review): insert() is not defined in this file -- presumably the ispc
// standard-library routine that sets one lane of a varying value; confirm.
static float float4(uniform float a, uniform float b, uniform float c,
|
||||
uniform float d) {
|
||||
float ret = 0;
|
||||
for (uniform int i = 0; i < programCount; i += 4) {
|
||||
ret = insert(ret, i + 0, a);
|
||||
ret = insert(ret, i + 1, b);
|
||||
ret = insert(ret, i + 2, c);
|
||||
ret = insert(ret, i + 3, d);
|
||||
}
|
||||
return ret;
|
||||
}
|
||||
|
||||
// Function under test: converts a to unsigned int and returns min() of the
// unsigned constant 20 and that value.
// NOTE(review): this file lives in failing_tests/, so presumably the
// unsigned min() path is what currently misbehaves -- confirm.
export float f_f(float a) {
|
||||
unsigned int i = (unsigned int)a;
|
||||
return min((unsigned int)20, i);
|
||||
}
|
||||
|
||||
// Reference value, float4(1,2,3,4).  Presumably compared by the test
// harness (not shown here) against what f_f() returns -- confirm.
export float result() { return float4(1,2,3,4); }
|
||||
|
||||
11
failing_tests/struct-array-assign.ispc
Normal file
11
failing_tests/struct-array-assign.ispc
Normal file
@@ -0,0 +1,11 @@
|
||||
|
||||
// Minimal single-member struct used as the array element type in foo().
struct Foo {
|
||||
float f;
|
||||
};
|
||||
|
||||
|
||||
// Copies the array element f[i] into a local Foo and returns its f member.
// The parameter j is not used in the body.
// NOTE(review): i has no 'uniform' qualifier, so it is presumably a varying
// index, making this a struct assignment from a varying-indexed array
// element (cf. the file name, struct-array-assign) -- confirm.
export float foo(Foo f[], int i, uniform int j) {
|
||||
Foo x = f[i];
|
||||
return x.f;
|
||||
}
|
||||
|
||||
137
ispc.cpp
Normal file
137
ispc.cpp
Normal file
@@ -0,0 +1,137 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file ispc.cpp
|
||||
@brief ispc global definitions
|
||||
*/
|
||||
|
||||
#include "ispc.h"
|
||||
#include "module.h"
|
||||
#include "util.h"
|
||||
#include <stdio.h>
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#include <windows.h>
|
||||
#include <direct.h>
|
||||
#endif
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Module.h>
|
||||
#ifndef LLVM_2_8
|
||||
#include <llvm/Analysis/DIBuilder.h>
|
||||
#endif
|
||||
#include <llvm/Analysis/DebugInfo.h>
|
||||
#include <llvm/Support/Dwarf.h>
|
||||
|
||||
// Definitions of the two global pointers that ispc.h declares 'extern'.
// Neither is given an initializer here; they are assigned elsewhere.
Globals *g;
|
||||
Module *m;
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Target
|
||||
|
||||
/** Sets up the default compilation target: the SSE4 instruction set on an
    "x86-64" architecture with the "nehalem" CPU, compiling 4-wide with a
    native vector width of 4 as well.  Members are listed in the order in
    which struct Target declares them. */
Target::Target()
    : isa(SSE4),
      arch("x86-64"),
      cpu("nehalem"),
      nativeVectorWidth(4),
      vectorWidth(4) {
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Opt
|
||||
|
||||
/** Default optimization settings: level 1, fast math off, and every one of
    the disable* switches cleared.  Members are listed in the order in
    which struct Opt declares them. */
Opt::Opt()
    : level(1),
      fastMath(false),
      disableBlendedMaskedStores(false),
      disableCoherentControlFlow(false),
      disableUniformControlFlow(false),
      disableGatherScatterOptimizations(false),
      disableMaskedStoreToStore(false),
      disableGatherScatterFlattening(false),
      disableUniformMemoryOptimizations(false),
      disableMaskedStoreOptimizations(false) {
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Globals
|
||||
|
||||
/** Establishes the default global settings, allocates the LLVMContext and
    records the current working directory.

    Defaults: the ISPC math library, standard library included, C
    preprocessor run, performance warnings on; debug printing, warning
    suppression, instrumentation and debugging symbols all off.

    If the working directory cannot be determined, the failure is reported
    on stderr and currentDirectory is set to the empty string rather than
    being left uninitialized.
 */
Globals::Globals() {
    mathLib = Globals::Math_ISPC;

    includeStdlib = true;
    runCPP = true;
    debugPrint = false;
    disableWarnings = false;
    emitPerfWarnings = true;
    emitInstrumentation = false;
    generateDebuggingSymbols = false;

    // Never freed in this file; the Globals object holds the raw pointer.
    ctx = new llvm::LLVMContext;

    // getcwd()/_getcwd() return NULL on failure (e.g. the path does not fit
    // in the buffer), in which case the buffer contents are not usable.
#ifdef ISPC_IS_WINDOWS
    const char *cwd = _getcwd(currentDirectory, sizeof(currentDirectory));
#else
    const char *cwd = getcwd(currentDirectory, sizeof(currentDirectory));
#endif
    if (cwd == NULL) {
        perror("ispc: unable to determine current working directory");
        // Keep the buffer a valid C string for later readers such as
        // SourcePos::GetDIFile().
        currentDirectory[0] = '\0';
    }
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// ASTNode
|
||||
|
||||
/** Out-of-line definition of the virtual destructor that ispc.h declares
    for ASTNode; the body is empty. */
ASTNode::~ASTNode() { }
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// SourcePos
|
||||
|
||||
/** Creates a position that starts and ends at the same line/column.

    @param n  File name.  The pointer is stored as-is (the string is not
              copied), so it must outlive this SourcePos.  If NULL, the
              identifier of the current module (global m) is used instead;
              if no module exists yet, the placeholder "(unknown)" is used
              rather than dereferencing a NULL m.
    @param l  Line number, stored as both first_line and last_line.
    @param c  Column number, stored as both first_column and last_column.
 */
SourcePos::SourcePos(const char *n, int l, int c) {
    if (n != NULL)
        name = n;
    else if (m != NULL)
        name = m->module->getModuleIdentifier().c_str();
    else
        // Default-constructed before any Module has been created.
        name = "(unknown)";

    first_line = last_line = l;
    first_column = last_column = c;
}
|
||||
|
||||
/** Returns the LLVM debug-info file object for this position.

    When built with LLVM_2_8 defined, a default-constructed llvm::DIFile is
    returned.  Otherwise GetDirectoryAndFileName() (defined elsewhere) is
    given g->currentDirectory and this position's name and fills in a
    directory and a filename string, which are then passed to
    m->diBuilder->createFile().
 */
llvm::DIFile SourcePos::GetDIFile() const {
#ifndef LLVM_2_8
    std::string dirPart, filePart;
    GetDirectoryAndFileName(g->currentDirectory, name, &dirPart, &filePart);
    return m->diBuilder->createFile(filePart, dirPart);
#else
    return llvm::DIFile();
#endif // !LLVM_2_8
}
|
||||
|
||||
|
||||
void
|
||||
SourcePos::Print() const {
|
||||
printf(" @ [%s:%d.%d - %d.%d] ", name, first_line, first_column,
|
||||
last_line, last_column);
|
||||
}
|
||||
313
ispc.h
Normal file
313
ispc.h
Normal file
@@ -0,0 +1,313 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file ispc.h
|
||||
@brief Main ispc header file
|
||||
*/
|
||||
|
||||
#ifndef ISPC_H
|
||||
#define ISPC_H
|
||||
|
||||
#if defined(_WIN32) || defined(_WIN64)
|
||||
#define ISPC_IS_WINDOWS
|
||||
#elif defined(__linux__)
|
||||
#define ISPC_IS_LINUX
|
||||
#elif defined(__APPLE__)
|
||||
#define ISPC_IS_APPLE
|
||||
#endif
|
||||
|
||||
#include <assert.h>
|
||||
#include <stdint.h>
|
||||
#include <vector>
|
||||
#include <string>
|
||||
|
||||
/** @def ISPC_MAX_NVEC maximum vector size of any of the compilation
|
||||
targets.
|
||||
*/
|
||||
#define ISPC_MAX_NVEC 16
|
||||
|
||||
// Forward declarations of a number of widely-used LLVM types
|
||||
namespace llvm {
|
||||
class BasicBlock;
|
||||
class Constant;
|
||||
class ConstantValue;
|
||||
class DIBuilder;
|
||||
class DIDescriptor;
|
||||
class DIFile;
|
||||
class DIType;
|
||||
class Function;
|
||||
class FunctionType;
|
||||
class LLVMContext;
|
||||
class Module;
|
||||
class Type;
|
||||
class Value;
|
||||
}
|
||||
|
||||
class ArrayType;
|
||||
class AtomicType;
|
||||
class DeclSpecs;
|
||||
class Declaration;
|
||||
class Declarator;
|
||||
class FunctionEmitContext;
|
||||
class Expr;
|
||||
class ExprList;
|
||||
class FunctionType;
|
||||
class GatherBuffer;
|
||||
class Module;
|
||||
class Stmt;
|
||||
class Symbol;
|
||||
class SymbolTable;
|
||||
class Type;
|
||||
|
||||
/** @brief Representation of a range of positions in a source file.
|
||||
|
||||
This class represents a range of characters in a source file
|
||||
(e.g. those that span a token's definition), from starting line and
|
||||
column to ending line and column. (These values are tracked by the
|
||||
lexing code). Both lines and columns are counted starting from one.
|
||||
*/
|
||||
struct SourcePos {
|
||||
SourcePos(const char *n = NULL, int l = 0, int c = 0);
|
||||
|
||||
const char *name;
|
||||
int first_line;
|
||||
int first_column;
|
||||
int last_line;
|
||||
int last_column;
|
||||
|
||||
/** Prints the filename and line/column range to standard output. */
|
||||
void Print() const;
|
||||
|
||||
/** Returns a LLVM DIFile object that represents the SourcePos's file */
|
||||
llvm::DIFile GetDIFile() const;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Abstract base class for nodes in the abstract syntax tree (AST).
|
||||
|
||||
This class defines a basic interface that all abstract syntax tree
|
||||
(AST) nodes must implement. The base classes for both expressions
|
||||
(Expr) and statements (Stmt) inherit from this class.
|
||||
*/
|
||||
class ASTNode {
|
||||
public:
|
||||
ASTNode(SourcePos p) : pos(p) { }
|
||||
virtual ~ASTNode();
|
||||
|
||||
/** The Optimize() method should perform any appropriate early-stage
|
||||
optimizations on the node (e.g. constant folding). The caller
|
||||
should use the returned ASTNode * in place of the original node.
|
||||
This method may return NULL if an error is encountered during
|
||||
optimization. */
|
||||
virtual ASTNode *Optimize() = 0;
|
||||
|
||||
/** Type checking should be performed by the node when this method is
|
||||
called. In the event of an error, a NULL value may be returned.
|
||||
As with ASTNode::Optimize(), the caller should store the returned
|
||||
pointer in place of the original ASTNode *. */
|
||||
virtual ASTNode *TypeCheck() = 0;
|
||||
|
||||
/** All AST nodes must track the file position where they are
|
||||
defined. */
|
||||
const SourcePos pos;
|
||||
};
|
||||
|
||||
/** @brief Structure that defines a compilation target
|
||||
|
||||
This structure defines a compilation target for the ispc compiler.
|
||||
*/
|
||||
struct Target {
    Target();

    /** The instruction sets the compiler is able to target. */
    enum ISA { SSE2, SSE4, AVX };

    /** The instruction set this compilation is targeting. */
    ISA isa;

    /** Architecture of the target system, e.g. "x86-64" or "x86". */
    std::string arch;

    /** CPU being targeted, e.g. "corei7", "corei7-avx", ... */
    std::string cpu;

    /** Native vector width of the vector instruction set.  This follows
        directly from the ISA being used (4 for SSE, 8 for AVX, and so
        on). */
    int nativeVectorWidth;

    /** Vector width actually being compiled to.  It may be an integer
        multiple of the native width, e.g. when "doubling up" and compiling
        8-wide on a 4-wide SSE system. */
    int vectorWidth;
};
|
||||
|
||||
/** @brief Structure that collects optimization options
|
||||
|
||||
This structure collects all of the options related to optimization of
|
||||
generated code.
|
||||
*/
|
||||
struct Opt {
    Opt();

    /** Optimization level.  Only two values are valid at present: 0,
        meaning essentially no optimization, and 1, meaning as much
        optimization as possible. */
    int level;

    /** Whether "fast and loose", numerically unsafe optimizations should
        be applied.  False by default. */
    bool fastMath;

    /** By default, on targets that have a blending instruction but no
        masked store instruction, masked stores are simulated by loading
        the old value, blending, and storing the result.  That can be
        unsafe in multi-threaded code, because it writes to locations that
        are not supposed to be written.  Setting this to true turns that
        work-around off; masked stores are instead 'scalarized', i.e. the
        SIMD lanes are iterated over and a scalar write is done for each
        lane that is running. */
    bool disableBlendedMaskedStores;

    /** Turns off the language's 'coherent control flow' constructs (for
        example, "cif" statements are demoted to "if" statements).  Likely
        useful only for measuring the impact of coherent control flow. */
    bool disableCoherentControlFlow;

    /** Turns off uniform control flow optimizations (for example, an "if"
        statement with a uniform condition is treated as having a varying
        condition).  Likely useful only for measuring the impact of uniform
        control flow. */
    bool disableUniformControlFlow;

    /** Turns off the backend optimizations that concern gather/scatter
        (for example, turning a gather from sequential locations into an
        unaligned load).  Likely useful only for measuring the impact of
        those optimizations. */
    bool disableGatherScatterOptimizations;

    /** Turns off the optimization that demotes a masked store to a regular
        store when the store occurs at the same control flow level at which
        the variable was declared.  Likely useful only for measuring the
        impact of that optimization. */
    bool disableMaskedStoreToStore;

    /** Turns off the optimization that notices when the execution mask is
        all on and then emits gather/scatter code that performs the scalar
        loads and stores directly instead of looping over the SIMD
        lanes. */
    bool disableGatherScatterFlattening;

    /** Turns off the optimizations that notice arrays being indexed with
        'uniform' values and issue scalar loads/stores instead of
        gathers/scatters.  Likely useful only for measuring the impact of
        that optimization. */
    bool disableUniformMemoryOptimizations;

    /** Turns off the masked store optimizations: a masked store whose mask
        is all on becomes a regular store, and a masked store whose mask is
        all off is removed (which can in turn allow further dead code, used
        to compute the stored value, to be eliminated).  Likely useful only
        for measuring the impact of that optimization. */
    bool disableMaskedStoreOptimizations;
};
|
||||
|
||||
/** @brief This structure collects together a number of global variables.
|
||||
|
||||
This structure collects a number of global variables that mostly
|
||||
represent parameter settings for this compilation run. In particular,
|
||||
none of these values should change after compilation begins; their
|
||||
values are all set during command-line argument processing or very
|
||||
early during the compiler's execution, before any files are parsed.
|
||||
*/
|
||||
struct Globals {
|
||||
Globals();
|
||||
|
||||
/** Optimization option settings */
|
||||
Opt opt;
|
||||
/** Compilation target information */
|
||||
Target target;
|
||||
|
||||
/** There are a number of math libraries that can be used for
|
||||
transcendentals and the like during program compilation. */
|
||||
enum MathLib { Math_ISPC, Math_ISPCFast, Math_SVML, Math_System };
|
||||
MathLib mathLib;
|
||||
|
||||
/** Records whether the ispc standard library should be made available
|
||||
to the program during compilations. (Default is true.) */
|
||||
bool includeStdlib;
|
||||
|
||||
/** Indicates whether the C pre-processor should be run over the
|
||||
program source before compiling it. (Default is true.) */
|
||||
bool runCPP;
|
||||
|
||||
/** When \c true, voluminous debugging output will be printed during
|
||||
ispc's execution. */
|
||||
bool debugPrint;
|
||||
|
||||
/** Indicates whether all warning messages should be surpressed. */
|
||||
bool disableWarnings;
|
||||
|
||||
/** Indicates whether additional warnings should be issued about
|
||||
possible performance pitfalls. */
|
||||
bool emitPerfWarnings;
|
||||
|
||||
/** Indicates whether calls should be emitted in the program to an
|
||||
externally-defined program instrumentation function. (See the
|
||||
"Instrumenting your ispc programs" section in the user's
|
||||
manual.) */
|
||||
bool emitInstrumentation;
|
||||
|
||||
/** Indicates whether ispc should generate debugging symbols for the
|
||||
program in its output. */
|
||||
bool generateDebuggingSymbols;
|
||||
|
||||
/** Global LLVMContext object */
|
||||
llvm::LLVMContext *ctx;
|
||||
|
||||
/** Current working directory when the ispc compiler starts
|
||||
execution. */
|
||||
char currentDirectory[1024];
|
||||
|
||||
/** Arguments to pass along to the C pre-processor, if it is run on the
|
||||
program before compilation. */
|
||||
std::vector<std::string> cppArgs;
|
||||
};
|
||||
|
||||
// Global pointers shared by every file that includes this header; both are
// defined in ispc.cpp.
extern Globals *g;
|
||||
extern Module *m;
|
||||
|
||||
#endif // ISPC_H
|
||||
25
ispc.sln
Executable file
25
ispc.sln
Executable file
@@ -0,0 +1,25 @@
|
||||
|
||||
Microsoft Visual Studio Solution File, Format Version 11.00
|
||||
# Visual Studio 2010
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc", "ispc.vcxproj", "{9861F490-F516-480C-B63C-D62A77AFA9D5}"
|
||||
EndProject
|
||||
Project("{8BC9CEB8-8B4A-11D0-8D11-00A0C91BC942}") = "ispc_test", "ispc_test.vcxproj", "{92547BA8-BE86-4E78-8799-1D72A70E5831}"
|
||||
EndProject
|
||||
Global
|
||||
GlobalSection(SolutionConfigurationPlatforms) = preSolution
|
||||
Debug|Win32 = Debug|Win32
|
||||
Release|Win32 = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(ProjectConfigurationPlatforms) = postSolution
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.ActiveCfg = Release|Win32
|
||||
{9861F490-F516-480C-B63C-D62A77AFA9D5}.Release|Win32.Build.0 = Release|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.ActiveCfg = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Debug|Win32.Build.0 = Debug|Win32
|
||||
{92547BA8-BE86-4E78-8799-1D72A70E5831}.Release|Win32.ActiveCfg = Release|Win32
|
||||
EndGlobalSection
|
||||
GlobalSection(SolutionProperties) = preSolution
|
||||
HideSolutionNode = FALSE
|
||||
EndGlobalSection
|
||||
EndGlobal
|
||||
216
ispc.vcxproj
Executable file
216
ispc.vcxproj
Executable file
@@ -0,0 +1,216 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="builtins.cpp" />
|
||||
<ClCompile Include="ctx.cpp" />
|
||||
<ClCompile Include="decl.cpp" />
|
||||
<ClCompile Include="expr.cpp" />
|
||||
<ClCompile Include="gen-bitcode-avx.cpp" />
|
||||
<ClCompile Include="gen-bitcode-c.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
||||
<ClCompile Include="gen-bitcode-sse4x2.cpp" />
|
||||
<ClCompile Include="gen-stdlib.cpp" />
|
||||
<ClCompile Include="ispc.cpp" />
|
||||
<ClCompile Include="lex.cc" />
|
||||
<ClCompile Include="llvmutil.cpp" />
|
||||
<ClCompile Include="module.cpp" />
|
||||
<ClCompile Include="main.cpp" />
|
||||
<ClCompile Include="opt.cpp" />
|
||||
<ClCompile Include="parse.cc" />
|
||||
<CustomBuild Include="stdlib-c.c">
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -emit-llvm stdlib-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py stdlib-c.c > gen-bitcode-c.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang stdlib-c.c</Message>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -emit-llvm stdlib-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py stdlib-c.c > gen-bitcode-c.cpp</Command>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang stdlib-c.c</Message>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c.cpp</Outputs>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c.cpp</Outputs>
|
||||
</CustomBuild>
|
||||
<ClCompile Include="stmt.cpp" />
|
||||
<ClCompile Include="sym.cpp" />
|
||||
<ClCompile Include="type.cpp" />
|
||||
<ClCompile Include="util.cpp" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClInclude Include="builtins.h" />
|
||||
<ClInclude Include="ctx.h" />
|
||||
<ClInclude Include="decl.h" />
|
||||
<ClInclude Include="expr.h" />
|
||||
<ClInclude Include="ispc.h" />
|
||||
<ClInclude Include="llvmutil.h" />
|
||||
<ClInclude Include="module.h" />
|
||||
<ClInclude Include="opt.h" />
|
||||
<ClInclude Include="stmt.h" />
|
||||
<ClInclude Include="sym.h" />
|
||||
<ClInclude Include="type.h" />
|
||||
<ClInclude Include="util.h" />
|
||||
<ClInclude Include="winstuff\unistd.h" />
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib.ispc">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">cl /EP /TP %(Filename).ispc /DISPC=1 /DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">cl /EP /TP %(Filename).ispc /DISPC=1 /DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib-sse4.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 stdlib.m4 stdlib-sse4.ll | python bitcode2cpp.py stdlib-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 stdlib.m4 stdlib-sse4.ll | python bitcode2cpp.py stdlib-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib-sse4x2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 stdlib.m4 stdlib-sse4x2.ll | python bitcode2cpp.py stdlib-sse4x2.ll > gen-bitcode-sse4x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 stdlib.m4 stdlib-sse4x2.ll | python bitcode2cpp.py stdlib-sse4x2.ll > gen-bitcode-sse4x2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4x2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4x2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4x2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib-sse2.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 stdlib.m4 stdlib-sse2.ll | python bitcode2cpp.py stdlib-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 stdlib.m4 stdlib-sse2.ll | python bitcode2cpp.py stdlib-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="stdlib-avx.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 stdlib.m4 stdlib-avx.ll | python bitcode2cpp.py stdlib-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 stdlib.m4 stdlib-avx.ll | python bitcode2cpp.py stdlib-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">stdlib.m4;stdlib-sse.ll</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<CustomBuild Include="lex.ll">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">flex -t lex.ll > lex.cc</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">lex.cc</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">flex -t lex.ll > lex.cc</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">lex.cc</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc.h;decl.h;parse.hh;sym.h</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc.h;decl.h;parse.hh;sym.h</AdditionalInputs>
|
||||
</CustomBuild>
|
||||
<CustomBuild Include="parse.yy">
|
||||
<FileType>Document</FileType>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">bison -d -v -t -o parse.cc parse.yy</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">parse.cc;parse.h</Outputs>
|
||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">bison -d -v -t -o parse.cc parse.yy</Command>
|
||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">parse.cc;parse.h</Outputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">ispc.h;type.h;decl.h;expr.h;sym.h;stmt.h</AdditionalInputs>
|
||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">ispc.h;type.h;decl.h;expr.h;sym.h;stmt.h</AdditionalInputs>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Running bison on parse.yy</Message>
|
||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Running bison on parse.yy</Message>
|
||||
</CustomBuild>
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{9861F490-F516-480C-B63C-D62A77AFA9D5}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ispc</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>NOMINMAX</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>NotUsing</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>NOMINMAX</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)\include;.;.\winstuff;%(AdditionalIncludeDirectories)</AdditionalIncludeDirectories>
|
||||
<DisableSpecificWarnings>4146;4800;4996;4355;4624</DisableSpecificWarnings>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)\lib;%(AdditionalLibraryDirectories)</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
313
ispc_test.cpp
Normal file
313
ispc_test.cpp
Normal file
@@ -0,0 +1,313 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
#define _CRT_SECURE_NO_WARNINGS
|
||||
|
||||
#include <stdio.h>
|
||||
#include <stdint.h>
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#include <xmmintrin.h>
|
||||
extern "C" {
|
||||
extern __m128 __svml_sinf4(__m128);
|
||||
extern __m128 __svml_cosf4(__m128);
|
||||
extern __m128 __svml_sincosf4(__m128 *,__m128);
|
||||
extern __m128 __svml_tanf4(__m128);
|
||||
extern __m128 __svml_atanf4(__m128);
|
||||
extern __m128 __svml_atan2f4(__m128, __m128);
|
||||
extern __m128 __svml_expf4(__m128);
|
||||
extern __m128 __svml_logf4(__m128);
|
||||
extern __m128 __svml_powf4(__m128, __m128);
|
||||
}
|
||||
#endif
|
||||
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Module.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Instructions.h>
|
||||
#include <llvm/ExecutionEngine/ExecutionEngine.h>
|
||||
#include <llvm/ExecutionEngine/JIT.h>
|
||||
#include <llvm/Target/TargetSelect.h>
|
||||
#include <llvm/Target/TargetOptions.h>
|
||||
#include <llvm/Target/TargetData.h>
|
||||
#include <llvm/Transforms/Scalar.h>
|
||||
#include <llvm/Transforms/IPO.h>
|
||||
#include <llvm/PassManager.h>
|
||||
#include <llvm/Support/CFG.h>
|
||||
#include <llvm/Analysis/Verifier.h>
|
||||
#include <llvm/Assembly/PrintModulePass.h>
|
||||
#include <llvm/Support/raw_ostream.h>
|
||||
#include <llvm/Bitcode/ReaderWriter.h>
|
||||
#include <llvm/Support/MemoryBuffer.h>
|
||||
#ifndef LLVM_2_8
|
||||
#include <llvm/Support/system_error.h>
|
||||
#endif
|
||||
|
||||
extern "C" {
|
||||
void ISPCLaunch(void *, void *);
|
||||
void ISPCSync();
|
||||
}
|
||||
|
||||
/** Task-launch hook for the test harness.  Nothing is scheduled: the task
    entry point is simply called right away, with the argument block
    followed by the constants 0 and 1.

    @param func  Address of a function of type void (*)(void *, int, int).
    @param data  Opaque argument block handed through to that function.
 */
void ISPCLaunch(void *func, void *data) {
    typedef void (*TaskEntry)(void *, int, int);

    TaskEntry entry = reinterpret_cast<TaskEntry>(func);
    entry(data, 0, 1);
}
|
||||
|
||||
|
||||
/** Task-synchronization hook for the test harness.  ISPCLaunch() runs each
    task to completion before it returns, so there is never anything left
    to wait for here.
 */
void ISPCSync() {
    // Intentionally empty.
}
|
||||
|
||||
/** Print the command-line synopsis to stderr and terminate the process.

    @param ret  Exit status passed to exit(); this function does not return.
 */
static void usage(int ret) {
    static const char *const kUsageLines[] = {
        "usage: ispc_test\n",
        "\t[-h/--help]\tprint help\n",
        "\t<files>\n",
    };
    const unsigned int lineCount = sizeof(kUsageLines) / sizeof(kUsageLines[0]);

    for (unsigned int line = 0; line < lineCount; ++line)
        fprintf(stderr, "%s", kUsageLines[line]);
    exit(ret);
}
|
||||
|
||||
/** Placeholder that lRunTest() binds to every __svml_* symbol when the
    harness is built without ISPC_HAVE_SVML: reports the problem on stderr
    and exits with status 1.
 */
static void svml_missing() {
    static const char kMessage[] = "Program called unavailable SVML function!\n";

    fprintf(stderr, "%s", kMessage);
    exit(1);
}
|
||||
|
||||
static bool lRunTest(const char *fn) {
|
||||
llvm::LLVMContext *ctx = new llvm::LLVMContext;
|
||||
|
||||
#ifdef LLVM_2_8
|
||||
std::string err;
|
||||
llvm::MemoryBuffer *buf = llvm::MemoryBuffer::getFileOrSTDIN(fn, &err);
|
||||
if (!buf) {
|
||||
fprintf(stderr, "Unable to open file \"%s\": %s\n", fn, err.c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
std::string bcErr;
|
||||
llvm::Module *module = llvm::ParseBitcodeFile(buf, *ctx, &bcErr);
|
||||
#else
|
||||
llvm::OwningPtr<llvm::MemoryBuffer> buf;
|
||||
llvm::error_code err = llvm::MemoryBuffer::getFileOrSTDIN(fn, buf);
|
||||
if (err) {
|
||||
fprintf(stderr, "Unable to open file \"%s\": %s\n", fn, err.message().c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
std::string bcErr;
|
||||
llvm::Module *module = llvm::ParseBitcodeFile(buf.get(), *ctx, &bcErr);
|
||||
#endif
|
||||
|
||||
if (!module) {
|
||||
fprintf(stderr, "Bitcode reader failed for \"%s\": %s\n", fn, bcErr.c_str());
|
||||
delete ctx;
|
||||
return false;
|
||||
}
|
||||
|
||||
std::string eeError;
|
||||
llvm::ExecutionEngine *ee = llvm::ExecutionEngine::createJIT(module, &eeError);
|
||||
if (!ee) {
|
||||
fprintf(stderr, "Unable to create ExecutionEngine: %s\n", eeError.c_str());
|
||||
return false;
|
||||
}
|
||||
|
||||
llvm::Function *func;
|
||||
if ((func = module->getFunction("ISPCLaunch")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)ISPCLaunch);
|
||||
if ((func = module->getFunction("ISPCSync")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)ISPCSync);
|
||||
if ((func = module->getFunction("putchar")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)putchar);
|
||||
if ((func = module->getFunction("printf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)printf);
|
||||
if ((func = module->getFunction("fflush")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)fflush);
|
||||
if ((func = module->getFunction("sinf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)sinf);
|
||||
if ((func = module->getFunction("cosf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)cosf);
|
||||
if ((func = module->getFunction("tanf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)tanf);
|
||||
if ((func = module->getFunction("atanf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)atanf);
|
||||
if ((func = module->getFunction("atan2f")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)atan2f);
|
||||
if ((func = module->getFunction("powf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)powf);
|
||||
if ((func = module->getFunction("expf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)expf);
|
||||
if ((func = module->getFunction("logf")) != NULL)
|
||||
ee->addGlobalMapping(func, (void *)logf);
|
||||
|
||||
#ifdef ISPC_HAVE_SVML
|
||||
#define DO_SVML(FUNC ,FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)FUNC)
|
||||
#else
|
||||
#define DO_SVML(FUNC, FUNCNAME) \
|
||||
if ((func = module->getFunction(FUNCNAME)) != NULL) \
|
||||
ee->addGlobalMapping(func, (void *)svml_missing)
|
||||
#endif
|
||||
|
||||
DO_SVML(__svml_sinf4, "__svml_sinf4");
|
||||
DO_SVML(__svml_cosf4, "__svml_cosf4");
|
||||
DO_SVML(__svml_sincosf4, "__svml_sincosf4");
|
||||
DO_SVML(__svml_tanf4, "__svml_tanf4");
|
||||
DO_SVML(__svml_atanf4, "__svml_atanf4");
|
||||
DO_SVML(__svml_atan2f4, "__svml_atan2f4");
|
||||
DO_SVML(__svml_expf4, "__svml_expf4");
|
||||
DO_SVML(__svml_logf4, "__svml_logf4");
|
||||
DO_SVML(__svml_powf4, "__svml_powf4");
|
||||
|
||||
// figure out the vector width in the compiled code
|
||||
func = module->getFunction("width");
|
||||
if (!func) {
|
||||
fprintf(stderr, "No width() function found!\n");
|
||||
return false;
|
||||
}
|
||||
int width;
|
||||
{
|
||||
typedef int (*PFN)();
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
width = pfn();
|
||||
assert(width == 4 || width == 8 || width == 12 || width == 16);
|
||||
}
|
||||
|
||||
// find the value that returns the desired result
|
||||
func = module->getFunction("result");
|
||||
bool foundResult = (func != NULL);
|
||||
float result[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
result[i] = 0;
|
||||
bool ok = true;
|
||||
if (foundResult) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(result);
|
||||
}
|
||||
else
|
||||
fprintf(stderr, "Warning: no result() function found.\n");
|
||||
|
||||
// try to find a function to run
|
||||
float returned[16];
|
||||
for (int i = 0; i < 16; ++i)
|
||||
returned[i] = 0;
|
||||
float vfloat[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
double vdouble[16] = { 1,2,3,4,5,6,7,8,9,10,11,12,13,14,15,16};
|
||||
int vint[16] = { 2,4,6,8,10,12,14,16,18,20,22,24,26,28,30,32 };
|
||||
int vint2[16] = { 5,6,7,8,9,10,11,12,13,14,15,16,17,18,19,20};
|
||||
|
||||
if ((func = module->getFunction("f_v")) != NULL) {
|
||||
typedef void (*PFN)(float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned);
|
||||
}
|
||||
else if ((func = module->getFunction("f_f")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fu")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, float fu);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
llvm::verifyFunction(*func);
|
||||
pfn(returned, vfloat, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_fi")) != NULL) {
|
||||
typedef void (*PFN)(float *, float *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vfloat, vint);
|
||||
}
|
||||
else if ((func = module->getFunction("f_du")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, double);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.);
|
||||
}
|
||||
else if ((func = module->getFunction("f_duf")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, float);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, 5.f);
|
||||
}
|
||||
else if ((func = module->getFunction("f_di")) != NULL) {
|
||||
typedef void (*PFN)(float *, double *, int *);
|
||||
PFN pfn = reinterpret_cast<PFN>(ee->getPointerToFunction(func));
|
||||
pfn(returned, vdouble, vint2);
|
||||
}
|
||||
else {
|
||||
fprintf(stderr, "Unable to find runnable function in file \"%s\"\n", fn);
|
||||
ok = false;
|
||||
}
|
||||
|
||||
// see if we got the right result
|
||||
if (ok) {
|
||||
if (foundResult) {
|
||||
for (int i = 0; i < width; ++i)
|
||||
if (returned[i] != result[i]) {
|
||||
ok = false;
|
||||
fprintf(stderr, "Test \"%s\" RETURNED %d: %g / %a EXPECTED %g / %a\n",
|
||||
fn, i, returned[i], returned[i], result[i], result[i]);
|
||||
}
|
||||
}
|
||||
else {
|
||||
for (int i = 0; i < width; ++i)
|
||||
fprintf(stderr, "Test \"%s\" returned %d: %g / %a\n",
|
||||
fn, i, returned[i], returned[i]);
|
||||
}
|
||||
}
|
||||
|
||||
delete ee;
|
||||
delete ctx;
|
||||
|
||||
return ok && foundResult;
|
||||
}
|
||||
|
||||
int main(int argc, char *argv[]) {
|
||||
llvm::InitializeNativeTarget();
|
||||
|
||||
std::vector<const char *> files;
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "--help") || !strcmp(argv[i], "-h"))
|
||||
usage(0);
|
||||
else
|
||||
files.push_back(argv[i]);
|
||||
}
|
||||
|
||||
int passes = 0, fails = 0;
|
||||
for (unsigned int i = 0; i < files.size(); ++i) {
|
||||
if (lRunTest(files[i])) ++passes;
|
||||
else ++fails;
|
||||
}
|
||||
|
||||
if (fails > 0)
|
||||
fprintf(stderr, "%d/%d tests passed\n", passes, passes+fails);
|
||||
return fails > 0;
|
||||
}
|
||||
88
ispc_test.vcxproj
Executable file
88
ispc_test.vcxproj
Executable file
@@ -0,0 +1,88 @@
|
||||
<?xml version="1.0" encoding="utf-8"?>
|
||||
<Project DefaultTargets="Build" ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
|
||||
<ItemGroup Label="ProjectConfigurations">
|
||||
<ProjectConfiguration Include="Debug|Win32">
|
||||
<Configuration>Debug</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
<ProjectConfiguration Include="Release|Win32">
|
||||
<Configuration>Release</Configuration>
|
||||
<Platform>Win32</Platform>
|
||||
</ProjectConfiguration>
|
||||
</ItemGroup>
|
||||
<ItemGroup>
|
||||
<ClCompile Include="ispc_test.cpp" />
|
||||
</ItemGroup>
|
||||
<PropertyGroup Label="Globals">
|
||||
<ProjectGuid>{92547BA8-BE86-4E78-8799-1D72A70E5831}</ProjectGuid>
|
||||
<Keyword>Win32Proj</Keyword>
|
||||
<RootNamespace>ispc_test</RootNamespace>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>true</UseDebugLibraries>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
|
||||
<ConfigurationType>Application</ConfigurationType>
|
||||
<UseDebugLibraries>false</UseDebugLibraries>
|
||||
<WholeProgramOptimization>true</WholeProgramOptimization>
|
||||
<CharacterSet>Unicode</CharacterSet>
|
||||
</PropertyGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
|
||||
<ImportGroup Label="ExtensionSettings">
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
|
||||
</ImportGroup>
|
||||
<PropertyGroup Label="UserMacros" />
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<LinkIncremental>true</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<LinkIncremental>false</LinkIncremental>
|
||||
</PropertyGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
|
||||
<ClCompile>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<Optimization>Disabled</Optimization>
|
||||
<PreprocessorDefinitions>_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
|
||||
<ClCompile>
|
||||
<WarningLevel>Level3</WarningLevel>
|
||||
<PrecompiledHeader>
|
||||
</PrecompiledHeader>
|
||||
<Optimization>MaxSpeed</Optimization>
|
||||
<FunctionLevelLinking>true</FunctionLevelLinking>
|
||||
<IntrinsicFunctions>true</IntrinsicFunctions>
|
||||
<PreprocessorDefinitions>NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
|
||||
<AdditionalIncludeDirectories>$(LLVM_INSTALL_DIR)/include</AdditionalIncludeDirectories>
|
||||
</ClCompile>
|
||||
<Link>
|
||||
<SubSystem>Console</SubSystem>
|
||||
<GenerateDebugInformation>true</GenerateDebugInformation>
|
||||
<EnableCOMDATFolding>true</EnableCOMDATFolding>
|
||||
<OptimizeReferences>true</OptimizeReferences>
|
||||
<AdditionalLibraryDirectories>$(LLVM_INSTALL_DIR)/lib</AdditionalLibraryDirectories>
|
||||
<AdditionalDependencies>LLVMAnalysis.lib;LLVMArchive.lib;LLVMAsmPrinter.lib;LLVMBitReader.lib;LLVMBitWriter.lib;LLVMCodeGen.lib;LLVMCore.lib;LLVMExecutionEngine.lib;LLVMInstCombine.lib;LLVMInstrumentation.lib;LLVMipa.lib;LLVMipo.lib;LLVMJIT.lib;LLVMLinker.lib;LLVMMC.lib;LLVMMCParser.lib;LLVMObject.lib;LLVMScalarOpts.lib;LLVMSelectionDAG.lib;LLVMSupport.lib;LLVMTarget.lib;LLVMTransformUtils.lib;LLVMX86ASMPrinter.lib;LLVMX86ASMParser.lib;LLVMX86Utils.lib;LLVMX86CodeGen.lib;LLVMX86Disassembler.lib;LLVMX86Info.lib;%(AdditionalDependencies)</AdditionalDependencies>
|
||||
</Link>
|
||||
</ItemDefinitionGroup>
|
||||
<Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
|
||||
<ImportGroup Label="ExtensionTargets">
|
||||
</ImportGroup>
|
||||
</Project>
|
||||
426
lex.ll
Normal file
426
lex.ll
Normal file
@@ -0,0 +1,426 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
%{
|
||||
|
||||
#include "ispc.h"
|
||||
#include "decl.h"
|
||||
#include "parse.hh"
|
||||
#include "sym.h"
|
||||
#include "util.h"
|
||||
#include "module.h"
|
||||
|
||||
static uint32_t lParseBinary(const char *ptr, SourcePos pos);
|
||||
static void lCComment(SourcePos *);
|
||||
static void lCppComment(SourcePos *);
|
||||
static void lHandleCppHash(SourcePos *);
|
||||
static void lStringConst(YYSTYPE *, SourcePos *);
|
||||
|
||||
#define YY_USER_ACTION \
|
||||
yylloc->first_line = yylloc->last_line; \
|
||||
yylloc->first_column = yylloc->last_column; \
|
||||
yylloc->last_column += yyleng;
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
inline int isatty(int) { return 0; }
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
%}
|
||||
|
||||
%option nounput
|
||||
%option noyywrap
|
||||
%option bison-bridge
|
||||
%option bison-locations
|
||||
%option nounistd
|
||||
|
||||
WHITESPACE [ \t\r]+
|
||||
INT_NUMBER (([0-9]+)|(0x[0-9a-fA-F]+)|(0b[01]+))
|
||||
FLOAT_NUMBER (([0-9]+|(([0-9]+\.[0-9]*[fF]?)|(\.[0-9]+)))([eE][-+]?[0-9]+)?[fF]?)|([-]?0x[01]\.?[0-9a-fA-F]+p[-+]?[0-9]+[fF]?)
|
||||
|
||||
IDENT [a-zA-Z_][a-zA-Z_0-9]*
|
||||
|
||||
%%
|
||||
"/*" { lCComment(yylloc); }
|
||||
"//" { lCppComment(yylloc); }
|
||||
|
||||
bool { return TOKEN_BOOL; }
|
||||
break { return TOKEN_BREAK; }
|
||||
case { return TOKEN_CASE; }
|
||||
cbreak { return TOKEN_CBREAK; }
|
||||
ccontinue { return TOKEN_CCONTINUE; }
|
||||
cdo { return TOKEN_CDO; }
|
||||
cfor { return TOKEN_CFOR; }
|
||||
char { return TOKEN_CHAR; }
|
||||
cif { return TOKEN_CIF; }
|
||||
cwhile { return TOKEN_CWHILE; }
|
||||
const { return TOKEN_CONST; }
|
||||
continue { return TOKEN_CONTINUE; }
|
||||
creturn { return TOKEN_CRETURN; }
|
||||
default { return TOKEN_DEFAULT; }
|
||||
do { return TOKEN_DO; }
|
||||
double { return TOKEN_DOUBLE; }
|
||||
else { return TOKEN_ELSE; }
|
||||
enum { return TOKEN_ENUM; }
|
||||
export { return TOKEN_EXPORT; }
|
||||
extern { return TOKEN_EXTERN; }
|
||||
false { return TOKEN_FALSE; }
|
||||
float { return TOKEN_FLOAT; }
|
||||
for { return TOKEN_FOR; }
|
||||
goto { return TOKEN_GOTO; }
|
||||
if { return TOKEN_IF; }
|
||||
inline { return TOKEN_INLINE; }
|
||||
int { return TOKEN_INT; }
|
||||
int32 { return TOKEN_INT; }
|
||||
int64 { return TOKEN_INT64; }
|
||||
launch { return TOKEN_LAUNCH; }
|
||||
print { return TOKEN_PRINT; }
|
||||
reference { return TOKEN_REFERENCE; }
|
||||
return { return TOKEN_RETURN; }
|
||||
soa { return TOKEN_SOA; }
|
||||
static { return TOKEN_STATIC; }
|
||||
struct { return TOKEN_STRUCT; }
|
||||
switch { return TOKEN_SWITCH; }
|
||||
sync { return TOKEN_SYNC; }
|
||||
task { return TOKEN_TASK; }
|
||||
true { return TOKEN_TRUE; }
|
||||
typedef { return TOKEN_TYPEDEF; }
|
||||
uniform { return TOKEN_UNIFORM; }
|
||||
unsigned { return TOKEN_UNSIGNED; }
|
||||
varying { return TOKEN_VARYING; }
|
||||
void { return TOKEN_VOID; }
|
||||
while { return TOKEN_WHILE; }
|
||||
|
||||
L?\"(\\.|[^\\"])*\" { lStringConst(yylval, yylloc); return TOKEN_STRING_LITERAL; }
|
||||
|
||||
{IDENT} {
    /* Type names and ordinary identifiers look the same to the lexer, so
       the symbol table decides which token this is.  Either way the text
       is handed to the parser as a newly allocated std::string. */
    const bool namesType = (m->symbolTable->LookupType(yytext) != NULL);
    yylval->stringVal = new std::string(yytext);
    return namesType ? TOKEN_TYPE_NAME : TOKEN_IDENTIFIER;
}
|
||||
|
||||
{INT_NUMBER} {
    /* Signed integer constant: decimal, 0x... hexadecimal or 0b... binary.
       The text is parsed into a 64-bit value on every platform so that the
       check below can really notice constants that don't fit in 32 bits.
       (The Windows C runtime's strtoul() yields a 32-bit unsigned long, so
       using it made that check a no-op; _strtoui64() is its 64-bit
       counterpart.) */
    uint64_t val;

    if (yytext[0] == '0' && yytext[1] == 'b')
        val = lParseBinary(yytext+2, *yylloc);
    else {
#ifdef ISPC_IS_WINDOWS
        val = _strtoui64(yytext, NULL, 0);
#else
        val = strtoull(yytext, NULL, 0);
#endif
    }

    yylval->int32Val = (int32_t)val;
    /* Warn if truncating to 32 bits changed the value. */
    if (val != (unsigned int)yylval->int32Val)
        Warning(*yylloc, "32-bit integer has insufficient bits to represent value %s (%x %llx)",
                yytext, yylval->int32Val, (unsigned long long)val);
    return TOKEN_INT_CONSTANT;
}
|
||||
|
||||
{INT_NUMBER}[uU] {
    /* Unsigned integer constant: an integer constant followed by u or U.
       The numeric conversion stops at the suffix by itself.  The text is
       parsed into a 64-bit value on every platform so that the check below
       can really notice constants that don't fit in 32 bits.  (The Windows
       C runtime's strtoul() yields a 32-bit unsigned long, so using it made
       that check a no-op; _strtoui64() is its 64-bit counterpart.) */
    uint64_t val;

    if (yytext[0] == '0' && yytext[1] == 'b')
        val = lParseBinary(yytext+2, *yylloc);
    else {
#ifdef ISPC_IS_WINDOWS
        val = _strtoui64(yytext, NULL, 0);
#else
        val = strtoull(yytext, NULL, 0);
#endif
    }

    yylval->int32Val = (int32_t)val;
    /* Warn if truncating to 32 bits changed the value. */
    if (val != (unsigned int)yylval->int32Val)
        Warning(*yylloc, "32-bit integer has insufficient bits to represent value %s (%x %llx)",
                yytext, yylval->int32Val, (unsigned long long)val);
    return TOKEN_UINT_CONSTANT;
}
|
||||
|
||||
{FLOAT_NUMBER} {
    /* FIXME: hexadecimal float constants depend on atof() understanding
       them, which the Windows implementation doesn't; supporting them
       there needs a hex float parser of our own. */
    const double parsed = atof(yytext);
    yylval->floatVal = parsed;
    return TOKEN_FLOAT_CONSTANT;
}
|
||||
|
||||
"++" { return TOKEN_INC_OP; }
|
||||
"--" { return TOKEN_DEC_OP; }
|
||||
"<<" { return TOKEN_LEFT_OP; }
|
||||
">>" { return TOKEN_RIGHT_OP; }
|
||||
"<=" { return TOKEN_LE_OP; }
|
||||
">=" { return TOKEN_GE_OP; }
|
||||
"==" { return TOKEN_EQ_OP; }
|
||||
"!=" { return TOKEN_NE_OP; }
|
||||
"&&" { return TOKEN_AND_OP; }
|
||||
"||" { return TOKEN_OR_OP; }
|
||||
"*=" { return TOKEN_MUL_ASSIGN; }
|
||||
"/=" { return TOKEN_DIV_ASSIGN; }
|
||||
"%=" { return TOKEN_MOD_ASSIGN; }
|
||||
"+=" { return TOKEN_ADD_ASSIGN; }
|
||||
"-=" { return TOKEN_SUB_ASSIGN; }
|
||||
"<<=" { return TOKEN_LEFT_ASSIGN; }
|
||||
">>=" { return TOKEN_RIGHT_ASSIGN; }
|
||||
"&=" { return TOKEN_AND_ASSIGN; }
|
||||
"^=" { return TOKEN_XOR_ASSIGN; }
|
||||
"|=" { return TOKEN_OR_ASSIGN; }
|
||||
";" { return ';'; }
|
||||
("{"|"<%") { return '{'; }
|
||||
("}"|"%>") { return '}'; }
|
||||
"," { return ','; }
|
||||
":" { return ':'; }
|
||||
"=" { return '='; }
|
||||
"(" { return '('; }
|
||||
")" { return ')'; }
|
||||
("["|"<:") { return '['; }
|
||||
("]"|":>") { return ']'; }
|
||||
"." { return '.'; }
|
||||
"&" { return '&'; }
|
||||
"!" { return '!'; }
|
||||
"~" { return '~'; }
|
||||
"-" { return '-'; }
|
||||
"+" { return '+'; }
|
||||
"*" { return '*'; }
|
||||
"/" { return '/'; }
|
||||
"%" { return '%'; }
|
||||
"<" { return '<'; }
|
||||
">" { return '>'; }
|
||||
"^" { return '^'; }
|
||||
"|" { return '|'; }
|
||||
"?" { return '?'; }
|
||||
|
||||
{WHITESPACE} { }
|
||||
|
||||
\n {
|
||||
yylloc->last_line++;
|
||||
yylloc->last_column = 1;
|
||||
}
|
||||
|
||||
#(line)?[ ][0-9]+[ ]\"(\\.|[^\\"])*\"[^\n]* {
|
||||
lHandleCppHash(yylloc);
|
||||
}
|
||||
|
||||
. {
    /* Catch-all for any character no other rule accepts.  The scanner has
       already executed YY_USER_ACTION for this match before entering the
       action, so it must not be invoked again here: doing so advances the
       recorded column a second time and skews every later position on the
       line. */
    Error(*yylloc, "Illegal character: %c (0x%x)", yytext[0], int(yytext[0]));
}
|
||||
|
||||
%%
|
||||
|
||||
/*sizeof { return TOKEN_SIZEOF; }*/
|
||||
/*"->" { return TOKEN_PTR_OP; }*/
|
||||
/*short { return TOKEN_SHORT; }*/
|
||||
/*long { return TOKEN_LONG; }*/
|
||||
/*signed { return TOKEN_SIGNED; }*/
|
||||
/*volatile { return TOKEN_VOLATILE; }*/
|
||||
/*"long"[ \t\v\f\n]+"long" { return TOKEN_LONGLONG; }*/
|
||||
/*union { return TOKEN_UNION; }*/
|
||||
/*"..." { return TOKEN_ELLIPSIS; }*/
|
||||
|
||||
/** Return the integer version of a binary constant from a string.
|
||||
*/
|
||||
static uint32_t
|
||||
lParseBinary(const char *ptr, SourcePos pos) {
|
||||
uint32_t val = 0;
|
||||
bool warned = false;
|
||||
|
||||
while (*ptr != '\0') {
|
||||
/* if this hits, the regexp for 0b... constants is broken */
|
||||
assert(*ptr == '0' || *ptr == '1');
|
||||
|
||||
if ((val & (1<<31)) && warned == false) {
|
||||
// We're about to shift out a set bit
|
||||
// FIXME: 64-bit int constants...
|
||||
Warning(pos, "Can't represent binary constant with 32-bit integer type");
|
||||
warned = true;
|
||||
}
|
||||
|
||||
val = (val << 1) | (*ptr == '0' ? 0 : 1);
|
||||
++ptr;
|
||||
}
|
||||
return val;
|
||||
}
|
||||
|
||||
|
||||
/** Handle a C-style comment in the source.
|
||||
*/
|
||||
static void
|
||||
lCComment(SourcePos *pos) {
|
||||
char c, prev = 0;
|
||||
|
||||
while ((c = yyinput()) != 0) {
|
||||
if (c == '\n') {
|
||||
pos->last_line++;
|
||||
pos->last_column = 1;
|
||||
}
|
||||
if (c == '/' && prev == '*')
|
||||
return;
|
||||
prev = c;
|
||||
}
|
||||
Error(*pos, "unterminated comment");
|
||||
}
|
||||
|
||||
/** Handle a C++-style comment--eat everything up until the end of the line.
|
||||
*/
|
||||
static void
|
||||
lCppComment(SourcePos *pos) {
|
||||
char c;
|
||||
do {
|
||||
c = yyinput();
|
||||
} while (c != 0 && c != '\n');
|
||||
if (c == '\n') {
|
||||
pos->last_line++;
|
||||
pos->last_column = 1;
|
||||
}
|
||||
}
|
||||
|
||||
/** Handle a line that starts with a # character; this should be something
|
||||
left behind by the preprocessor indicating the source file/line
|
||||
that our current position corresponds to.
|
||||
*/
|
||||
static void lHandleCppHash(SourcePos *pos) {
|
||||
char *ptr, *src;
|
||||
|
||||
// Advance past the opening stuff on the line.
|
||||
assert(yytext[0] == '#');
|
||||
if (yytext[1] == ' ')
|
||||
// On Linux/OSX, the preprocessor gives us lines like
|
||||
// # 1234 "foo.c"
|
||||
ptr = yytext + 2;
|
||||
else {
|
||||
// On windows, cl.exe's preprocessor gives us lines of the form:
|
||||
// #line 1234 "foo.c"
|
||||
assert(!strncmp(yytext+1, "line ", 5));
|
||||
ptr = yytext + 6;
|
||||
}
|
||||
|
||||
// Now we can set the line number based on the integer in the string
|
||||
// that ptr is pointing at.
|
||||
pos->last_line = strtol(ptr, &src, 10) - 1;
|
||||
pos->last_column = 1;
|
||||
// Make sure that the character after the integer is a space and that
|
||||
// then we have open quotes
|
||||
assert(src != ptr && src[0] == ' ' && src[1] == '"');
|
||||
src += 2;
|
||||
|
||||
// And the filename is everything up until the closing quotes
|
||||
std::string filename;
|
||||
while (*src != '"') {
|
||||
assert(*src && *src != '\n');
|
||||
filename.push_back(*src);
|
||||
++src;
|
||||
}
|
||||
pos->name = strdup(filename.c_str());
|
||||
}
|
||||
|
||||
|
||||
/** Given a pointer to a position in a string, return the character that it
|
||||
represents, accounting for the escape characters supported in string
|
||||
constants. (i.e. given the literal string "\\", return the character
|
||||
'/'). The return value is the new position in the string and the
|
||||
decoded character is returned in *pChar.
|
||||
*/
|
||||
static char *
|
||||
lEscapeChar(char *str, char *pChar, SourcePos *pos)
|
||||
{
|
||||
if (*str != '\\') {
|
||||
*pChar = *str;
|
||||
}
|
||||
else {
|
||||
char *tail;
|
||||
++str;
|
||||
switch (*str) {
|
||||
case '\'': *pChar = '\''; break;
|
||||
case '\"': *pChar = '\"'; break;
|
||||
case '?': *pChar = '\?'; break;
|
||||
case '\\': *pChar = '\\'; break;
|
||||
case 'a': *pChar = '\a'; break;
|
||||
case 'b': *pChar = '\b'; break;
|
||||
case 'f': *pChar = '\f'; break;
|
||||
case 'n': *pChar = '\n'; break;
|
||||
case 'r': *pChar = '\r'; break;
|
||||
case 't': *pChar = '\t'; break;
|
||||
case 'v': *pChar = '\v'; break;
|
||||
// octal constants \012
|
||||
case '0': case '1': case '2': case '3': case '4':
|
||||
case '5': case '6': case '7':
|
||||
*pChar = strtol(str, &tail, 8);
|
||||
str = tail - 1;
|
||||
break;
|
||||
// hexidecimal constant \xff
|
||||
case 'x':
|
||||
*pChar = strtol(str, &tail, 16);
|
||||
str = tail - 1;
|
||||
break;
|
||||
default:
|
||||
Error(*pos, "Bad character escape sequence: '%s'\n.", str);
|
||||
break;
|
||||
}
|
||||
}
|
||||
++str;
|
||||
return str;
|
||||
}
|
||||
|
||||
|
||||
/** Parse a string constant in the source file. For each character in the
|
||||
string, handle any escaped characters with lEscapeChar() and keep eating
|
||||
characters until we come to the closing quote.
|
||||
*/
|
||||
static void
|
||||
lStringConst(YYSTYPE *yylval, SourcePos *pos)
|
||||
{
|
||||
char *p;
|
||||
std::string str;
|
||||
p = strchr(yytext, '"') + 1;
|
||||
while (*p != '\"') {
|
||||
char cval;
|
||||
p = lEscapeChar(p, &cval, pos);
|
||||
str.push_back(cval);
|
||||
}
|
||||
yylval->stringVal = new std::string(str);
|
||||
}
|
||||
329
llvmutil.cpp
Normal file
329
llvmutil.cpp
Normal file
@@ -0,0 +1,329 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file llvmutil.cpp
|
||||
@brief Implementations of various LLVM utility types and classes.
|
||||
*/
|
||||
|
||||
#include "llvmutil.h"
|
||||
#include "type.h"
|
||||
|
||||
// Storage for the static members of LLVMTypes.  All of them start out as
// NULL; InitLLVMUtil() assigns the real values.

// Scalar types and pointers to them
const llvm::Type        *LLVMTypes::VoidType         = NULL;
const llvm::PointerType *LLVMTypes::VoidPointerType  = NULL;
const llvm::Type        *LLVMTypes::BoolType         = NULL;
const llvm::Type        *LLVMTypes::Int8Type         = NULL;
const llvm::Type        *LLVMTypes::Int16Type        = NULL;
const llvm::Type        *LLVMTypes::Int32Type        = NULL;
const llvm::Type        *LLVMTypes::Int32PointerType = NULL;
const llvm::Type        *LLVMTypes::Int64Type        = NULL;
const llvm::Type        *LLVMTypes::Int64PointerType = NULL;
const llvm::Type        *LLVMTypes::FloatType        = NULL;
const llvm::Type        *LLVMTypes::FloatPointerType = NULL;
const llvm::Type        *LLVMTypes::DoubleType       = NULL;

// Vector types and pointers to them
const llvm::VectorType *LLVMTypes::MaskType               = NULL;
const llvm::VectorType *LLVMTypes::BoolVectorType         = NULL;
const llvm::VectorType *LLVMTypes::Int1VectorType         = NULL;
const llvm::VectorType *LLVMTypes::Int32VectorType        = NULL;
const llvm::Type       *LLVMTypes::Int32VectorPointerType = NULL;
const llvm::VectorType *LLVMTypes::Int64VectorType        = NULL;
const llvm::Type       *LLVMTypes::Int64VectorPointerType = NULL;
const llvm::VectorType *LLVMTypes::FloatVectorType        = NULL;
const llvm::Type       *LLVMTypes::FloatVectorPointerType = NULL;
const llvm::VectorType *LLVMTypes::DoubleVectorType       = NULL;
const llvm::ArrayType  *LLVMTypes::VoidPointerVectorType  = NULL;

// Commonly used constants, also set by InitLLVMUtil()
llvm::Constant *LLVMTrue       = NULL;
llvm::Constant *LLVMFalse      = NULL;
llvm::Constant *LLVMMaskAllOn  = NULL;
llvm::Constant *LLVMMaskAllOff = NULL;
|
||||
|
||||
|
||||
void
|
||||
InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
||||
LLVMTypes::VoidType = llvm::Type::getVoidTy(*ctx);
|
||||
LLVMTypes::VoidPointerType = llvm::PointerType::get(llvm::Type::getInt8Ty(*ctx), 0);
|
||||
LLVMTypes::BoolType = llvm::Type::getInt1Ty(*ctx);
|
||||
LLVMTypes::Int8Type = llvm::Type::getInt8Ty(*ctx);
|
||||
LLVMTypes::Int16Type = llvm::Type::getInt16Ty(*ctx);
|
||||
LLVMTypes::Int32Type = llvm::Type::getInt32Ty(*ctx);
|
||||
LLVMTypes::Int32PointerType = llvm::PointerType::get(LLVMTypes::Int32Type, 0);
|
||||
LLVMTypes::Int64Type = llvm::Type::getInt64Ty(*ctx);
|
||||
LLVMTypes::Int64PointerType = llvm::PointerType::get(LLVMTypes::Int64Type, 0);
|
||||
LLVMTypes::FloatType = llvm::Type::getFloatTy(*ctx);
|
||||
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
|
||||
LLVMTypes::DoubleType = llvm::Type::getDoubleTy(*ctx);
|
||||
|
||||
// Note that both the mask and bool vectors are vector of int32s
|
||||
// (not i1s). LLVM ends up generating much better SSE code with
|
||||
// this representation.
|
||||
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
|
||||
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
|
||||
|
||||
LLVMTypes::Int1VectorType =
|
||||
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
||||
LLVMTypes::Int32VectorType =
|
||||
llvm::VectorType::get(LLVMTypes::Int32Type, target.vectorWidth);
|
||||
LLVMTypes::Int32VectorPointerType = llvm::PointerType::get(LLVMTypes::Int32VectorType, 0);
|
||||
LLVMTypes::Int64VectorType =
|
||||
llvm::VectorType::get(LLVMTypes::Int64Type, target.vectorWidth);
|
||||
LLVMTypes::Int64VectorPointerType = llvm::PointerType::get(LLVMTypes::Int64VectorType, 0);
|
||||
LLVMTypes::FloatVectorType =
|
||||
llvm::VectorType::get(LLVMTypes::FloatType, target.vectorWidth);
|
||||
LLVMTypes::FloatVectorPointerType = llvm::PointerType::get(LLVMTypes::FloatVectorType, 0);
|
||||
LLVMTypes::DoubleVectorType =
|
||||
llvm::VectorType::get(LLVMTypes::DoubleType, target.vectorWidth);
|
||||
LLVMTypes::VoidPointerVectorType =
|
||||
llvm::ArrayType::get(LLVMTypes::VoidPointerType, target.vectorWidth);
|
||||
|
||||
LLVMTrue = llvm::ConstantInt::getTrue(*ctx);
|
||||
LLVMFalse = llvm::ConstantInt::getFalse(*ctx);
|
||||
|
||||
std::vector<llvm::Constant *> maskOnes;
|
||||
llvm::Constant *onMask = NULL;
|
||||
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
|
||||
true /*signed*/); // 0xffffffff
|
||||
|
||||
for (int i = 0; i < target.vectorWidth; ++i)
|
||||
maskOnes.push_back(onMask);
|
||||
LLVMMaskAllOn = llvm::ConstantVector::get(LLVMTypes::MaskType, maskOnes);
|
||||
|
||||
std::vector<llvm::Constant *> maskZeros;
|
||||
llvm::Constant *offMask = NULL;
|
||||
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
|
||||
true /*signed*/);
|
||||
|
||||
for (int i = 0; i < target.vectorWidth; ++i)
|
||||
maskZeros.push_back(offMask);
|
||||
LLVMMaskAllOff = llvm::ConstantVector::get(LLVMTypes::MaskType, maskZeros);
|
||||
}
|
||||
|
||||
|
||||
/** Returns an LLVM i32 constant holding the given signed value. */
llvm::ConstantInt *
LLVMInt32(int32_t ival) {
    llvm::LLVMContext &context = *g->ctx;
    return llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), ival,
                                  true /*signed*/);
}
|
||||
|
||||
|
||||
/** Returns an LLVM i32 constant holding the given unsigned value. */
llvm::ConstantInt *
LLVMUInt32(uint32_t ival) {
    llvm::LLVMContext &context = *g->ctx;
    return llvm::ConstantInt::get(llvm::Type::getInt32Ty(context), ival,
                                  false /*unsigned*/);
}
|
||||
|
||||
|
||||
/** Returns an LLVM i64 constant holding the given signed value. */
llvm::ConstantInt *
LLVMInt64(int64_t ival) {
    llvm::LLVMContext &context = *g->ctx;
    return llvm::ConstantInt::get(llvm::Type::getInt64Ty(context), ival,
                                  true /*signed*/);
}
|
||||
|
||||
|
||||
/** Returns an LLVM i64 constant holding the given unsigned value. */
llvm::ConstantInt *
LLVMUInt64(uint64_t ival) {
    llvm::LLVMContext &context = *g->ctx;
    return llvm::ConstantInt::get(llvm::Type::getInt64Ty(context), ival,
                                  false /*unsigned*/);
}
|
||||
|
||||
|
||||
/** Returns an LLVM float constant holding the given value. */
llvm::Constant *
LLVMFloat(float fval) {
    llvm::LLVMContext &context = *g->ctx;
    return llvm::ConstantFP::get(llvm::Type::getFloatTy(context), fval);
}
|
||||
|
||||
|
||||
/** Returns an LLVM double constant holding the given value. */
llvm::Constant *
LLVMDouble(double dval) {
    llvm::LLVMContext &context = *g->ctx;
    return llvm::ConstantFP::get(llvm::Type::getDoubleTy(context), dval);
}
|
||||
|
||||
|
||||
/** Returns an i32 vector constant with ival in each of the
    g->target.vectorWidth elements. */
llvm::Constant *
LLVMInt32Vector(int32_t ival) {
    std::vector<llvm::Constant *> lanes;
    llvm::Constant *element = LLVMInt32(ival);

    int remaining = g->target.vectorWidth;
    while (remaining-- > 0)
        lanes.push_back(element);

    return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns an i32 vector constant whose elements are the first
    g->target.vectorWidth entries of ivec. */
llvm::Constant *
LLVMInt32Vector(const int32_t *ivec) {
    const int width = g->target.vectorWidth;
    std::vector<llvm::Constant *> lanes;

    for (int lane = 0; lane < width; ++lane) {
        llvm::Constant *element = LLVMInt32(ivec[lane]);
        lanes.push_back(element);
    }

    return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns an i32 vector constant with the unsigned value ival in each of
    the g->target.vectorWidth elements. */
llvm::Constant *
LLVMUInt32Vector(uint32_t ival) {
    std::vector<llvm::Constant *> lanes;
    llvm::Constant *element = LLVMUInt32(ival);

    int remaining = g->target.vectorWidth;
    while (remaining-- > 0)
        lanes.push_back(element);

    return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns an i32 vector constant whose elements are the first
    g->target.vectorWidth unsigned entries of ivec. */
llvm::Constant *
LLVMUInt32Vector(const uint32_t *ivec) {
    const int width = g->target.vectorWidth;
    std::vector<llvm::Constant *> lanes;

    for (int lane = 0; lane < width; ++lane) {
        llvm::Constant *element = LLVMUInt32(ivec[lane]);
        lanes.push_back(element);
    }

    return llvm::ConstantVector::get(LLVMTypes::Int32VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns a float vector constant with fval in each of the
    g->target.vectorWidth elements. */
llvm::Constant *
LLVMFloatVector(float fval) {
    std::vector<llvm::Constant *> lanes;
    llvm::Constant *element = LLVMFloat(fval);

    int remaining = g->target.vectorWidth;
    while (remaining-- > 0)
        lanes.push_back(element);

    return llvm::ConstantVector::get(LLVMTypes::FloatVectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns a float vector constant whose elements are the first
    g->target.vectorWidth entries of fvec. */
llvm::Constant *
LLVMFloatVector(const float *fvec) {
    const int width = g->target.vectorWidth;
    std::vector<llvm::Constant *> lanes;

    for (int lane = 0; lane < width; ++lane) {
        llvm::Constant *element = LLVMFloat(fvec[lane]);
        lanes.push_back(element);
    }

    return llvm::ConstantVector::get(LLVMTypes::FloatVectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns a double vector constant with dval in each of the
    g->target.vectorWidth elements. */
llvm::Constant *
LLVMDoubleVector(double dval) {
    std::vector<llvm::Constant *> lanes;
    llvm::Constant *element = LLVMDouble(dval);

    int remaining = g->target.vectorWidth;
    while (remaining-- > 0)
        lanes.push_back(element);

    return llvm::ConstantVector::get(LLVMTypes::DoubleVectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns a double vector constant whose elements are the first
    g->target.vectorWidth entries of dvec. */
llvm::Constant *
LLVMDoubleVector(const double *dvec) {
    const int width = g->target.vectorWidth;
    std::vector<llvm::Constant *> lanes;

    for (int lane = 0; lane < width; ++lane) {
        llvm::Constant *element = LLVMDouble(dvec[lane]);
        lanes.push_back(element);
    }

    return llvm::ConstantVector::get(LLVMTypes::DoubleVectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns an i64 vector constant with ival in each of the
    g->target.vectorWidth elements. */
llvm::Constant *
LLVMInt64Vector(int64_t ival) {
    std::vector<llvm::Constant *> lanes;
    llvm::Constant *element = LLVMInt64(ival);

    int remaining = g->target.vectorWidth;
    while (remaining-- > 0)
        lanes.push_back(element);

    return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns an i64 vector constant whose elements are the first
    g->target.vectorWidth entries of ivec. */
llvm::Constant *
LLVMInt64Vector(const int64_t *ivec) {
    const int width = g->target.vectorWidth;
    std::vector<llvm::Constant *> lanes;

    for (int lane = 0; lane < width; ++lane) {
        llvm::Constant *element = LLVMInt64(ivec[lane]);
        lanes.push_back(element);
    }

    return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns an i64 vector constant with the unsigned value ival in each of
    the g->target.vectorWidth elements. */
llvm::Constant *
LLVMUInt64Vector(uint64_t ival) {
    std::vector<llvm::Constant *> lanes;
    llvm::Constant *element = LLVMUInt64(ival);

    int remaining = g->target.vectorWidth;
    while (remaining-- > 0)
        lanes.push_back(element);

    return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns an i64 vector constant whose elements are the first
    g->target.vectorWidth unsigned entries of ivec. */
llvm::Constant *
LLVMUInt64Vector(const uint64_t *ivec) {
    const int width = g->target.vectorWidth;
    std::vector<llvm::Constant *> lanes;

    for (int lane = 0; lane < width; ++lane) {
        llvm::Constant *element = LLVMUInt64(ivec[lane]);
        lanes.push_back(element);
    }

    return llvm::ConstantVector::get(LLVMTypes::Int64VectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns a boolean vector constant with b in each of the
    g->target.vectorWidth elements.

    How a lane is encoded depends on LLVMTypes::BoolVectorType: when it is
    the i32 vector type, true is 0xffffffff and false is 0; otherwise the
    element type must be i1 and LLVMTrue / LLVMFalse are used.
 */
llvm::Constant *
LLVMBoolVector(bool b) {
    llvm::Constant *element;
    if (LLVMTypes::BoolVectorType != LLVMTypes::Int32VectorType) {
        assert(LLVMTypes::BoolVectorType->getElementType() ==
               llvm::Type::getInt1Ty(*g->ctx));
        element = b ? LLVMTrue : LLVMFalse;
    }
    else
        element = llvm::ConstantInt::get(LLVMTypes::Int32Type, b ? 0xffffffff : 0,
                                         false /*unsigned*/);

    std::vector<llvm::Constant *> lanes;
    int remaining = g->target.vectorWidth;
    while (remaining-- > 0)
        lanes.push_back(element);

    return llvm::ConstantVector::get(LLVMTypes::BoolVectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns a boolean vector constant whose elements are the first
    g->target.vectorWidth entries of bvec.

    How a lane is encoded depends on LLVMTypes::BoolVectorType: when it is
    the i32 vector type, true is 0xffffffff and false is 0; otherwise the
    element type must be i1 and LLVMTrue / LLVMFalse are used.
 */
llvm::Constant *
LLVMBoolVector(const bool *bvec) {
    std::vector<llvm::Constant *> lanes;

    for (int lane = 0; lane < g->target.vectorWidth; ++lane) {
        llvm::Constant *element;
        if (LLVMTypes::BoolVectorType != LLVMTypes::Int32VectorType) {
            assert(LLVMTypes::BoolVectorType->getElementType() ==
                   llvm::Type::getInt1Ty(*g->ctx));
            element = bvec[lane] ? LLVMTrue : LLVMFalse;
        }
        else
            element = llvm::ConstantInt::get(LLVMTypes::Int32Type, bvec[lane] ? 0xffffffff : 0,
                                             false /*unsigned*/);

        lanes.push_back(element);
    }

    return llvm::ConstantVector::get(LLVMTypes::BoolVectorType, lanes);
}
|
||||
|
||||
|
||||
/** Returns the type used for a "vector" of pointers to t: an array of
    g->target.vectorWidth pointers (address space 0) to t. */
const llvm::ArrayType *
LLVMPointerVectorType(const llvm::Type *t) {
    const llvm::PointerType *pointerToT = llvm::PointerType::get(t, 0);

    // NOTE: ArrayType, not VectorType
    return llvm::ArrayType::get(pointerToT, g->target.vectorWidth);
}
|
||||
157
llvmutil.h
Normal file
157
llvmutil.h
Normal file
@@ -0,0 +1,157 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file llvmutil.h
|
||||
@brief Header file with declarations for various LLVM utility stuff
|
||||
*/
|
||||
|
||||
#ifndef ISPC_LLVMUTIL_H
|
||||
#define ISPC_LLVMUTIL_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
#include <llvm/LLVMContext.h>
|
||||
#include <llvm/Type.h>
|
||||
#include <llvm/DerivedTypes.h>
|
||||
#include <llvm/Constants.h>
|
||||
|
||||
/** This structure holds pointers to a variety of LLVM types; code
|
||||
elsewhere can use them from here, ratherthan needing to make more
|
||||
verbose LLVM API calls.
|
||||
*/
|
||||
struct LLVMTypes {
|
||||
static const llvm::Type *VoidType;
|
||||
static const llvm::PointerType *VoidPointerType;
|
||||
static const llvm::Type *BoolType;
|
||||
static const llvm::Type *Int8Type;
|
||||
static const llvm::Type *Int16Type;
|
||||
static const llvm::Type *Int32Type;
|
||||
static const llvm::Type *Int32PointerType;
|
||||
static const llvm::Type *Int64Type;
|
||||
static const llvm::Type *Int64PointerType;
|
||||
static const llvm::Type *FloatType;
|
||||
static const llvm::Type *FloatPointerType;
|
||||
static const llvm::Type *DoubleType;
|
||||
|
||||
static const llvm::VectorType *MaskType;
|
||||
static const llvm::VectorType *BoolVectorType;
|
||||
static const llvm::VectorType *Int1VectorType;
|
||||
static const llvm::VectorType *Int32VectorType;
|
||||
static const llvm::Type *Int32VectorPointerType;
|
||||
static const llvm::VectorType *Int64VectorType;
|
||||
static const llvm::Type *Int64VectorPointerType;
|
||||
static const llvm::VectorType *FloatVectorType;
|
||||
static const llvm::Type *FloatVectorPointerType;
|
||||
static const llvm::VectorType *DoubleVectorType;
|
||||
static const llvm::ArrayType *VoidPointerVectorType;
|
||||
};
|
||||
|
||||
/** These variables hold the corresponding LLVM constant values as a
|
||||
convenience to code elsewhere in the system.
|
||||
*/
|
||||
extern llvm::Constant *LLVMTrue, *LLVMFalse;
|
||||
|
||||
/** This should be called early in initialization to initialize the members
|
||||
of LLVMTypes and the LLVMTrue/LLVMFalse constants. However, it can't
|
||||
be called until the compilation target is known.
|
||||
*/
|
||||
extern void InitLLVMUtil(llvm::LLVMContext *ctx, Target target);
|
||||
|
||||
/** Returns an LLVM i32 constant of the given value */
|
||||
extern llvm::ConstantInt *LLVMInt32(int32_t i);
|
||||
/** Returns an LLVM i32 constant of the given value */
|
||||
extern llvm::ConstantInt *LLVMUInt32(uint32_t i);
|
||||
/** Returns an LLVM i64 constant of the given value */
|
||||
extern llvm::ConstantInt *LLVMInt64(int64_t i);
|
||||
/** Returns an LLVM i64 constant of the given value */
|
||||
extern llvm::ConstantInt *LLVMUInt64(uint64_t i);
|
||||
/** Returns an LLVM float constant of the given value */
|
||||
extern llvm::Constant *LLVMFloat(float f);
|
||||
/** Returns an LLVM double constant of the given value */
|
||||
extern llvm::Constant *LLVMDouble(double f);
|
||||
|
||||
/** Returns an LLVM boolean vector constant of the given value smeared
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMBoolVector(bool v);
|
||||
/** Returns an LLVM i32 vector constant of the given value smeared
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMInt32Vector(int32_t i);
|
||||
/** Returns an LLVM i32 vector constant of the given value smeared
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMUInt32Vector(uint32_t i);
|
||||
/** Returns an LLVM i64 vector constant of the given value smeared
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMInt64Vector(int64_t i);
|
||||
/** Returns an LLVM i64 vector constant of the given value smeared
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMUInt64Vector(uint64_t i);
|
||||
/** Returns an LLVM float vector constant of the given value smeared
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMFloatVector(float f);
|
||||
/** Returns an LLVM double vector constant of the given value smeared
|
||||
across all elements */
|
||||
extern llvm::Constant *LLVMDoubleVector(double f);
|
||||
|
||||
/** Returns an LLVM boolean vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMBoolVector(const bool *v);
|
||||
/** Returns an LLVM i32 vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMInt32Vector(const int32_t *i);
|
||||
/** Returns an LLVM i32 vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMUInt32Vector(const uint32_t *i);
|
||||
/** Returns an LLVM i64 vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMInt64Vector(const int64_t *i);
|
||||
/** Returns an LLVM i64 vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMUInt64Vector(const uint64_t *i);
|
||||
/** Returns an LLVM float vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMFloatVector(const float *f);
|
||||
/** Returns an LLVM double vector based on the given array of values.
|
||||
The array should have g->target.vectorWidth elements. */
|
||||
extern llvm::Constant *LLVMDoubleVector(const double *f);
|
||||
|
||||
/** LLVM constant value representing an 'all on' SIMD lane mask */
|
||||
extern llvm::Constant *LLVMMaskAllOn;
|
||||
/** LLVM constant value representing an 'all off' SIMD lane mask */
|
||||
extern llvm::Constant *LLVMMaskAllOff;
|
||||
|
||||
/** Given an LLVM type, returns the corresponding type for a vector of
|
||||
pointers to that type. (In practice, an array of pointers, since LLVM
|
||||
prohibits vectors of pointers.
|
||||
*/
|
||||
extern const llvm::ArrayType *LLVMPointerVectorType(const llvm::Type *t);
|
||||
|
||||
#endif // ISPC_LLVMUTIL_H
|
||||
330
main.cpp
Normal file
330
main.cpp
Normal file
@@ -0,0 +1,330 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file main.cpp
|
||||
@brief main() entrypoint implementation for ispc
|
||||
*/
|
||||
|
||||
#include "ispc.h"
|
||||
#include "module.h"
|
||||
#include <stdio.h>
|
||||
#include <llvm/Support/PrettyStackTrace.h>
|
||||
#ifdef LLVM_2_8
|
||||
#include <llvm/System/Signals.h>
|
||||
#else
|
||||
#include <llvm/Support/Signals.h>
|
||||
#endif
|
||||
|
||||
#ifdef ISPC_IS_WINDOWS
|
||||
#define strcasecmp stricmp
|
||||
#define BUILD_DATE __DATE__
|
||||
#define BUILD_VERSION ""
|
||||
#endif // ISPC_IS_WINDOWS
|
||||
|
||||
/** Print the build banner and the list of command-line options to
    standard output, then terminate the process.

    @param ret  Exit status passed to exit(); this function does not return.
 */
static void usage(int ret) {
    printf("This is the Intel(r) SPMD Program Compiler (ispc), build %s (%s)\n\n", BUILD_DATE, BUILD_VERSION);
    printf("usage: ispc\n");
    printf(" [--arch={x86,x86-64}]\t\tSelect target architecture\n");
    printf(" [--cpu=<cpu>]\t\t\tSelect target CPU type\n");
    printf(" (atom, barcelona, core2, corei7, corei7-avx, istanbul, nocona,\n");
    printf(" penryn, westmere)\n");
#ifndef ISPC_IS_WINDOWS
    // Not listed in Windows builds
    printf(" [-D<foo>]\t\t\t\t#define value when running preprocessor\n");
#endif
    printf(" [--debug]\t\t\t\tPrint information useful for debugging ispc\n");
    printf(" [--emit-asm]\t\t\tGenerate assembly language file as output\n");
    printf(" [--emit-llvm]\t\t\tEmit LLVM bitcode file as output\n");
    printf(" [--emit-obj]\t\t\tGenerate object file as output\n");
    printf(" [--fast-math]\t\t\tPerform non-IEEE-compliant optimizations of numeric expressions\n");
    printf(" [-g]\t\t\t\tGenerate debugging information\n");
    printf(" [--help]\t\t\t\tPrint help\n");
    printf(" [-h] <name>\t\t\t\tOutput filename for header\n");
    printf(" [--instrument]\t\t\tEmit instrumentation to gather performance data\n");
    printf(" [--math-lib=<option>]\t\tSelect math library\n");
    printf(" default\t\t\t\tUse ispc's built-in math functions\n");
    printf(" fast\t\t\t\tUse high-performance but lower-accuracy math functions\n");
    printf(" svml\t\t\t\tUse the Intel SVML math libraries\n");
    printf(" system\t\t\t\tUse the system's math library (*may be quite slow*)\n");
    printf(" [--nostdlib]\t\t\tDon't make the ispc standard library available\n");
#ifndef ISPC_IS_WINDOWS
    // Not listed in Windows builds
    printf(" [--nocpp]\t\t\t\tDon't run the C preprocessor\n");
#endif
    printf(" [-o/--outfile] <name>\t\tOutput filename for bitcode (may be \"-\" for standard output)\n");
    printf(" [-O0/-O1]\t\t\t\tSet optimization level\n");
    printf(" [--opt=<option>]\t\t\tSet optimization option\n");
    printf(" disable-blended-masked-stores\t\tScalarize masked stores on SSE (vs. using vblendps)\n");
    printf(" disable-coherent-control-flow\t\tDisable coherent control flow optimizations\n");
    printf(" disable-uniform-control-flow\t\tDisable uniform control flow optimizations\n");
    printf(" disable-gather-scatter-optimizations\tDisable improvements to gather/scatter\n");
    printf(" disable-blending-removal\t\tDisable eliminating blend at same scope\n");
    printf(" disable-gather-scatter-flattening\tDisable flattening when all lanes are on\n");
    printf(" disable-uniform-memory-optimizations\tDisable uniform-based coherent memory access\n");
    printf(" disable-masked-store-optimizations\tDisable lowering to regular stores when possible\n");
    printf(" [--target={sse2,sse4,sse4x2,avx}] Select target ISA (SSE4 is default)\n");
    printf(" [--version]\t\t\t\tPrint ispc version\n");
    printf(" [--woff]\t\t\t\tDisable warnings\n");
    printf(" [--wno-perf]\t\t\tDon't issue warnings related to performance-related issues\n");
    printf(" <file to compile or \"-\" for stdin>\n");
    exit(ret);
}
|
||||
|
||||
/** Given a target name string, set initialize the global g->target
|
||||
structure appropriately.
|
||||
*/
|
||||
static void lDoTarget(const char *target) {
|
||||
if (!strcasecmp(target, "sse2")) {
|
||||
g->target.isa = Target::SSE2;
|
||||
g->target.nativeVectorWidth = 4;
|
||||
g->target.vectorWidth = 4;
|
||||
}
|
||||
else if (!strcasecmp(target, "sse4")) {
|
||||
g->target.isa = Target::SSE4;
|
||||
g->target.nativeVectorWidth = 4;
|
||||
g->target.vectorWidth = 4;
|
||||
}
|
||||
else if (!strcasecmp(target, "sse4x2")) {
|
||||
g->target.isa = Target::SSE4;
|
||||
g->target.nativeVectorWidth = 4;
|
||||
g->target.vectorWidth = 8;
|
||||
}
|
||||
else if (!strcasecmp(target, "avx")) {
|
||||
g->target.isa = Target::AVX;
|
||||
g->target.nativeVectorWidth = 8;
|
||||
g->target.vectorWidth = 8;
|
||||
}
|
||||
else
|
||||
usage(1);
|
||||
}
|
||||
|
||||
|
||||
/** We take arguments from both the command line as well as from the
    ISPC_ARGS environment variable.  This function returns a new set of
    arguments representing the ones from those two sources merged together:
    first the command-line arguments, then the space-separated words of
    ISPC_ARGS.

    @param Argc  Number of command-line arguments.
    @param Argv  The command-line arguments.
    @param argc  Receives the number of entries stored in argv.
    @param argv  Receives the merged arguments.  At most 128 entries are
                 written; if there are more arguments than that, a warning
                 is printed to stderr and the excess is dropped.  Entries
                 that come from ISPC_ARGS are newly allocated copies that
                 are never freed.
 */
static void lGetAllArgs(int Argc, char *Argv[], int &argc, char *argv[128]) {
    // Capacity of the output array, as given by the parameter declaration.
    const int maxArgs = 128;

    // Copy over the command line arguments (passed in)
    argc = 0;
    while (argc < Argc && argc < maxArgs) {
        argv[argc] = Argv[argc];
        ++argc;
    }
    if (argc < Argc) {
        fprintf(stderr, "Warning: too many arguments; ignoring all past the first %d.\n",
                maxArgs);
        return;
    }

    // See if we have any set via the environment variable
    const char *env = getenv("ISPC_ARGS");
    if (!env)
        return;

    while (true) {
        // Skip the spaces in front of the next argument.  Doing this
        // before every argument (not just between arguments) keeps an
        // empty value or leading spaces from producing an empty argument.
        while (*env == ' ')
            ++env;

        // Hit the end of the string; get out of here
        if (*env == '\0')
            break;

        if (argc >= maxArgs) {
            fprintf(stderr, "Warning: too many arguments; ignoring all past the first %d.\n",
                    maxArgs);
            break;
        }

        // Look for the next space in the string, which delimits the end of
        // the current argument
        const char *end = strchr(env, ' ');
        if (end == NULL)
            end = env + strlen(env);
        size_t len = end - env;

        // Copy the argument into a newly allocated memory (so we can
        // NUL-terminate it).
        char *ptr = new char[len+1];
        strncpy(ptr, env, len);
        ptr[len] = '\0';

        argv[argc++] = ptr;
        env = end;
    }
}
|
||||
|
||||
|
||||
int main(int Argc, char *Argv[]) {
|
||||
int argc;
|
||||
char *argv[128];
|
||||
lGetAllArgs(Argc, Argv, argc, argv);
|
||||
|
||||
// Use LLVM's little utility function to print out nice stack traces if
|
||||
// we crash
|
||||
llvm::sys::PrintStackTraceOnErrorSignal();
|
||||
llvm::PrettyStackTraceProgram X(argc, argv);
|
||||
|
||||
char *file = NULL;
|
||||
const char *headerFileName = NULL;
|
||||
const char *outFileName = NULL;
|
||||
|
||||
// Initiailize globals early so that we can set various option values
|
||||
// as we're parsing below
|
||||
g = new Globals;
|
||||
|
||||
bool debugSet = false, optSet = false;
|
||||
Module::OutputType ot = Module::Object;
|
||||
|
||||
for (int i = 1; i < argc; ++i) {
|
||||
if (!strcmp(argv[i], "--help"))
|
||||
usage(0);
|
||||
#ifndef ISPC_IS_WINDOWS
|
||||
else if (!strncmp(argv[i], "-D", 2)) {
|
||||
g->cppArgs.push_back(argv[i]);
|
||||
}
|
||||
#endif // !ISPC_IS_WINDOWS
|
||||
else if (!strncmp(argv[i], "--arch=", 7))
|
||||
g->target.arch = argv[i] + 7;
|
||||
else if (!strncmp(argv[i], "--cpu=", 6))
|
||||
g->target.cpu = argv[i] + 6;
|
||||
else if (!strcmp(argv[i], "--fast-math"))
|
||||
g->opt.fastMath = true;
|
||||
else if (!strcmp(argv[i], "--debug"))
|
||||
g->debugPrint = true;
|
||||
else if (!strcmp(argv[i], "--instrument"))
|
||||
g->emitInstrumentation = true;
|
||||
else if (!strcmp(argv[i], "-g")) {
|
||||
g->generateDebuggingSymbols = true;
|
||||
debugSet = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "--emit-asm"))
|
||||
ot = Module::Asm;
|
||||
else if (!strcmp(argv[i], "--emit-llvm"))
|
||||
ot = Module::Bitcode;
|
||||
else if (!strcmp(argv[i], "--emit-obj"))
|
||||
ot = Module::Object;
|
||||
else if (!strcmp(argv[i], "--target")) {
|
||||
if (++i == argc) usage(1);
|
||||
lDoTarget(argv[i]);
|
||||
}
|
||||
else if (!strncmp(argv[i], "--target=", 9)) {
|
||||
const char *target = argv[i] + 9;
|
||||
lDoTarget(target);
|
||||
}
|
||||
else if (!strncmp(argv[i], "--math-lib=", 11)) {
|
||||
const char *lib = argv[i] + 11;
|
||||
if (!strcmp(lib, "default"))
|
||||
g->mathLib = Globals::Math_ISPC;
|
||||
else if (!strcmp(lib, "fast"))
|
||||
g->mathLib = Globals::Math_ISPCFast;
|
||||
else if (!strcmp(lib, "svml"))
|
||||
g->mathLib = Globals::Math_SVML;
|
||||
else if (!strcmp(lib, "system"))
|
||||
g->mathLib = Globals::Math_System;
|
||||
else
|
||||
usage(1);
|
||||
}
|
||||
else if (!strncmp(argv[i], "--opt=", 6)) {
|
||||
const char *opt = argv[i] + 6;
|
||||
if (!strcmp(opt, "disable-blended-masked-stores"))
|
||||
g->opt.disableBlendedMaskedStores = true;
|
||||
else if (!strcmp(opt, "disable-coherent-control-flow"))
|
||||
g->opt.disableCoherentControlFlow = true;
|
||||
else if (!strcmp(opt, "disable-uniform-control-flow"))
|
||||
g->opt.disableUniformControlFlow = true;
|
||||
else if (!strcmp(opt, "disable-gather-scatter-optimizations"))
|
||||
g->opt.disableGatherScatterOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-blending-removal"))
|
||||
g->opt.disableMaskedStoreToStore = true;
|
||||
else if (!strcmp(opt, "disable-gather-scatter-flattening"))
|
||||
g->opt.disableGatherScatterFlattening = true;
|
||||
else if (!strcmp(opt, "disable-uniform-memory-optimizations"))
|
||||
g->opt.disableUniformMemoryOptimizations = true;
|
||||
else if (!strcmp(opt, "disable-masked-store-optimizations"))
|
||||
g->opt.disableMaskedStoreOptimizations = true;
|
||||
else
|
||||
usage(1);
|
||||
}
|
||||
else if (!strcmp(argv[i], "--woff") || !strcmp(argv[i], "-woff")) {
|
||||
g->disableWarnings = true;
|
||||
g->emitPerfWarnings = false;
|
||||
}
|
||||
else if (!strcmp(argv[i], "--wno-perf") || !strcmp(argv[i], "-wno-perf"))
|
||||
g->emitPerfWarnings = false;
|
||||
else if (!strcmp(argv[i], "-o") || !strcmp(argv[i], "--outfile")) {
|
||||
if (++i == argc) usage(1);
|
||||
outFileName = argv[i];
|
||||
}
|
||||
else if (!strcmp(argv[i], "-h") || !strcmp(argv[i], "--header-outfile")) {
|
||||
if (++i == argc) usage(1);
|
||||
headerFileName = argv[i];
|
||||
}
|
||||
else if (!strcmp(argv[i], "-O0")) {
|
||||
g->opt.level = 0;
|
||||
optSet = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-O") || !strcmp(argv[i], "-O1") ||
|
||||
!strcmp(argv[i], "-O2") || !strcmp(argv[i], "-O3")) {
|
||||
g->opt.level = 1;
|
||||
optSet = true;
|
||||
}
|
||||
else if (!strcmp(argv[i], "-"))
|
||||
;
|
||||
else if (!strcmp(argv[i], "--nostdlib"))
|
||||
g->includeStdlib = false;
|
||||
else if (!strcmp(argv[i], "--nocpp"))
|
||||
g->runCPP = false;
|
||||
else if (!strcmp(argv[i], "-v") || !strcmp(argv[i], "--version")) {
|
||||
printf("Intel(r) SPMD Program Compiler (ispc) build %s (%s)\n",
|
||||
BUILD_DATE, BUILD_VERSION);
|
||||
return 0;
|
||||
}
|
||||
else if (argv[i][0] == '-')
|
||||
usage(1);
|
||||
else {
|
||||
if (file != NULL)
|
||||
usage(1);
|
||||
else
|
||||
file = argv[i];
|
||||
}
|
||||
}
|
||||
|
||||
// If the user specified -g, then the default optimization level is 0.
|
||||
// If -g wasn't specified, the default optimization level is 1 (full
|
||||
// optimization).
|
||||
if (debugSet && !optSet)
|
||||
g->opt.level = 0;
|
||||
|
||||
m = new Module(file);
|
||||
if (m->CompileFile() == 0) {
|
||||
if (outFileName != NULL)
|
||||
if (!m->WriteOutput(ot, outFileName))
|
||||
return 1;
|
||||
if (headerFileName != NULL)
|
||||
if (!m->WriteOutput(Module::Header, headerFileName))
|
||||
return 1;
|
||||
}
|
||||
int errorCount = m->errorCount;
|
||||
delete m;
|
||||
|
||||
return errorCount > 0;
|
||||
}
|
||||
1431
module.cpp
Normal file
1431
module.cpp
Normal file
File diff suppressed because it is too large
Load Diff
113
module.h
Normal file
113
module.h
Normal file
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file module.h
|
||||
@brief Declaration of the Module class, which is the ispc-side representation
|
||||
of the results of compiling a source file.
|
||||
*/
|
||||
|
||||
#ifndef ISPC_MODULE_H
|
||||
#define ISPC_MODULE_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
|
||||
class Module {
|
||||
public:
|
||||
/** The name of the source file being compiled should be passed as the
|
||||
module name. */
|
||||
Module(const char *filename);
|
||||
|
||||
/** Compiles the source file passed to the Module constructor, adding
|
||||
its global variables and functions to both the llvm::Module and
|
||||
SymbolTable. Returns the number of errors during compilation. */
|
||||
int CompileFile();
|
||||
|
||||
/** Adds the global variable described by the declaration information to
|
||||
the module. */
|
||||
void AddGlobal(DeclSpecs *ds, Declarator *decl);
|
||||
|
||||
/** Adds the function described by the declaration information and the
|
||||
provided statements to the module. */
|
||||
void AddFunction(DeclSpecs *ds, Declarator *decl, Stmt *code);
|
||||
|
||||
/** After a source file has been compiled, output can be generated in a
|
||||
number of different formats. */
|
||||
enum OutputType { Asm, /** Generate text assembly language output */
|
||||
Bitcode, /** Generate LLVM IR bitcode output */
|
||||
Object, /** Generate a native object file */
|
||||
Header /** Generate a C/C++ header file with
|
||||
declarations of 'export'ed functions, global
|
||||
variables, and the types used by them. */
|
||||
};
|
||||
|
||||
/** Write the corresponding output type to the given file. Returns
|
||||
true on success, false if there has been an error. The given
|
||||
filename may be NULL, indicating that output should go to standard
|
||||
output. */
|
||||
bool WriteOutput(OutputType ot, const char *filename);
|
||||
|
||||
/** Total number of errors encountered during compilation. */
|
||||
int errorCount;
|
||||
|
||||
/** Symbol table to hold symbols visible in the current scope during
|
||||
compilation. */
|
||||
SymbolTable *symbolTable;
|
||||
|
||||
/** llvm Module object into which globals and functions are added. */
|
||||
llvm::Module *module;
|
||||
|
||||
#ifndef LLVM_2_8
|
||||
/** The diBuilder manages generating debugging information (only
|
||||
supported in LLVM 2.9 and beyond...) */
|
||||
llvm::DIBuilder *diBuilder;
|
||||
#endif
|
||||
|
||||
GatherBuffer *gatherBuffer;
|
||||
|
||||
private:
|
||||
const char *filename;
|
||||
|
||||
/** This member records the global variables that have been defined
|
||||
with 'extern' linkage, so that it's easy to include their
|
||||
declarations in generated header files.
|
||||
|
||||
@todo FIXME: it would be nice to eliminate this and then query the
|
||||
symbol table or the llvm Module for them when/if we need them.
|
||||
*/
|
||||
std::vector<Symbol *> externGlobals;
|
||||
|
||||
bool writeHeader(const char *filename);
|
||||
bool writeObjectFileOrAssembly(OutputType outputType, const char *filename);
|
||||
};
|
||||
|
||||
#endif // ISPC_MODULE_H
|
||||
50
opt.h
Normal file
50
opt.h
Normal file
@@ -0,0 +1,50 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file opt.h
|
||||
@brief Declarations related to optimization passes
|
||||
*/
|
||||
|
||||
#ifndef ISPC_OPT_H
|
||||
#define ISPC_OPT_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
|
||||
/** Optimize the functions in the given module, applying the specified
|
||||
level of optimization. optLevel zero corresponds to essentially no
|
||||
optimization--just enough to generate correct code, while level one
|
||||
corresponds to full optimization.
|
||||
*/
|
||||
void Optimize(llvm::Module *module, int optLevel);
|
||||
|
||||
#endif // ISPC_OPT_H
|
||||
43
run_tests.sh
Executable file
43
run_tests.sh
Executable file
@@ -0,0 +1,43 @@
|
||||
#!/bin/zsh
# Test driver for ispc.
#
# Every file in tests/ is compiled to bitcode and handed to ispc_test; both
# steps are expected to succeed.  Every file in failing_tests/ is expected
# to fail somewhere in that pipeline.  Anything else is a "surprise".
# The exit status is 0 if there were no surprises and 1 otherwise.

surprises=0

echo Running correctness tests

for i in tests/*.ispc; do
    # foo.ispc -> foo.bc
    bc=${i%%ispc}bc
    ispc -O2 $i -woff -o $bc --emit-llvm --target=sse4
    if [[ $? != 0 ]]; then
        surprises=1
        echo Test $i FAILED ispc compile
        echo
    else
        ispc_test $bc
        if [[ $? != 0 ]]; then
            surprises=1
            echo Test $i FAILED ispc_test
            echo
        fi
#        cmp $bc tests_bitcode${bc##tests}
#        if [[ $? == 0 ]]; then
#            /bin/rm $bc
#        fi
    fi
    # -f: when the compile failed there may be no bitcode file to remove,
    # and that should not produce an additional error message.
    /bin/rm -f $bc
done

echo Running failing tests
for i in failing_tests/*.ispc; do
    # Output is discarded; only the exit status of the pipeline matters.
    (ispc -O2 $i -woff -o - --emit-llvm | ispc_test -) 2>/dev/null 1>/dev/null
    if [[ $? == 0 ]]; then
        surprises=1
        echo Test $i UNEXPECTEDLY PASSED
        echo
    fi
done

if [[ $surprises == 0 ]]; then
    echo No surprises.
fi

exit $surprises
|
||||
589
stdlib-avx.ll
Normal file
589
stdlib-avx.ll
Normal file
@@ -0,0 +1,589 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; *** Untested *** AVX target implementation.
|
||||
;;
|
||||
;; The LLVM AVX code generator is incomplete, so the ispc AVX target
|
||||
;; hasn't yet been tested. There is therefore a higher-than-normal
|
||||
;; chance that there are bugs in the code in this file.
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Basic 8-wide definitions
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
int8_16(8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.rcp.ps(<8 x float>) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.rcp.ss(<8 x float>) nounwind readnone
|
||||
|
||||
define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
  ; Per-lane reciprocal: start from the result of the rcp.ps intrinsic and
  ; improve it with one Newton-Raphson iteration,
  ;   result = est * (2 - v * est)
  %est = call <8 x float> @llvm.x86.avx.rcp.ps(<8 x float> %0)
  %v_est = fmul <8 x float> %0, %est
  %two_minus_v_est = fsub <8 x float> <float 2., float 2., float 2., float 2.,
                                       float 2., float 2., float 2., float 2.>, %v_est
  %refined = fmul <8 x float> %est, %two_minus_v_est
  ret <8 x float> %refined
}
|
||||
|
||||
define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
  ; Scalar reciprocal.  The value is placed in element 0 of a vector so
  ; that the rcp.ss intrinsic can be applied; only element 0 of the result
  ; is used.
  %as_vec = insertelement <8 x float> undef, float %0, i32 0
  %est_vec = call <8 x float> @llvm.x86.avx.rcp.ss(<8 x float> %as_vec)
  %est = extractelement <8 x float> %est_vec, i32 0

  ; one Newton-Raphson iteration:  result = est * (2 - v * est)
  %v_est = fmul float %0, %est
  %two_minus_v_est = fsub float 2., %v_est
  %refined = fmul float %est, %two_minus_v_est
  ret float %refined
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.round.ps(<8 x float>, i32) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.round.ss(<8 x float>, <8 x float>, i32) nounwind readnone
|
||||
|
||||
define internal <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
  ; Immediate 8: rounding mode nearest (0b00) combined with the bit 0b1000,
  ; which suppresses signalling of precision exceptions.
  %rounded = call <8 x float> @llvm.x86.avx.round.ps(<8 x float> %0, i32 8)
  ret <8 x float> %rounded
}
|
||||
|
||||
define internal float @__round_uniform_float(float) nounwind readonly alwaysinline {
  ; Scalar round-to-nearest.  Immediate 8: rounding mode nearest (0b00)
  ; combined with the bit 0b1000, which suppresses signalling of precision
  ; exceptions.
  ;
  ; About the two vector operands of roundss: the documentation for
  ;
  ;   __m128 _mm_round_ss (__m128 a, __m128 b, const int c)
  ;
  ; says that the lowest 32 bits of the result are the rounding of the
  ; lowest element of b, while the upper 96 bits are copied from a:
  ;
  ;   r0 = RND(b0),  r1 = a1,  r2 = a2,  r3 = a3
  ;
  ; Only r0 is needed here, so the value passed as a is irrelevant and the
  ; same register is simply used for both operands.
  %in_vec = insertelement <8 x float> undef, float %0, i32 0
  %out_vec = call <8 x float> @llvm.x86.avx.round.ss(<8 x float> %in_vec, <8 x float> %in_vec, i32 8)
  %rounded = extractelement <8 x float> %out_vec, i32 0
  ret float %rounded
}
|
||||
|
||||
define internal <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
  ; Immediate 9: rounding mode down (0b01) combined with the bit 0b1000,
  ; which suppresses signalling of precision exceptions.
  %floored = call <8 x float> @llvm.x86.avx.round.ps(<8 x float> %0, i32 9)
  ret <8 x float> %floored
}
|
||||
|
||||
define internal float @__floor_uniform_float(float) nounwind readonly alwaysinline {
  ; Scalar floor.  As in __round_uniform_float, the same register is
  ; passed for both vector operands of the round.ss intrinsic, since only
  ; element 0 of the result is used.
  %in_vec = insertelement <8 x float> undef, float %0, i32 0
  ; Immediate 9: rounding mode down (0b01) | no precision exceptions (0b1000)
  %out_vec = call <8 x float> @llvm.x86.avx.round.ss(<8 x float> %in_vec, <8 x float> %in_vec, i32 9)
  %floored = extractelement <8 x float> %out_vec, i32 0
  ret float %floored
}
|
||||
|
||||
define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
  ; Immediate 10: rounding mode up (0b10) combined with the bit 0b1000,
  ; which suppresses signalling of precision exceptions.
  %ceiled = call <8 x float> @llvm.x86.avx.round.ps(<8 x float> %0, i32 10)
  ret <8 x float> %ceiled
}
|
||||
|
||||
define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
  ; Scalar ceil.  As in __round_uniform_float, the same register is passed
  ; for both vector operands of the round.ss intrinsic, since only
  ; element 0 of the result is used.
  %in_vec = insertelement <8 x float> undef, float %0, i32 0
  ; Immediate 10: rounding mode up (0b10) | no precision exceptions (0b1000)
  %out_vec = call <8 x float> @llvm.x86.avx.round.ss(<8 x float> %in_vec, <8 x float> %in_vec, i32 10)
  %ceiled = extractelement <8 x float> %out_vec, i32 0
  ret float %ceiled
}
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.rsqrt.ps(<8 x float>) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.rsqrt.ss(<8 x float>) nounwind readnone
|
||||
|
||||
define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
  ; Per-lane reciprocal square root: start from the result of the rsqrt.ps
  ; intrinsic ...
  %est = call <8 x float> @llvm.x86.avx.rsqrt.ps(<8 x float> %v)
  ; ... and refine it:  result = 0.5 * est * (3 - (v * est) * est)
  %v_est = fmul <8 x float> %v, %est
  %v_est_est = fmul <8 x float> %v_est, %est
  %three_minus = fsub <8 x float> <float 3., float 3., float 3., float 3., float 3., float 3., float 3., float 3.>, %v_est_est
  %est_scaled = fmul <8 x float> %est, %three_minus
  %refined = fmul <8 x float> <float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5, float 0.5>, %est_scaled
  ret <8 x float> %refined
}
|
||||
|
||||
define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
  ; Scalar reciprocal square root.  The value goes into element 0 of a
  ; vector for the rsqrt.ss intrinsic; only element 0 of the result is used.
  %as_vec = insertelement <8 x float> undef, float %0, i32 0
  %est_vec = call <8 x float> @llvm.x86.avx.rsqrt.ss(<8 x float> %as_vec)
  %est = extractelement <8 x float> %est_vec, i32 0

  ; refine:  result = 0.5 * est * (3 - (v * est) * est)
  %v_est = fmul float %0, %est
  %v_est_est = fmul float %v_est, %est
  %three_minus = fsub float 3., %v_est_est
  %est_scaled = fmul float %est, %three_minus
  %refined = fmul float 0.5, %est_scaled
  ret float %refined
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; sqrt
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.sqrt.ps(<8 x float>) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.sqrt.ss(<8 x float>) nounwind readnone
|
||||
|
||||
define internal <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
  ; Per-lane square root, directly from the sqrt.ps intrinsic.
  %root = call <8 x float> @llvm.x86.avx.sqrt.ps(<8 x float> %0)
  ret <8 x float> %root
}
|
||||
|
||||
define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
  ; Scalar square root.  The macro invocation below supplies the value
  ; %ret that is returned; it is given the vector width, the element type,
  ; the sqrt.ss intrinsic and the scalar argument.  The macro itself is
  ; defined outside this file.
  sse_unary_scalar(ret, 8, float, @llvm.x86.avx.sqrt.ss, %0)
  ret float %ret
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; fastmath
|
||||
|
||||
declare void @llvm.x86.avx.stmxcsr(i32 *) nounwind
|
||||
declare void @llvm.x86.avx.ldmxcsr(i32 *) nounwind
|
||||
|
||||
define internal void @__fastmath() nounwind alwaysinline {
  ; Fetch the current control/status value through a stack slot.
  %slot = alloca i32
  call void @llvm.x86.avx.stmxcsr(i32 * %slot)
  %before = load i32 *%slot

  ; Switch on DAZ (64) and FTZ (32768); together that is 32832.
  %after = or i32 %before, 32832
  store i32 %after, i32 *%slot
  call void @llvm.x86.avx.ldmxcsr(i32 * %slot)
  ret void
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; svml
|
||||
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones twice with our 8-wide
|
||||
; vectors...
|
||||
|
||||
declare <8 x float> @__svml_sin(<8 x float>)
|
||||
declare <8 x float> @__svml_cos(<8 x float>)
|
||||
declare void @__svml_sincos(<8 x float>, <8 x float> *, <8 x float> *)
|
||||
declare <8 x float> @__svml_tan(<8 x float>)
|
||||
declare <8 x float> @__svml_atan(<8 x float>)
|
||||
declare <8 x float> @__svml_atan2(<8 x float>, <8 x float>)
|
||||
declare <8 x float> @__svml_exp(<8 x float>)
|
||||
declare <8 x float> @__svml_log(<8 x float>)
|
||||
declare <8 x float> @__svml_pow(<8 x float>, <8 x float>)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.max.ps(<8 x float>, <8 x float>) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.max.ss(<8 x float>, <8 x float>) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.min.ps(<8 x float>, <8 x float>) nounwind readnone
|
||||
declare <8 x float> @llvm.x86.avx.min.ss(<8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
define internal <8 x float> @__max_varying_float(<8 x float>,
                                                 <8 x float>) nounwind readonly alwaysinline {
  ; Per-lane maximum of the two operands, via the max.ps intrinsic.
  %larger = call <8 x float> @llvm.x86.avx.max.ps(<8 x float> %0, <8 x float> %1)
  ret <8 x float> %larger
}
|
||||
|
||||
define internal float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
  ; Scalar maximum.  The macro invocation below supplies the value %ret
  ; that is returned; it is given the vector width, the element type, the
  ; max.ss intrinsic and both scalar arguments.  The macro itself is
  ; defined outside this file.
  sse_binary_scalar(ret, 8, float, @llvm.x86.avx.max.ss, %0, %1)
  ret float %ret
}
|
||||
|
||||
define internal <8 x float> @__min_varying_float(<8 x float>,
                                                 <8 x float>) nounwind readonly alwaysinline {
  ; Per-lane minimum of the two operands, via the min.ps intrinsic.
  %smaller = call <8 x float> @llvm.x86.avx.min.ps(<8 x float> %0, <8 x float> %1)
  ret <8 x float> %smaller
}
|
||||
|
||||
define internal float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
  ; Scalar minimum.  The macro invocation below supplies the value %ret
  ; that is returned; it is given the vector width, the element type, the
  ; min.ss intrinsic and both scalar arguments.  The macro itself is
  ; defined outside this file.
  sse_binary_scalar(ret, 8, float, @llvm.x86.avx.min.ss, %0, %1)
  ret float %ret
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx.pminsd(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
declare <8 x i32> @llvm.x86.avx.pmaxsd(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
define internal <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  ; Per-lane minimum of the two operands, via the pminsd intrinsic.
  %smaller = call <8 x i32> @llvm.x86.avx.pminsd(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %smaller
}
|
||||
|
||||
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  ; Scalar int32 minimum.  The macro invocation below supplies the value
  ; %ret that is returned; it is given the vector width, the element type,
  ; the pminsd intrinsic and both scalar arguments.  The macro itself is
  ; defined outside this file.
  sse_binary_scalar(ret, 8, i32, @llvm.x86.avx.pminsd, %0, %1)
  ret i32 %ret
}
|
||||
|
||||
define internal <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  ; Per-lane maximum of the two operands, via the pmaxsd intrinsic.
  %larger = call <8 x i32> @llvm.x86.avx.pmaxsd(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %larger
}
|
||||
|
||||
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  ; Scalar int32 maximum.  The macro invocation below supplies the value
  ; %ret that is returned; it is given the vector width, the element type,
  ; the pmaxsd intrinsic and both scalar arguments.  The macro itself is
  ; defined outside this file.
  sse_binary_scalar(ret, 8, i32, @llvm.x86.avx.pmaxsd, %0, %1)
  ret i32 %ret
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unsigned int min/max
|
||||
|
||||
declare <8 x i32> @llvm.x86.avx.pminud(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
declare <8 x i32> @llvm.x86.avx.pmaxud(<8 x i32>, <8 x i32>) nounwind readnone
|
||||
|
||||
define internal <8 x i32> @__min_varying_uint32(<8 x i32>,
                                                <8 x i32>) nounwind readonly alwaysinline {
  ; Per-lane minimum of the two operands, via the pminud intrinsic.
  %smaller = call <8 x i32> @llvm.x86.avx.pminud(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %smaller
}
|
||||
|
||||
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  ; Scalar uint32 minimum.  The macro invocation below supplies the value
  ; %ret that is returned; it is given the vector width, the element type,
  ; the pminud intrinsic and both scalar arguments.  The macro itself is
  ; defined outside this file.
  sse_binary_scalar(ret, 8, i32, @llvm.x86.avx.pminud, %0, %1)
  ret i32 %ret
}
|
||||
|
||||
define internal <8 x i32> @__max_varying_uint32(<8 x i32>,
                                                <8 x i32>) nounwind readonly alwaysinline {
  ; Per-lane maximum of the two operands, via the pmaxud intrinsic.
  %larger = call <8 x i32> @llvm.x86.avx.pmaxud(<8 x i32> %0, <8 x i32> %1)
  ret <8 x i32> %larger
}
|
||||
|
||||
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  ; Scalar uint32 maximum.  The macro invocation below supplies the value
  ; %ret that is returned; it is given the vector width, the element type,
  ; the pmaxud intrinsic and both scalar arguments.  The macro itself is
  ; defined outside this file.
  sse_binary_scalar(ret, 8, i32, @llvm.x86.avx.pmaxud, %0, %1)
  ret i32 %ret
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
|
||||
define internal i32 @__popcnt(i32) nounwind readonly alwaysinline {
  ; Population count of the argument, via the generic ctpop intrinsic.
  %bits_set = call i32 @llvm.ctpop.i32(i32 %0)
  ret i32 %bits_set
}
|
||||
|
||||
declare i32 @llvm.x86.avx.movmsk.ps(<8 x float>) nounwind readnone
|
||||
|
||||
define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
  ; The movmsk.ps intrinsic takes a float vector, so the integer mask is
  ; first reinterpreted bit-for-bit as floats.
  %mask_as_float = bitcast <8 x i32> %0 to <8 x float>
  %bits = call i32 @llvm.x86.avx.movmsk.ps(<8 x float> %mask_as_float) nounwind readnone
  ret i32 %bits
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; horizontal float ops
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.hadd.ps(<8 x float>, <8 x float>) nounwind readnone
|
||||
|
||||
define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysinline {
  ; Sum of all 8 lanes: two rounds of horizontal adds, after which
  ; elements 0 and 4 are extracted and added together.
  ; NOTE(review): presumably elements 0 and 4 are needed because the
  ; horizontal add works on each 128-bit half separately -- confirm.
  %pairs = call <8 x float> @llvm.x86.avx.hadd.ps(<8 x float> %0, <8 x float> %0)
  %quads = call <8 x float> @llvm.x86.avx.hadd.ps(<8 x float> %pairs, <8 x float> %pairs)
  %low_sum = extractelement <8 x float> %quads, i32 0
  %high_sum = extractelement <8 x float> %quads, i32 4
  %total = fadd float %low_sum, %high_sum
  ret float %total
}
|
||||
|
||||
|
||||
define internal float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
  ; The whole body comes from the macro invocation below, which is given
  ; the element type plus the vector and scalar float min helpers.
  ; NOTE(review): presumably it folds the 8 lanes into a single value
  ; using those helpers -- confirm against the macro definition.
  reduce8(float, @__min_varying_float, @__min_uniform_float)
}
|
||||
|
||||
|
||||
define internal float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
  ; The whole body comes from the macro invocation below, which is given
  ; the element type plus the vector and scalar float max helpers.
  ; NOTE(review): presumably it folds the 8 lanes into a single value
  ; using those helpers -- confirm against the macro definition.
  reduce8(float, @__max_varying_float, @__max_uniform_float)
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; horizontal int32 ops
|
||||
|
||||
define internal <8 x i32> @__add_varying_int32(<8 x i32>,
                                               <8 x i32>) nounwind readnone alwaysinline {
  ; Per-lane sum of the two operands; used as the vector helper by
  ; __reduce_add_int32.
  %sum = add <8 x i32> %0, %1
  ret <8 x i32> %sum
}
|
||||
|
||||
define internal i32 @__add_uniform_int32(i32, i32) nounwind readnone alwaysinline {
  ; Sum of the two scalars; used as the scalar helper by
  ; __reduce_add_int32.
  %sum = add i32 %0, %1
  ret i32 %sum
}
|
||||
|
||||
define internal i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
  ; The whole body comes from the macro invocation below, which is given
  ; the element type plus the vector and scalar int32 add helpers.
  ; NOTE(review): presumably it folds the 8 lanes into a single value
  ; using those helpers -- confirm against the macro definition.
  reduce8(i32, @__add_varying_int32, @__add_uniform_int32)
}
|
||||
|
||||
|
||||
define internal i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
  ; The whole body comes from the macro invocation below, which is given
  ; the element type plus the vector and scalar int32 min helpers.
  ; NOTE(review): presumably it folds the 8 lanes into a single value
  ; using those helpers -- confirm against the macro definition.
  reduce8(i32, @__min_varying_int32, @__min_uniform_int32)
}
|
||||
|
||||
|
||||
;; Reduces the eight i32 lanes of the argument to a single i32 using the
;; signed int32 max helpers named in the body (defined elsewhere).  The body
;; is produced by an 8-wide reduction macro defined outside this part of the
;; file.
define internal i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
|
||||
reduce8(i32, @__max_varying_int32, @__max_uniform_int32)
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;;; horizontal uint32 ops
|
||||
|
||||
;; Unsigned 32-bit add reduction.  It simply forwards to the int32 version
;; and returns that result unchanged.
define internal i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
  %total = call i32 @__reduce_add_int32(<8 x i32> %v)
  ret i32 %total
}
|
||||
|
||||
;; Reduces the eight i32 lanes of the argument to a single i32 using the
;; uint32 min helpers named in the body (defined elsewhere).  The body is
;; produced by an 8-wide reduction macro defined outside this part of the
;; file.
define internal i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||
reduce8(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
||||
}
|
||||
|
||||
|
||||
;; Reduces the eight i32 lanes of the argument to a single i32 using the
;; uint32 max helpers named in the body (defined elsewhere).  The body is
;; produced by an 8-wide reduction macro defined outside this part of the
;; file.
define internal i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
|
||||
reduce8(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
;; Load a single 32-bit value from the address in %0 and replicate it into
;; all eight lanes.  If no mask lane is on, the pointer is never
;; dereferenced and the result is undef.
define <8 x i32> @__load_and_broadcast_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  ; only touch memory when at least one lane is running
  %mask_bits = call i32 @__movmsk(<8 x i32> %mask)
  %has_active = icmp ne i32 %mask_bits, 0
  br i1 %has_active, label %do_load, label %all_off

do_load:
  %src = bitcast i8 * %0 to i32 *
  %scalar = load i32 * %src

  ; copy the scalar into each of the eight lanes in turn
  %b0 = insertelement <8 x i32> undef, i32 %scalar, i32 0
  %b1 = insertelement <8 x i32> %b0, i32 %scalar, i32 1
  %b2 = insertelement <8 x i32> %b1, i32 %scalar, i32 2
  %b3 = insertelement <8 x i32> %b2, i32 %scalar, i32 3
  %b4 = insertelement <8 x i32> %b3, i32 %scalar, i32 4
  %b5 = insertelement <8 x i32> %b4, i32 %scalar, i32 5
  %b6 = insertelement <8 x i32> %b5, i32 %scalar, i32 6
  %b7 = insertelement <8 x i32> %b6, i32 %scalar, i32 7
  ret <8 x i32> %b7

all_off:
  ret <8 x i32> undef
}
|
||||
|
||||
|
||||
;; Load a single 64-bit value from the address in %0 and replicate it into
;; all eight lanes.  If no mask lane is on, the pointer is never
;; dereferenced and the result is undef.
define <8 x i64> @__load_and_broadcast_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  ; must not load if the mask is all off; the address may be invalid
  %mm = call i32 @__movmsk(<8 x i32> %mask)
  %any_on = icmp ne i32 %mm, 0
  br i1 %any_on, label %load, label %skip

load:
  %ptr = bitcast i8 * %0 to i64 *
  %val = load i64 * %ptr

  %ret0 = insertelement <8 x i64> undef, i64 %val, i32 0
  %ret1 = insertelement <8 x i64> %ret0, i64 %val, i32 1
  %ret2 = insertelement <8 x i64> %ret1, i64 %val, i32 2
  %ret3 = insertelement <8 x i64> %ret2, i64 %val, i32 3
  %ret4 = insertelement <8 x i64> %ret3, i64 %val, i32 4
  %ret5 = insertelement <8 x i64> %ret4, i64 %val, i32 5
  %ret6 = insertelement <8 x i64> %ret5, i64 %val, i32 6
  %ret7 = insertelement <8 x i64> %ret6, i64 %val, i32 7
  ; return the fully populated vector; returning %ret3 here would leave
  ; lanes 4 through 7 undefined
  ret <8 x i64> %ret7

skip:
  ret <8 x i64> undef
}
|
||||
|
||||
|
||||
;; Masked load of eight 32-bit values from the address in %0.  As soon as
;; any mask lane is on, the whole vector is loaded with 4-byte alignment, so
;; elements belonging to lanes that are off are read as well.  With the mask
;; all off nothing is read and the result is undef.
define <8 x i32> @__load_masked_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  %mask_bits = call i32 @__movmsk(<8 x i32> %mask)
  %has_active = icmp ne i32 %mask_bits, 0
  br i1 %has_active, label %do_load, label %all_off

do_load:
  %vec_ptr = bitcast i8 * %0 to <8 x i32> *
  %loaded = load <8 x i32> * %vec_ptr, align 4
  ret <8 x i32> %loaded

all_off:
  ret <8 x i32> undef
}
|
||||
|
||||
|
||||
;; Masked load of eight 64-bit values from the address in %0.  As soon as
;; any mask lane is on, the whole vector is loaded with 8-byte alignment, so
;; elements belonging to lanes that are off are read as well.  With the mask
;; all off nothing is read and the result is undef.
define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  %mask_bits = call i32 @__movmsk(<8 x i32> %mask)
  %has_active = icmp ne i32 %mask_bits, 0
  br i1 %has_active, label %do_load, label %all_off

do_load:
  %vec_ptr = bitcast i8 * %0 to <8 x i64> *
  %loaded = load <8 x i64> * %vec_ptr, align 8
  ret <8 x i64> %loaded

all_off:
  ret <8 x i64> undef
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
;; Masked store of eight 32-bit values: %0 is the destination vector
;; pointer, %1 holds the values and %2 is the mask.  The quoted body is
;; handed to a per-lane helper macro together with the lane count and the
;; mask; for one lane it computes the address of that element inside %0 and
;; stores the matching element of %1 there.
;; NOTE(review): presumably the macro runs the body only for lanes whose
;; mask is on - confirm against the macro definition.
define void @__masked_store_32(<8 x i32>* nocapture, <8 x i32>,
|
||||
<8 x i32>) nounwind alwaysinline {
|
||||
per_lane(8, <8 x i32> %2, `
|
||||
; compute address for this one
|
||||
%ptr_ID = getelementptr <8 x i32> * %0, i32 0, i32 LANE
|
||||
%storeval_ID = extractelement <8 x i32> %1, i32 LANE
|
||||
store i32 %storeval_ID, i32 * %ptr_ID')
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Masked store of eight 64-bit values: %0 is the destination vector
;; pointer, %1 holds the values and %2 is the (32-bit per lane) mask.  The
;; quoted body is handed to a per-lane helper macro together with the lane
;; count and the mask; for one lane it computes the address of that element
;; inside %0 and stores the matching element of %1 there.
;; NOTE(review): presumably the macro runs the body only for lanes whose
;; mask is on - confirm against the macro definition.
define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
|
||||
<8 x i32>) nounwind alwaysinline {
|
||||
per_lane(8, <8 x i32> %2, `
|
||||
%ptr_ID = getelementptr <8 x i64> * %0, i32 0, i32 LANE
|
||||
%storeval_ID = extractelement <8 x i64> %1, i32 LANE
|
||||
store i64 %storeval_ID, i64 * %ptr_ID')
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
declare <8 x float> @llvm.x86.avx.blendvps(<8 x float>, <8 x float>,
|
||||
<8 x float>) nounwind readnone
|
||||
|
||||
|
||||
;; Masked store of eight 32-bit values done as load / blend / store: the
;; current contents of %0 are read, combined with the new values in %1 under
;; control of the mask in %2 by the AVX blendvps intrinsic, and the full
;; vector is written back.  All eight elements of the destination are
;; therefore both read and written, whatever the mask.
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>, <8 x i32>) nounwind alwaysinline {
  ; blendvps is declared on floats, so all three operands are reinterpreted
  %current = load <8 x i32>* %0
  %current_ps = bitcast <8 x i32> %current to <8 x float>
  %incoming_ps = bitcast <8 x i32> %1 to <8 x float>
  %mask_ps = bitcast <8 x i32> %2 to <8 x float>

  %merged_ps = call <8 x float> @llvm.x86.avx.blendvps(<8 x float> %current_ps,
                                                       <8 x float> %incoming_ps,
                                                       <8 x float> %mask_ps)
  %merged = bitcast <8 x float> %merged_ps to <8 x i32>
  store <8 x i32> %merged, <8 x i32>* %0
  ret void
}
|
||||
|
||||
|
||||
;; 64-bit counterpart of the blending masked store.  No blend is performed
;; here; the work is handed straight to the serialized 64-bit masked store.
;; FIXME: should implement the "do two 32-bit masked stores" stuff that
;; other targets do...
define void @__masked_store_blend_64(<8 x i64>* nocapture, <8 x i64>, <8 x i32>) nounwind alwaysinline {
  call void @__masked_store_64(<8 x i64>* nocapture %0, <8 x i64> %1, <8 x i32> %2)
  ret void
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
; The gather and scatter routines are not written out by hand; the four
; macro invocations below instantiate them for the 8-wide target, once for
; i32 and once for i64 elements each.
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
gen_scatter(8, i32)
|
||||
gen_scatter(8, i64)
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision sqrt
|
||||
|
||||
declare <4 x double> @llvm.x86.avx.sqrt.pd(<4 x double>) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.avx.sqrt.sd(<4 x double>) nounwind readnone
|
||||
|
||||
;; Square root of an 8-wide double vector.  The AVX sqrt.pd intrinsic is
;; declared on 4-wide double vectors, so the work is handed to a 4-to-8
;; helper macro which leaves its result in %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each half of
;; %0 and joins the two results - confirm against the macro definition.
define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
|
||||
unary4to8(ret, double, @llvm.x86.avx.sqrt.pd, %0)
|
||||
ret <8 x double> %ret
|
||||
}
|
||||
|
||||
;; Square root of a single double.  A helper macro routes the scalar %0
;; through the 4-wide packed sqrt.pd intrinsic and leaves the scalar result
;; in %ret.
define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
|
||||
sse_unary_scalar(ret, 4, double, @llvm.x86.avx.sqrt.pd, %0)
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision min/max
|
||||
|
||||
declare <4 x double> @llvm.x86.avx.max.pd(<4 x double>, <4 x double>) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.avx.max.sd(<4 x double>, <4 x double>) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.avx.min.pd(<4 x double>, <4 x double>) nounwind readnone
|
||||
declare <4 x double> @llvm.x86.avx.min.sd(<4 x double>, <4 x double>) nounwind readnone
|
||||
|
||||
;; Lane-wise minimum of two 8-wide double vectors.  The AVX min.pd intrinsic
;; is declared on 4-wide double vectors, so the work is handed to a 4-to-8
;; helper macro which leaves its result in %ret.
define internal <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||
binary4to8(ret, double, @llvm.x86.avx.min.pd, %0, %1)
|
||||
ret <8 x double> %ret
|
||||
}
|
||||
|
||||
;; Minimum of two scalar doubles.  A helper macro routes %0 and %1 through
;; the 4-wide packed min.pd intrinsic and leaves the scalar result in %ret.
define internal double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||
sse_binary_scalar(ret, 4, double, @llvm.x86.avx.min.pd, %0, %1)
|
||||
ret double %ret
|
||||
}
|
||||
|
||||
;; Lane-wise maximum of two 8-wide double vectors.  The AVX max.pd intrinsic
;; is declared on 4-wide double vectors, so the work is handed to a 4-to-8
;; helper macro which leaves its result in %ret.
define internal <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
|
||||
binary4to8(ret, double, @llvm.x86.avx.max.pd, %0, %1)
|
||||
ret <8 x double> %ret
|
||||
}
|
||||
|
||||
;; Maximum of two scalar doubles.  A helper macro routes %0 and %1 through
;; the 4-wide packed max.pd intrinsic and leaves the scalar result in %ret.
define internal double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
|
||||
sse_binary_scalar(ret, 4, double, @llvm.x86.avx.max.pd, %0, %1)
|
||||
ret double %ret
|
||||
}
|
||||
141
stdlib-c.c
Normal file
141
stdlib-c.c
Normal file
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file stdlib-c.c
|
||||
@brief Standard library function implementations written in C.
|
||||
|
||||
This file provides C implementations of various functions that can be
|
||||
called from ispc programs; in other words, this file is *not* linked
|
||||
into the ispc compiler executable, but rather provides functions that
|
||||
can be compiled into ispc programs.
|
||||
|
||||
When the ispc compiler is built, this file is compiled with clang to
|
||||
generate LLVM bitcode. This bitcode is later linked in to the program
|
||||
being compiled by the DefineStdlib() function. The first way to access
|
||||
definitions from this file is by asking for them by name from the
|
||||
llvm::Module's symbol table (e.g. as the PrintStmt implementation does
|
||||
with __do_print() below). Alternatively, if a function defined in this
|
||||
file has a signature that can be mapped back to ispc types by the
|
||||
lLLVMTypeToIspcType() function, then its declaration will be made
|
||||
available to ispc programs at compile time automatically.
|
||||
*/
|
||||
|
||||
|
||||
#include <stdint.h>
|
||||
#include <stdio.h>
|
||||
#include <stdarg.h>
|
||||
|
||||
/* Element type through which __do_print() reads boolean values (the 'b'
   and 'B' type codes). */
typedef int Bool;
|
||||
|
||||
/* Body of one switch case in __do_print(): prints the single value of the
   given type that the local variable ptr points at, using the printf
   format fmt, and then leaves the switch.  Because it ends in a bare
   "break" it is only usable directly inside a switch (or loop) where a
   variable named ptr is in scope. */
#define PRINT_SCALAR(fmt, type) \
|
||||
printf(fmt, *((type *)ptr)); \
|
||||
break
|
||||
|
||||
/* Body of one switch case in __do_print(): prints the width elements of
   the given type that the local variable ptr points at as a bracketed,
   comma-separated list, and then leaves the switch.  Elements whose bit is
   clear in mask are still printed, but wrapped in double parentheses.
   Relies on the names ptr, width and mask being in scope, and ends in a
   bare "break" like PRINT_SCALAR. */
#define PRINT_VECTOR(fmt, type) \
|
||||
putchar('['); \
|
||||
for (int i = 0; i < width; ++i) { \
|
||||
/* only print the value if the current lane is executing */ \
|
||||
if (mask & (1<<i)) \
|
||||
printf(fmt, ((type *)ptr)[i]); \
|
||||
else \
|
||||
printf("((" fmt "))", ((type *)ptr)[i]); \
|
||||
putchar(i != width-1 ? ',' : ']'); \
|
||||
} \
|
||||
break
|
||||
|
||||
/** This function is called by PrintStmt to do the work of printing values
|
||||
from ispc programs. Note that the function signature here must match
|
||||
the parameters that PrintStmt::EmitCode() generates.
|
||||
|
||||
@param format Print format string
|
||||
@param types Encoded types of the values being printed.
|
||||
(See lEncodeType()).
|
||||
@param width Vector width of the compilation target
|
||||
@param mask Current lane mask when the print statemnt is called
|
||||
@param args Array of pointers to the values to be printed
|
||||
*/
|
||||
void __do_print(const char *format, const char *types, int width, int mask,
|
||||
void **args) {
|
||||
if (mask == 0)
|
||||
return;
|
||||
|
||||
int argCount = 0;
|
||||
while (*format) {
|
||||
// Format strings are just single percent signs.
|
||||
if (*format != '%')
|
||||
putchar(*format);
|
||||
else {
|
||||
if (*types) {
|
||||
void *ptr = args[argCount++];
|
||||
// Based on the encoding in the types string, cast the
|
||||
// value appropriately and print it with a reasonable
|
||||
// printf() formatting string.
|
||||
switch (*types) {
|
||||
case 'b': {
|
||||
printf("%s", *((Bool *)ptr) ? "true" : "false");
|
||||
break;
|
||||
}
|
||||
case 'B': {
|
||||
putchar('[');
|
||||
for (int i = 0; i < width; ++i) {
|
||||
if (mask & (1<<i))
|
||||
printf("%s", ((Bool *)ptr)[i] ? "true" : "false");
|
||||
else
|
||||
printf("_________");
|
||||
putchar(i != width-1 ? ',' : ']');
|
||||
}
|
||||
break;
|
||||
}
|
||||
case 'i': PRINT_SCALAR("%d", int);
|
||||
case 'I': PRINT_VECTOR("%d", int);
|
||||
case 'u': PRINT_SCALAR("%u", unsigned int);
|
||||
case 'U': PRINT_VECTOR("%u", unsigned int);
|
||||
case 'f': PRINT_SCALAR("%f", float);
|
||||
case 'F': PRINT_VECTOR("%f", float);
|
||||
case 'l': PRINT_SCALAR("%lld", long long);
|
||||
case 'L': PRINT_VECTOR("%lld", long long);
|
||||
case 'v': PRINT_SCALAR("%llu", unsigned long long);
|
||||
case 'V': PRINT_VECTOR("%llu", unsigned long long);
|
||||
case 'd': PRINT_SCALAR("%f", double);
|
||||
case 'D': PRINT_VECTOR("%f", double);
|
||||
default:
|
||||
printf("UNKNOWN TYPE ");
|
||||
putchar(*types);
|
||||
}
|
||||
++types;
|
||||
}
|
||||
}
|
||||
++format;
|
||||
}
|
||||
fflush(stdout);
|
||||
}
|
||||
441
stdlib-sse.ll
Normal file
441
stdlib-sse.ll
Normal file
@@ -0,0 +1,441 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; This file declares implementations of various stdlib builtins that
|
||||
;; only require SSE version 1 and 2 functionality; this file, in turn
|
||||
;; is then included by stdlib-sse2.ll and stdlib-sse4.ll to provide
|
||||
;; those definitions for them.
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; NOTE(review): the macro invoked on the next line is defined outside this
; file (presumably in stdlib.m4); it is instantiated here with a width of 4.
int8_16(4)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
||||
|
||||
;; Reciprocal of each of the four float lanes: the estimate from the SSE
;; rcp.ps intrinsic, refined by one Newton-Raphson step.
define internal <4 x float> @__rcp_varying_float(<4 x float>) nounwind readonly alwaysinline {
  ; initial estimate: iv = rcp(v)
  %iv = call <4 x float> @llvm.x86.sse.rcp.ps(<4 x float> %0)

  ; refinement for better precision: iv * (2. - v * iv)
  %v_times_iv = fmul <4 x float> %0, %iv
  %correction = fsub <4 x float> <float 2., float 2., float 2., float 2.>, %v_times_iv
  %refined = fmul <4 x float> %iv, %correction
  ret <4 x float> %refined
}
|
||||
|
||||
;; Reciprocal of a single float: the estimate from the SSE rcp.ss intrinsic,
;; refined by one Newton-Raphson step.
define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
  ; rcp.ss works on vectors, so place the scalar in element 0, call the
  ; intrinsic and take element 0 of the result
  %in_vec = insertelement <4 x float> undef, float %0, i32 0
  %est_vec = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %in_vec)
  %iv = extractelement <4 x float> %est_vec, i32 0

  ; refinement for better precision: iv * (2. - v * iv)
  %v_times_iv = fmul float %0, %iv
  %correction = fsub float 2., %v_times_iv
  %refined = fmul float %iv, %correction
  ret float %refined
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; rsqrt
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||
|
||||
;; Reciprocal square root of each of the four float lanes: the estimate from
;; the SSE rsqrt.ps intrinsic, refined by one Newton-Raphson step.
define internal <4 x float> @__rsqrt_varying_float(<4 x float> %v) nounwind readonly alwaysinline {
  ; initial estimate: est = rsqrt(v)
  %est = call <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float> %v)

  ; refinement: 0.5 * est * (3. - (v * est) * est)
  %v_est = fmul <4 x float> %v, %est
  %v_est_sq = fmul <4 x float> %v_est, %est
  %three_minus = fsub <4 x float> <float 3., float 3., float 3., float 3.>, %v_est_sq
  %scaled = fmul <4 x float> %est, %three_minus
  %refined = fmul <4 x float> <float 0.5, float 0.5, float 0.5, float 0.5>, %scaled
  ret <4 x float> %refined
}
|
||||
|
||||
;; Reciprocal square root of a single float: the estimate from the SSE
;; rsqrt.ss intrinsic, refined by one Newton-Raphson step.
define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
  ; rsqrt.ss works on vectors, so place the scalar in element 0, call the
  ; intrinsic and take element 0 of the result
  %in_vec = insertelement <4 x float> undef, float %0, i32 0
  %est_vec = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %in_vec)
  %est = extractelement <4 x float> %est_vec, i32 0

  ; refinement: 0.5 * est * (3. - (v * est) * est)
  %v_est = fmul float %0, %est
  %v_est_sq = fmul float %v_est, %est
  %three_minus = fsub float 3., %v_est_sq
  %scaled = fmul float %est, %three_minus
  %refined = fmul float 0.5, %scaled
  ret float %refined
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; sqrt
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
||||
|
||||
;; Square root of each of the four float lanes, straight from the SSE
;; sqrt.ps intrinsic.
define internal <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysinline {
  %roots = call <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float> %0)
  ret <4 x float> %roots
}
|
||||
|
||||
;; Square root of a single float.  A helper macro routes the scalar %0
;; through the 4-wide scalar-form sqrt.ss intrinsic and leaves the scalar
;; result in %ret.
define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
|
||||
sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; fast math mode
|
||||
|
||||
declare void @llvm.x86.sse.stmxcsr(i32 *) nounwind
|
||||
declare void @llvm.x86.sse.ldmxcsr(i32 *) nounwind
|
||||
|
||||
;; Turn on the DAZ and FTZ bits in the SSE control/status register, leaving
;; every other bit as it was.
define internal void @__fastmath() nounwind alwaysinline {
  ; the stmxcsr / ldmxcsr intrinsics take a pointer, so the register value
  ; is round-tripped through a stack slot
  %csr_slot = alloca i32
  call void @llvm.x86.sse.stmxcsr(i32 * %csr_slot)
  %csr = load i32 *%csr_slot

  ; DAZ is 64 and FTZ is 32768, so or-ing in 32832 sets both
  %csr_fast = or i32 %csr, 32832
  store i32 %csr_fast, i32 *%csr_slot
  call void @llvm.x86.sse.ldmxcsr(i32 * %csr_slot)
  ret void
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
|
||||
; Deliberately not readnone: the wrapper in this file hands this routine an
; output pointer, so it is expected to write one of its two results to
; memory, and LLVM may drop or reorder memory writes of a readnone callee.
declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind
|
||||
declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
;; 4-wide sine; a thin wrapper that forwards to the external SVML routine.
define internal <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_sinf4(<4 x float> %0)
  ret <4 x float> %res
}
|
||||
|
||||
;; 4-wide cosine; a thin wrapper that forwards to the external SVML routine.
define internal <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_cosf4(<4 x float> %0)
  ret <4 x float> %res
}
|
||||
|
||||
;; Combined 4-wide sine and cosine.  %0 is the input, %1 receives the value
;; returned by the SVML routine and %2 is handed to that routine as its
;; output pointer.
;; NOTE(review): presumably the returned value is the sine and the value
;; written through %2 is the cosine - confirm against the SVML interface.
;; This function writes to memory, so it must not carry the readnone
;; attribute: LLVM is free to delete or reorder the stores of a function
;; that claims not to access memory.
define internal void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind alwaysinline {
  %s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
  store <4 x float> %s, <4 x float> * %1
  ret void
}
|
||||
|
||||
;; 4-wide tangent; a thin wrapper that forwards to the external SVML routine.
define internal <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_tanf4(<4 x float> %0)
  ret <4 x float> %res
}
|
||||
|
||||
;; 4-wide arctangent; a thin wrapper that forwards to the external SVML
;; routine.
define internal <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_atanf4(<4 x float> %0)
  ret <4 x float> %res
}
|
||||
|
||||
;; 4-wide two-argument arctangent; a thin wrapper that forwards both
;; operands, in the same order, to the external SVML routine.
define internal <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
  ret <4 x float> %res
}
|
||||
|
||||
;; 4-wide exponential; a thin wrapper that forwards to the external SVML
;; routine.
define internal <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_expf4(<4 x float> %0)
  ret <4 x float> %res
}
|
||||
|
||||
;; 4-wide logarithm; a thin wrapper that forwards to the external SVML
;; routine.
define internal <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_logf4(<4 x float> %0)
  ret <4 x float> %res
}
|
||||
|
||||
;; 4-wide power function; a thin wrapper that forwards both operands, in the
;; same order, to the external SVML routine.
define internal <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
  %res = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
  ret <4 x float> %res
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
;; Lane-wise maximum of two 4-wide float vectors, straight from the SSE
;; max.ps intrinsic.
define internal <4 x float> @__max_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
  %larger = call <4 x float> @llvm.x86.sse.max.ps(<4 x float> %0, <4 x float> %1)
  ret <4 x float> %larger
}
|
||||
|
||||
;; Maximum of two scalar floats.  A helper macro routes %0 and %1 through
;; the 4-wide scalar-form max.ss intrinsic and leaves the scalar result in
;; %ret.
define internal float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.max.ss, %0, %1)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
;; Lane-wise minimum of two 4-wide float vectors, straight from the SSE
;; min.ps intrinsic.
define internal <4 x float> @__min_varying_float(<4 x float>, <4 x float>) nounwind readonly alwaysinline {
  %smaller = call <4 x float> @llvm.x86.sse.min.ps(<4 x float> %0, <4 x float> %1)
  ret <4 x float> %smaller
}
|
||||
|
||||
;; Minimum of two scalar floats.  A helper macro routes %0 and %1 through
;; the 4-wide scalar-form min.ss intrinsic and leaves the scalar result in
;; %ret.
define internal float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
|
||||
sse_binary_scalar(ret, 4, float, @llvm.x86.sse.min.ss, %0, %1)
|
||||
ret float %ret
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
;; Collapse the 4-wide execution mask into a scalar i32 by way of the SSE
;; movmsk.ps intrinsic.  Callers in this file compare the result against
;; zero to find out whether any lane is on.
define internal i32 @__movmsk(<4 x i32>) nounwind readnone alwaysinline {
  ; the intrinsic is declared on floats, so reinterpret the integer mask
  %mask_ps = bitcast <4 x i32> %0 to <4 x float>
  %bits = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %mask_ps) nounwind readnone
  ret i32 %bits
}
|
||||
|
||||
;; Reduces the four float lanes of the argument to a single float using the
;; float min helpers defined above.  The body is produced by a 4-wide
;; reduction macro defined outside this file.
define internal float @__reduce_min_float(<4 x float>) nounwind readnone {
|
||||
reduce4(float, @__min_varying_float, @__min_uniform_float)
|
||||
}
|
||||
|
||||
;; Reduces the four float lanes of the argument to a single float using the
;; float max helpers defined above.  The body is produced by a 4-wide
;; reduction macro defined outside this file.
define internal float @__reduce_max_float(<4 x float>) nounwind readnone {
|
||||
reduce4(float, @__max_varying_float, @__max_uniform_float)
|
||||
}
|
||||
|
||||
;; Sum of the four i32 lanes of %v.
define internal i32 @__reduce_add_int32(<4 x i32> %v) nounwind readnone {
  ; move elements 2 and 3 down to positions 0 and 1, then add to the
  ; original so that positions 0 and 1 hold v0+v2 and v1+v3
  %upper = shufflevector <4 x i32> %v, <4 x i32> undef,
                         <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %pair_sums = add <4 x i32> %upper, %v

  ; finish with a scalar add of the two partial sums
  %even = extractelement <4 x i32> %pair_sums, i32 0
  %odd = extractelement <4 x i32> %pair_sums, i32 1
  %total = add i32 %even, %odd
  ret i32 %total
}
|
||||
|
||||
;; Reduces the four i32 lanes of the argument to a single i32 using the
;; signed int32 min helpers named in the body (defined elsewhere).  The body
;; is produced by a 4-wide reduction macro defined outside this file.
define internal i32 @__reduce_min_int32(<4 x i32>) nounwind readnone {
|
||||
reduce4(i32, @__min_varying_int32, @__min_uniform_int32)
|
||||
}
|
||||
|
||||
;; Reduces the four i32 lanes of the argument to a single i32 using the
;; signed int32 max helpers named in the body (defined elsewhere).  The body
;; is produced by a 4-wide reduction macro defined outside this file.
define internal i32 @__reduce_max_int32(<4 x i32>) nounwind readnone {
|
||||
reduce4(i32, @__max_varying_int32, @__max_uniform_int32)
|
||||
}
|
||||
|
||||
;; Unsigned 32-bit add reduction.  It simply forwards to the int32 version
;; and returns that result unchanged.
define internal i32 @__reduce_add_uint32(<4 x i32> %v) nounwind readnone {
  %total = call i32 @__reduce_add_int32(<4 x i32> %v)
  ret i32 %total
}
|
||||
|
||||
;; Reduces the four i32 lanes of the argument to a single i32 using the
;; uint32 min helpers named in the body (defined elsewhere).  The body is
;; produced by a 4-wide reduction macro defined outside this file.
define internal i32 @__reduce_min_uint32(<4 x i32>) nounwind readnone {
|
||||
reduce4(i32, @__min_varying_uint32, @__min_uniform_uint32)
|
||||
}
|
||||
|
||||
;; Reduces the four i32 lanes of the argument to a single i32 using the
;; uint32 max helpers named in the body (defined elsewhere).  The body is
;; produced by a 4-wide reduction macro defined outside this file.
define internal i32 @__reduce_max_uint32(<4 x i32>) nounwind readnone {
|
||||
reduce4(i32, @__max_varying_uint32, @__max_uniform_uint32)
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
;; Masked store of four 32-bit values: %0 is the destination vector pointer,
;; %1 holds the values and %2 is the mask.  The quoted body is handed to a
;; per-lane helper macro together with the lane count and the mask; for one
;; lane it computes the address of that element inside %0 and stores the
;; matching element of %1 there.
;; NOTE(review): presumably the macro runs the body only for lanes whose
;; mask is on - confirm against the macro definition.
define void @__masked_store_32(<4 x i32>* nocapture, <4 x i32>, <4 x i32>) nounwind alwaysinline {
|
||||
per_lane(4, <4 x i32> %2, `
|
||||
; compute address for this one
|
||||
%ptr_ID = getelementptr <4 x i32> * %0, i32 0, i32 LANE
|
||||
%storeval_ID = extractelement <4 x i32> %1, i32 LANE
|
||||
store i32 %storeval_ID, i32 * %ptr_ID')
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Masked store of four 64-bit values: %0 is the destination vector pointer,
;; %1 holds the values and %2 is the (32-bit per lane) mask.  The quoted
;; body is handed to a per-lane helper macro together with the lane count
;; and the mask; for one lane it computes the address of that element inside
;; %0 and stores the matching element of %1 there.
;; NOTE(review): presumably the macro runs the body only for lanes whose
;; mask is on - confirm against the macro definition.
define void @__masked_store_64(<4 x i64>* nocapture, <4 x i64>, <4 x i32>) nounwind alwaysinline {
|
||||
per_lane(4, <4 x i32> %2, `
|
||||
%ptr_ID = getelementptr <4 x i64> * %0, i32 0, i32 LANE
|
||||
%storeval_ID = extractelement <4 x i64> %1, i32 LANE
|
||||
store i64 %storeval_ID, i64 * %ptr_ID')
|
||||
ret void
|
||||
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
;; Load a single 32-bit value from the address in %0 and replicate it into
;; all four lanes.  If no mask lane is on, the pointer is never dereferenced
;; (the address may be invalid in that case) and the result is undef.
define <4 x i32> @__load_and_broadcast_32(i8 *, <4 x i32> %mask) nounwind alwaysinline {
  ; only touch memory when at least one lane is running
  %mask_bits = call i32 @__movmsk(<4 x i32> %mask)
  %has_active = icmp ne i32 %mask_bits, 0
  br i1 %has_active, label %do_load, label %all_off

do_load:
  %src = bitcast i8 * %0 to i32 *
  %scalar = load i32 * %src

  ; copy the scalar into each of the four lanes in turn
  %b0 = insertelement <4 x i32> undef, i32 %scalar, i32 0
  %b1 = insertelement <4 x i32> %b0, i32 %scalar, i32 1
  %b2 = insertelement <4 x i32> %b1, i32 %scalar, i32 2
  %b3 = insertelement <4 x i32> %b2, i32 %scalar, i32 3
  ret <4 x i32> %b3

all_off:
  ret <4 x i32> undef
}
|
||||
|
||||
;; Load a single 64-bit value from the address in %0 and replicate it into
;; all four lanes.  If no mask lane is on, the pointer is never dereferenced
;; (the address may be invalid in that case) and the result is undef.
define <4 x i64> @__load_and_broadcast_64(i8 *, <4 x i32> %mask) nounwind alwaysinline {
  ; only touch memory when at least one lane is running
  %mask_bits = call i32 @__movmsk(<4 x i32> %mask)
  %has_active = icmp ne i32 %mask_bits, 0
  br i1 %has_active, label %do_load, label %all_off

do_load:
  %src = bitcast i8 * %0 to i64 *
  %scalar = load i64 * %src

  ; copy the scalar into each of the four lanes in turn
  %b0 = insertelement <4 x i64> undef, i64 %scalar, i32 0
  %b1 = insertelement <4 x i64> %b0, i64 %scalar, i32 1
  %b2 = insertelement <4 x i64> %b1, i64 %scalar, i32 2
  %b3 = insertelement <4 x i64> %b2, i64 %scalar, i32 3
  ret <4 x i64> %b3

all_off:
  ret <4 x i64> undef
}
|
||||
|
||||
;; Masked load of four 32-bit values from the address in %0.  With the mask
;; all off nothing is read and the result is undef.
define <4 x i32> @__load_masked_32(i8 *, <4 x i32> %mask) nounwind alwaysinline {
  %mask_bits = call i32 @__movmsk(<4 x i32> %mask)
  %has_active = icmp ne i32 %mask_bits, 0
  br i1 %has_active, label %do_load, label %all_off

do_load:
  ; one active lane is enough to load the entire vector
  ; FIXME: this is a latent bug.  If the vector straddles a page boundary
  ; and the following page is not readable, the load faults even though
  ; the mask says those elements should not be read.
  %vec_ptr = bitcast i8 * %0 to <4 x i32> *
  %loaded = load <4 x i32> * %vec_ptr, align 4
  ret <4 x i32> %loaded

all_off:
  ret <4 x i32> undef
}
|
||||
|
||||
;; Masked load of four 64-bit values from the address in %0.  With the mask
;; all off nothing is read and the result is undef.
define <4 x i64> @__load_masked_64(i8 *, <4 x i32> %mask) nounwind alwaysinline {
  %mask_bits = call i32 @__movmsk(<4 x i32> %mask)
  %has_active = icmp ne i32 %mask_bits, 0
  br i1 %has_active, label %do_load, label %all_off

do_load:
  ; one active lane is enough to load the entire vector
  ; FIXME: this is a latent bug.  If the vector straddles a page boundary
  ; and the following page is not readable, the load faults even though
  ; the mask says those elements should not be read.
  %vec_ptr = bitcast i8 * %0 to <4 x i64> *
  %loaded = load <4 x i64> * %vec_ptr, align 8
  ret <4 x i64> %loaded

all_off:
  ret <4 x i64> undef
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
; The gather and scatter routines are not written out by hand; the four
; invocations below instantiate them for the 4-wide target, once for i32 and
; once for i64 elements each, using the macros from stdlib.m4.
|
||||
|
||||
gen_gather(4, i32)
|
||||
gen_gather(4, i64)
|
||||
gen_scatter(4, i32)
|
||||
gen_scatter(4, i64)
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision sqrt
|
||||
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||
|
||||
;; Square root of a 4-wide double vector.  The SSE2 sqrt.pd intrinsic is
;; declared on 2-wide double vectors, so the work is handed to a 2-to-4
;; helper macro which leaves its result in %ret.
;; NOTE(review): presumably the macro applies the intrinsic to each half of
;; %0 and joins the two results - confirm against the macro definition.
define internal <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
|
||||
unary2to4(ret, double, @llvm.x86.sse2.sqrt.pd, %0)
|
||||
ret <4 x double> %ret
|
||||
}
|
||||
|
||||
|
||||
;; Square root of a single double.  A helper macro routes the scalar %0
;; through a 2-wide intrinsic and leaves the scalar result in %ret.
;; The scalar-form sqrt.sd intrinsic is used, in line with the float version
;; which uses sqrt.ss; the packed form would also take the square root of
;; the second element of the temporary vector, which presumably holds no
;; defined value.
define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
  sse_unary_scalar(ret, 2, double, @llvm.x86.sse2.sqrt.sd, %0)
  ret double %ret
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision min/max
|
||||
|
||||
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
;; Lane-wise minimum of two 4-wide double vectors.  The SSE2 min.pd
;; intrinsic is declared on 2-wide double vectors, so the work is handed to
;; a 2-to-4 helper macro which leaves its result in %ret.
define internal <4 x double> @__min_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
||||
binary2to4(ret, double, @llvm.x86.sse2.min.pd, %0, %1)
|
||||
ret <4 x double> %ret
|
||||
}
|
||||
|
||||
|
||||
;; Minimum of two scalar doubles.  A helper macro routes %0 and %1 through a
;; 2-wide intrinsic and leaves the scalar result in %ret.
;; The scalar-form min.sd intrinsic is used, in line with the float version
;; which uses min.ss; the packed form would also compare the second elements
;; of the temporary vectors, which presumably hold no defined values.
define internal double @__min_uniform_double(double, double) nounwind readnone {
  sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
  ret double %ret
}
|
||||
|
||||
|
||||
;; Lane-wise maximum of two 4-wide double vectors.  The SSE2 max.pd
;; intrinsic is declared on 2-wide double vectors, so the work is handed to
;; a 2-to-4 helper macro which leaves its result in %ret.
define internal <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind readnone {
|
||||
binary2to4(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
|
||||
ret <4 x double> %ret
|
||||
}
|
||||
|
||||
|
||||
;; Maximum of two scalar doubles.  A helper macro routes %0 and %1 through a
;; 2-wide intrinsic and leaves the scalar result in %ret.
;; The scalar-form max.sd intrinsic is used, in line with the float version
;; which uses max.ss; the packed form would also compare the second elements
;; of the temporary vectors, which presumably hold no defined values.
define internal double @__max_uniform_double(double, double) nounwind readnone {
  sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
  ret double %ret
}
|
||||
328
stdlib-sse2.ll
Normal file
328
stdlib-sse2.ll
Normal file
@@ -0,0 +1,328 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Define the standard library builtins for the SSE2 target
|
||||
|
||||
; Define some basics for a 4-wide target
|
||||
stdlib_core(4)
|
||||
packed_load_and_store(4)
|
||||
|
||||
; Include the various definitions of things that only require SSE1 and SSE2
|
||||
include(`stdlib-sse.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rounding
|
||||
;;
|
||||
;; There are not any rounding instructions in SSE2, so we have to emulate
|
||||
;; the functionality with multiple instructions...
|
||||
|
||||
; The code for __round_* is the result of compiling the following source
|
||||
; code.
|
||||
;
|
||||
; export float Round(float x) {
|
||||
; unsigned int sign = signbits(x);
|
||||
; unsigned int ix = intbits(x);
|
||||
; ix ^= sign;
|
||||
; x = floatbits(ix);
|
||||
; x += 0x1.0p23f;
|
||||
; x -= 0x1.0p23f;
|
||||
; ix = intbits(x);
|
||||
; ix ^= sign;
|
||||
; x = floatbits(ix);
|
||||
; return x;
|
||||
;}
|
||||
|
||||
;; Round every lane to an integral value without the SSE4.1 rounding
;; instructions.  The sign bit is stripped, 2^23 (8388608.0) is added and then
;; subtracted again so that the FP adder drops the fraction bits according to
;; the current rounding mode, and finally the sign bit is put back.
;; NOTE(review): lanes whose magnitude is 2^23 or more are already integral
;; but still go through the add/subtract pair, which can perturb them;
;; confirm that callers never depend on such inputs.
define internal <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
  %x_bits = bitcast <4 x float> %0 to <4 x i32>
  ; isolate the sign bit (0x80000000) of each lane
  %sign = and <4 x i32> %x_bits, <i32 -2147483648, i32 -2147483648, i32 -2147483648, i32 -2147483648>
  ; clearing the sign yields the magnitude of the input
  %abs_bits = xor <4 x i32> %x_bits, %sign
  %abs = bitcast <4 x i32> %abs_bits to <4 x float>
  ; push the fraction bits out of the mantissa, then come back down
  %biased = fadd <4 x float> %abs, <float 8.388608e+06, float 8.388608e+06, float 8.388608e+06, float 8.388608e+06>
  %rounded_abs = fadd <4 x float> %biased, <float -8.388608e+06, float -8.388608e+06, float -8.388608e+06, float -8.388608e+06>
  ; restore the original sign
  %rounded_bits = bitcast <4 x float> %rounded_abs to <4 x i32>
  %signed_bits = xor <4 x i32> %rounded_bits, %sign
  %result = bitcast <4 x i32> %signed_bits to <4 x float>
  ret <4 x float> %result
}
|
||||
|
||||
;; Scalar counterpart of the varying rounding routine: strip the sign bit,
;; add and subtract 2^23 (8388608.0) so the fraction bits are dropped
;; according to the current rounding mode, then put the sign bit back.
;; NOTE(review): values whose magnitude is 2^23 or more are already integral
;; but still go through the add/subtract pair, which can perturb them;
;; confirm that callers never depend on such inputs.
define internal float @__round_uniform_float(float) nounwind readonly alwaysinline {
  %x_bits = bitcast float %0 to i32
  ; sign bit only (0x80000000)
  %sign = and i32 %x_bits, -2147483648
  ; magnitude of the input as raw bits, then as a float
  %abs_bits = xor i32 %sign, %x_bits
  %abs = bitcast i32 %abs_bits to float
  %biased = fadd float %abs, 8.388608e+06
  %rounded_abs = fadd float %biased, -8.388608e+06
  ; restore the original sign
  %rounded_bits = bitcast float %rounded_abs to i32
  %signed_bits = xor i32 %rounded_bits, %sign
  %result = bitcast i32 %signed_bits to float
  ret float %result
}
|
||||
|
||||
;; Similarly, for implementations of the __floor* functions below, we have the
|
||||
;; bitcode from compiling the following source code...
|
||||
|
||||
;export float Floor(float x) {
|
||||
; float y = Round(x);
|
||||
; unsigned int cmp = y > x ? 0xffffffff : 0;
|
||||
; float delta = -1.f;
|
||||
; unsigned int idelta = intbits(delta);
|
||||
; idelta &= cmp;
|
||||
; delta = floatbits(idelta);
|
||||
; return y + delta;
|
||||
;}
|
||||
|
||||
;; Per-lane floor: round first, then subtract 1.0 from every lane where the
;; rounded value ended up above the input.
define internal <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
  ; all-ones in the lanes that were rounded upward, zero elsewhere
  %went_up = fcmp ogt <4 x float> %rounded, %0
  %lane_mask = sext <4 x i1> %went_up to <4 x i32>
  ; -1082130432 is 0xBF800000, the bit pattern of -1.0f; masked-off lanes get +0.0
  %adjust_bits = and <4 x i32> %lane_mask, <i32 -1082130432, i32 -1082130432, i32 -1082130432, i32 -1082130432>
  %adjust = bitcast <4 x i32> %adjust_bits to <4 x float>
  %floored = fadd <4 x float> %rounded, %adjust
  ret <4 x float> %floored
}
|
||||
|
||||
;; Scalar floor: round first, then subtract 1.0 when the rounded value ended
;; up above the input.
define internal float @__floor_uniform_float(float) nounwind readonly alwaysinline {
  %rounded = tail call float @__round_uniform_float(float %0) nounwind
  %went_up = fcmp ogt float %rounded, %0
  ; all-ones when the value was rounded upward, zero otherwise
  %mask = sext i1 %went_up to i32
  ; -1082130432 is 0xBF800000, the bit pattern of -1.0f
  %adjust_bits = and i32 %mask, -1082130432
  %adjust = bitcast i32 %adjust_bits to float
  %floored = fadd float %rounded, %adjust
  ret float %floored
}
|
||||
|
||||
;; And here is the code we compiled to get the __ceil* functions below
|
||||
;
|
||||
;export uniform float Ceil(uniform float x) {
|
||||
; uniform float y = Round(x);
|
||||
; uniform int yltx = y < x ? 0xffffffff : 0;
|
||||
; uniform float delta = 1.f;
|
||||
; uniform int idelta = intbits(delta);
|
||||
; idelta &= yltx;
|
||||
; delta = floatbits(idelta);
|
||||
; return y + delta;
|
||||
;}
|
||||
|
||||
;; Per-lane ceiling: round first, then add 1.0 to every lane where the
;; rounded value ended up below the input.
define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
  %rounded = tail call <4 x float> @__round_varying_float(<4 x float> %0) nounwind
  ; all-ones in the lanes that were rounded downward, zero elsewhere
  %went_down = fcmp olt <4 x float> %rounded, %0
  %lane_mask = sext <4 x i1> %went_down to <4 x i32>
  ; 1065353216 is 0x3F800000, the bit pattern of 1.0f; masked-off lanes get +0.0
  %adjust_bits = and <4 x i32> %lane_mask, <i32 1065353216, i32 1065353216, i32 1065353216, i32 1065353216>
  %adjust = bitcast <4 x i32> %adjust_bits to <4 x float>
  %ceiled = fadd <4 x float> %rounded, %adjust
  ret <4 x float> %ceiled
}
|
||||
|
||||
;; Scalar ceiling: round first, then add 1.0 when the rounded value ended up
;; below the input.
define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
  %rounded = tail call float @__round_uniform_float(float %0) nounwind
  %went_down = fcmp olt float %rounded, %0
  ; all-ones when the value was rounded downward, zero otherwise
  %mask = sext i1 %went_down to i32
  ; 1065353216 is 0x3F800000, the bit pattern of 1.0f
  %adjust_bits = and i32 %mask, 1065353216
  %adjust = bitcast i32 %adjust_bits to float
  %ceiled = fadd float %rounded, %adjust
  ret float %ceiled
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; min/max
|
||||
|
||||
; There is no blend instruction with SSE2, so we simulate it with bit
|
||||
; operations on i32s. For these two vselect functions, for each
|
||||
; vector element, if the mask is on, we return the corresponding value
|
||||
; from %1, and otherwise return the value from %0.
|
||||
|
||||
;; Bitwise blend of two i32 vectors: result = (%0 & ~mask) | (%1 & mask).
;; With an all-ones / all-zeros lane mask this picks %1 where the mask is set
;; and %0 elsewhere.
define internal <4 x i32> @__vselect_i32(<4 x i32>, <4 x i32> ,
                                         <4 x i32> %mask) nounwind readnone alwaysinline {
  ; bits contributed by the second operand
  %from_new = and <4 x i32> %1, %mask
  ; bits kept from the first operand (mask inverted via xor with all-ones)
  %inverse = xor <4 x i32> %mask, <i32 -1, i32 -1, i32 -1, i32 -1>
  %from_old = and <4 x i32> %0, %inverse
  %blended = or <4 x i32> %from_old, %from_new
  ret <4 x i32> %blended
}
|
||||
|
||||
;; Float flavour of the bitwise blend: both inputs are reinterpreted as i32
;; vectors, blended by __vselect_i32 and the result is reinterpreted back.
define internal <4 x float> @__vselect_float(<4 x float>, <4 x float>,
                                             <4 x i32> %mask) nounwind readnone alwaysinline {
  %old_bits = bitcast <4 x float> %0 to <4 x i32>
  %new_bits = bitcast <4 x float> %1 to <4 x i32>
  %blend_bits = call <4 x i32> @__vselect_i32(<4 x i32> %old_bits, <4 x i32> %new_bits, <4 x i32> %mask)
  %blend = bitcast <4 x i32> %blend_bits to <4 x float>
  ret <4 x float> %blend
}
|
||||
|
||||
|
||||
; To do vector integer min and max, we do the vector compare and then sign
|
||||
; extend the i1 vector result to an i32 mask. The __vselect does the
|
||||
; rest...
|
||||
|
||||
;; Per-lane signed minimum of two i32 vectors (no pminsd before SSE4.1).
define internal <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  ; lanes where the first operand is the smaller one
  %first_smaller = icmp slt <4 x i32> %0, %1
  %pick_first = sext <4 x i1> %first_smaller to <4 x i32>
  ; __vselect_i32 yields its second operand wherever the mask is set
  %lowest = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %pick_first)
  ret <4 x i32> %lowest
}
|
||||
|
||||
;; Signed minimum of two uniform i32 values.
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_smaller = icmp slt i32 %0, %1
  %lowest = select i1 %first_smaller, i32 %0, i32 %1
  ret i32 %lowest
}
|
||||
|
||||
;; Per-lane signed maximum of two i32 vectors (no pmaxsd before SSE4.1).
define internal <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  ; lanes where the first operand is the larger one
  %first_larger = icmp sgt <4 x i32> %0, %1
  %pick_first = sext <4 x i1> %first_larger to <4 x i32>
  ; __vselect_i32 yields its second operand wherever the mask is set
  %highest = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %pick_first)
  ret <4 x i32> %highest
}
|
||||
|
||||
;; Signed maximum of two uniform i32 values.
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  %first_larger = icmp sgt i32 %0, %1
  %highest = select i1 %first_larger, i32 %0, i32 %1
  ret i32 %highest
}
|
||||
|
||||
; The functions for unsigned ints are similar, just with unsigned
|
||||
; comparison functions...
|
||||
|
||||
;; Per-lane unsigned minimum of two i32 vectors (no pminud before SSE4.1).
define internal <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  ; lanes where the first operand is the smaller one, compared as unsigned
  %first_smaller = icmp ult <4 x i32> %0, %1
  %pick_first = sext <4 x i1> %first_smaller to <4 x i32>
  ; __vselect_i32 yields its second operand wherever the mask is set
  %lowest = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %pick_first)
  ret <4 x i32> %lowest
}
|
||||
|
||||
;; Unsigned minimum of two uniform i32 values.
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_smaller = icmp ult i32 %0, %1
  %lowest = select i1 %first_smaller, i32 %0, i32 %1
  ret i32 %lowest
}
|
||||
|
||||
;; Per-lane unsigned maximum of two i32 vectors (no pmaxud before SSE4.1).
define internal <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  ; lanes where the first operand is the larger one, compared as unsigned
  %first_larger = icmp ugt <4 x i32> %0, %1
  %pick_first = sext <4 x i1> %first_larger to <4 x i32>
  ; __vselect_i32 yields its second operand wherever the mask is set
  %highest = call <4 x i32> @__vselect_i32(<4 x i32> %1, <4 x i32> %0, <4 x i32> %pick_first)
  ret <4 x i32> %highest
}
|
||||
|
||||
;; Unsigned maximum of two uniform i32 values.
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  %first_larger = icmp ugt i32 %0, %1
  %highest = select i1 %first_larger, i32 %0, i32 %1
  ret i32 %highest
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
; Population count of a 32-bit value.  SSE2-class hardware has no popcnt
; instruction, so the count is computed branch-free with the classic parallel
; bit-summing steps (partial sums in 2-, 4- and 8-bit fields) instead of
; walking the value one bit at a time.

define internal i32 @__popcnt(i32) nounwind readonly alwaysinline {
  ; 2-bit fields: each field becomes the number of set bits in that pair
  %half = lshr i32 %0, 1
  %odd = and i32 %half, 1431655765            ; 0x55555555
  %pairs = sub i32 %0, %odd

  ; 4-bit fields: add neighbouring pair counts
  %pairs_lo = and i32 %pairs, 858993459       ; 0x33333333
  %pairs_dn = lshr i32 %pairs, 2
  %pairs_hi = and i32 %pairs_dn, 858993459    ; 0x33333333
  %nibbles = add i32 %pairs_lo, %pairs_hi

  ; 8-bit fields: add neighbouring nibble counts (each byte is at most 8)
  %nib_dn = lshr i32 %nibbles, 4
  %nib_sum = add i32 %nibbles, %nib_dn
  %bytes = and i32 %nib_sum, 252645135        ; 0x0F0F0F0F

  ; multiplying by 0x01010101 accumulates all four byte counts in the top byte
  %spread = mul i32 %bytes, 16843009
  %total = lshr i32 %spread, 24
  ret i32 %total
}
|
||||
|
||||
|
||||
;; Horizontal sum of the four lanes of %v using only SSE2-level operations.
define internal float @__reduce_add_float(<4 x float> %v) nounwind readonly alwaysinline {
  ; bring lanes 2 and 3 down to positions 0 and 1; the upper lanes are unused
  %upper = shufflevector <4 x float> %v, <4 x float> undef,
                         <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; lane 0 = v2 + v0, lane 1 = v3 + v1
  %pairs = fadd <4 x float> %upper, %v
  %even = extractelement <4 x float> %pairs, i32 0
  %odd = extractelement <4 x float> %pairs, i32 1
  %total = fadd float %even, %odd
  ret float %total
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
;; Masked store of four 32-bit values implemented as load / blend / store:
;; the current memory contents are read, lanes selected by %mask are replaced
;; with the new value, and the whole vector is written back.
define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>,
                                     <4 x i32> %mask) nounwind alwaysinline {
  %current = load <4 x i32> * %0
  ; take %1 where the mask is set, keep the loaded value elsewhere
  %merged = call <4 x i32> @__vselect_i32(<4 x i32> %current, <4 x i32> %1, <4 x i32> %mask)
  store <4 x i32> %merged, <4 x i32> * %0
  ret void
}
|
||||
|
||||
;; Masked store of four 64-bit values implemented as load / blend / store.
;; The 4 x i64 data is handled as two halves of two elements each.  Every
;; half is reinterpreted as <4 x float> so that the 32-bit blend routine can
;; be reused, with each 32-bit mask lane duplicated to cover both words of
;; its 64-bit element.
define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
                                     <4 x i32> %mask) nounwind alwaysinline {
  %current = load <4 x i64>* %ptr

  ; ---- elements 0 and 1 ----
  %cur_lo = shufflevector <4 x i64> %current, <4 x i64> undef,
                          <2 x i32> <i32 0, i32 1>
  %new_lo = shufflevector <4 x i64> %new, <4 x i64> undef,
                          <2 x i32> <i32 0, i32 1>
  %cur_lo_f = bitcast <2 x i64> %cur_lo to <4 x float>
  %new_lo_f = bitcast <2 x i64> %new_lo to <4 x float>
  ; mask lanes 0 and 1, each repeated twice
  %mask_lo = shufflevector <4 x i32> %mask, <4 x i32> undef,
                           <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %blend_lo_f = call <4 x float> @__vselect_float(<4 x float> %cur_lo_f, <4 x float> %new_lo_f, <4 x i32> %mask_lo)
  %blend_lo = bitcast <4 x float> %blend_lo_f to <2 x i64>

  ; ---- elements 2 and 3 ----
  %cur_hi = shufflevector <4 x i64> %current, <4 x i64> undef,
                          <2 x i32> <i32 2, i32 3>
  %new_hi = shufflevector <4 x i64> %new, <4 x i64> undef,
                          <2 x i32> <i32 2, i32 3>
  %cur_hi_f = bitcast <2 x i64> %cur_hi to <4 x float>
  %new_hi_f = bitcast <2 x i64> %new_hi to <4 x float>
  ; mask lanes 2 and 3, each repeated twice
  %mask_hi = shufflevector <4 x i32> %mask, <4 x i32> undef,
                           <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %blend_hi_f = call <4 x float> @__vselect_float(<4 x float> %cur_hi_f, <4 x float> %new_hi_f, <4 x i32> %mask_hi)
  %blend_hi = bitcast <4 x float> %blend_hi_f to <2 x i64>

  ; join the two blended halves and write everything back
  %merged = shufflevector <2 x i64> %blend_lo, <2 x i64> %blend_hi,
                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x i64> %merged, <4 x i64> * %ptr
  ret void
}
|
||||
|
||||
248
stdlib-sse4.ll
Normal file
248
stdlib-sse4.ll
Normal file
@@ -0,0 +1,248 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
; Define common 4-wide stuff
|
||||
stdlib_core(4)
|
||||
packed_load_and_store(4)
|
||||
|
||||
; Define the stuff that can be done with base SSE1/SSE2 instructions
|
||||
include(`stdlib-sse.ll')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; math
|
||||
|
||||
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||
|
||||
;; Per-lane round to nearest using the SSE4.1 roundps instruction.
define internal <4 x float> @__round_varying_float(<4 x float>) nounwind readonly alwaysinline {
  ; immediate 8 = rounding mode nearest (0b00) with the
  ; do-not-signal-precision-exceptions bit (0b1000) set
  %rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 8)
  ret <4 x float> %rounded
}
|
||||
|
||||
;; Scalar round to nearest using the SSE4.1 roundss instruction.
;;
;; The matching C intrinsic is
;;     __m128 _mm_round_ss (__m128 a, __m128 b, const int c)
;; whose result is r0 = RND(b0), r1 = a1, r2 = a2, r3 = a3.  Only r0 is
;; needed here, so the same vector is passed for both a and b, and only its
;; lane 0 carries a meaningful value.
define internal float @__round_uniform_float(float) nounwind readonly alwaysinline {
  %lane0 = insertelement <4 x float> undef, float %0, i32 0
  ; immediate 8 = rounding mode nearest (0b00) with the
  ; do-not-signal-precision-exceptions bit (0b1000) set
  %rounded = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %lane0, <4 x float> %lane0, i32 8)
  %scalar = extractelement <4 x float> %rounded, i32 0
  ret float %scalar
}
|
||||
|
||||
;; Per-lane floor using the SSE4.1 roundps instruction.
define internal <4 x float> @__floor_varying_float(<4 x float>) nounwind readonly alwaysinline {
  ; immediate 9 = round down (0b01) with the
  ; do-not-signal-precision-exceptions bit (0b1000) set
  %floored = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 9)
  ret <4 x float> %floored
}
|
||||
|
||||
;; Scalar floor using the SSE4.1 roundss instruction.  The input is placed in
;; lane 0 of an otherwise undefined vector that is passed as both operands of
;; the intrinsic; only lane 0 of the result is read back.
define internal float @__floor_uniform_float(float) nounwind readonly alwaysinline {
  %lane0 = insertelement <4 x float> undef, float %0, i32 0
  ; immediate 9 = round down (0b01) with the
  ; do-not-signal-precision-exceptions bit (0b1000) set
  %floored = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %lane0, <4 x float> %lane0, i32 9)
  %scalar = extractelement <4 x float> %floored, i32 0
  ret float %scalar
}
|
||||
|
||||
;; Per-lane ceiling using the SSE4.1 roundps instruction.
define internal <4 x float> @__ceil_varying_float(<4 x float>) nounwind readonly alwaysinline {
  ; immediate 10 = round up (0b10) with the
  ; do-not-signal-precision-exceptions bit (0b1000) set
  %ceiled = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %0, i32 10)
  ret <4 x float> %ceiled
}
|
||||
|
||||
;; Scalar ceiling using the SSE4.1 roundss instruction.  The input is placed
;; in lane 0 of an otherwise undefined vector that is passed as both operands
;; of the intrinsic; only lane 0 of the result is read back.
define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
  %lane0 = insertelement <4 x float> undef, float %0, i32 0
  ; immediate 10 = round up (0b10) with the
  ; do-not-signal-precision-exceptions bit (0b1000) set
  %ceiled = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %lane0, <4 x float> %lane0, i32 10)
  %scalar = extractelement <4 x float> %ceiled, i32 0
  ret float %scalar
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; integer min/max
|
||||
|
||||
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
;; Per-lane signed minimum of two i32 vectors via the SSE4.1 pminsd intrinsic.
define internal <4 x i32> @__min_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  %lowest = call <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32> %0, <4 x i32> %1)
  ret <4 x i32> %lowest
}
|
||||
|
||||
;; Signed minimum of two uniform i32 values.  The m4 helper invoked below
;; (its body lives outside this chunk) is expected to route the scalars
;; through the 4-wide pminsd intrinsic and bind the scalar result to %smin.
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(smin, 4, i32, @llvm.x86.sse41.pminsd, %0, %1)
  ret i32 %smin
}
|
||||
|
||||
;; Per-lane signed maximum of two i32 vectors via the SSE4.1 pmaxsd intrinsic.
define internal <4 x i32> @__max_varying_int32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  %highest = call <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32> %0, <4 x i32> %1)
  ret <4 x i32> %highest
}
|
||||
|
||||
;; Signed maximum of two uniform i32 values.  The m4 helper invoked below
;; (its body lives outside this chunk) is expected to route the scalars
;; through the 4-wide pmaxsd intrinsic and bind the scalar result to %smax.
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(smax, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
  ret i32 %smax
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; unsigned int min/max
|
||||
|
||||
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
;; Per-lane unsigned minimum of two i32 vectors via the SSE4.1 pminud intrinsic.
define internal <4 x i32> @__min_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  %lowest = call <4 x i32> @llvm.x86.sse41.pminud(<4 x i32> %0, <4 x i32> %1)
  ret <4 x i32> %lowest
}
|
||||
|
||||
;; Unsigned minimum of two uniform i32 values.  The m4 helper invoked below
;; (its body lives outside this chunk) is expected to route the scalars
;; through the 4-wide pminud intrinsic and bind the scalar result to %umin.
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(umin, 4, i32, @llvm.x86.sse41.pminud, %0, %1)
  ret i32 %umin
}
|
||||
|
||||
;; Per-lane unsigned maximum of two i32 vectors via the SSE4.1 pmaxud intrinsic.
define internal <4 x i32> @__max_varying_uint32(<4 x i32>, <4 x i32>) nounwind readonly alwaysinline {
  %highest = call <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32> %0, <4 x i32> %1)
  ret <4 x i32> %highest
}
|
||||
|
||||
;; Unsigned maximum of two uniform i32 values.  The m4 helper invoked below
;; (its body lives outside this chunk) is expected to route the scalars
;; through the 4-wide pmaxud intrinsic and bind the scalar result to %umax.
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(umax, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1)
  ret i32 %umax
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
|
||||
;; Population count of a 32-bit value, delegated to the generic LLVM ctpop
;; intrinsic.
define internal i32 @__popcnt(i32) nounwind readonly alwaysinline {
  %bits_set = call i32 @llvm.ctpop.i32(i32 %0)
  ret i32 %bits_set
}
|
||||
|
||||
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
;; Horizontal sum of the four lanes of the input using two SSE3 haddps steps.
;; Each step adds adjacent lane pairs, so after the second step lane 0 is
;; expected to hold the sum of all four input lanes.
define internal float @__reduce_add_float(<4 x float>) nounwind readonly alwaysinline {
  %pairs = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %0, <4 x float> %0)
  %quad = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %pairs, <4 x float> %pairs)
  %total = extractelement <4 x float> %quad, i32 0
  ret float %total
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
|
||||
<4 x float>) nounwind readnone
|
||||
|
||||
|
||||
;; Masked store of four 32-bit values implemented as load / blend / store
;; with the SSE4.1 blendvps intrinsic.  The integer data and the mask are
;; reinterpreted as float vectors because the intrinsic is declared on
;; <4 x float>.
define void @__masked_store_blend_32(<4 x i32>* nocapture, <4 x i32>,
                                     <4 x i32> %mask) nounwind alwaysinline {
  %current = load <4 x i32>* %0
  %current_f = bitcast <4 x i32> %current to <4 x float>
  %incoming_f = bitcast <4 x i32> %1 to <4 x float>
  %mask_f = bitcast <4 x i32> %mask to <4 x float>
  ; operand order: current contents, new value, selector
  %merged_f = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %current_f,
                                                        <4 x float> %incoming_f,
                                                        <4 x float> %mask_f)
  %merged = bitcast <4 x float> %merged_f to <4 x i32>
  store <4 x i32> %merged, <4 x i32>* %0
  ret void
}
|
||||
|
||||
|
||||
;; Masked store of four 64-bit values implemented as load / blend / store
;; with the SSE4.1 blendvps intrinsic.  The 4 x i64 data is handled as two
;; halves of two elements each; every half is reinterpreted as <4 x float>,
;; and each 32-bit mask lane is duplicated so it covers both words of its
;; 64-bit element.
define void @__masked_store_blend_64(<4 x i64>* nocapture %ptr, <4 x i64> %new,
                                     <4 x i32> %i32mask) nounwind alwaysinline {
  %current = load <4 x i64>* %ptr
  %mask_f = bitcast <4 x i32> %i32mask to <4 x float>

  ; ---- elements 0 and 1 ----
  %cur_lo = shufflevector <4 x i64> %current, <4 x i64> undef,
                          <2 x i32> <i32 0, i32 1>
  %new_lo = shufflevector <4 x i64> %new, <4 x i64> undef,
                          <2 x i32> <i32 0, i32 1>
  %cur_lo_f = bitcast <2 x i64> %cur_lo to <4 x float>
  %new_lo_f = bitcast <2 x i64> %new_lo to <4 x float>
  ; mask lanes 0 and 1, each repeated twice
  %mask_lo = shufflevector <4 x float> %mask_f, <4 x float> undef,
                           <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %blend_lo_f = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur_lo_f,
                                                          <4 x float> %new_lo_f,
                                                          <4 x float> %mask_lo)
  %blend_lo = bitcast <4 x float> %blend_lo_f to <2 x i64>

  ; ---- elements 2 and 3 ----
  %cur_hi = shufflevector <4 x i64> %current, <4 x i64> undef,
                          <2 x i32> <i32 2, i32 3>
  %new_hi = shufflevector <4 x i64> %new, <4 x i64> undef,
                          <2 x i32> <i32 2, i32 3>
  %cur_hi_f = bitcast <2 x i64> %cur_hi to <4 x float>
  %new_hi_f = bitcast <2 x i64> %new_hi to <4 x float>
  ; mask lanes 2 and 3, each repeated twice
  %mask_hi = shufflevector <4 x float> %mask_f, <4 x float> undef,
                           <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %blend_hi_f = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur_hi_f,
                                                          <4 x float> %new_hi_f,
                                                          <4 x float> %mask_hi)
  %blend_hi = bitcast <4 x float> %blend_hi_f to <2 x i64>

  ; join the two blended halves and write everything back
  %merged = shufflevector <2 x i64> %blend_lo, <2 x i64> %blend_hi,
                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  store <4 x i64> %merged, <4 x i64> * %ptr
  ret void
}
|
||||
703
stdlib-sse4x2.ll
Normal file
703
stdlib-sse4x2.ll
Normal file
@@ -0,0 +1,703 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
|
||||
;; This file defines the target for "double-pumped" SSE4, i.e. running
|
||||
;; with 8-wide vectors
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; standard 8-wide definitions from m4 macros
|
||||
|
||||
stdlib_core(8)
|
||||
packed_load_and_store(8)
|
||||
int8_16(8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rcp
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.rcp.ps(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.rcp.ss(<4 x float>) nounwind readnone
|
||||
|
||||
;; Per-lane reciprocal of an 8-wide float vector: a hardware rcpps estimate
;; refined by one Newton-Raphson step, est * (2 - v * est).
;; The m4 helper invoked below (its body lives outside this chunk) is
;; expected to apply the 4-wide intrinsic to both halves of the input and
;; bind the 8-wide estimate to %approx.
define internal <8 x float> @__rcp_varying_float(<8 x float>) nounwind readonly alwaysinline {
  unary4to8(approx, float, @llvm.x86.sse.rcp.ps, %0)
  ; refinement step
  %prod = fmul <8 x float> %0, %approx
  %corr = fsub <8 x float> <float 2., float 2., float 2., float 2.,
                            float 2., float 2., float 2., float 2.>, %prod
  %refined = fmul <8 x float> %approx, %corr
  ret <8 x float> %refined
}
|
||||
|
||||
;; Reciprocal of a uniform float: the rcpss estimate of lane 0, refined by
;; one Newton-Raphson step, est * (2 - v * est).
define internal float @__rcp_uniform_float(float) nounwind readonly alwaysinline {
  ; the intrinsic works on vectors, so place the scalar in lane 0
  %in_vec = insertelement <4 x float> undef, float %0, i32 0
  %est_vec = call <4 x float> @llvm.x86.sse.rcp.ss(<4 x float> %in_vec)
  %est = extractelement <4 x float> %est_vec, i32 0

  ; refinement step
  %prod = fmul float %0, %est
  %corr = fsub float 2., %prod
  %refined = fmul float %est, %corr
  ret float %refined
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; rsqrt
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.rsqrt.ps(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float>) nounwind readnone
|
||||
|
||||
;; Per-lane reciprocal square root of an 8-wide float vector: a hardware
;; rsqrtps estimate refined by one Newton-Raphson step,
;; 0.5 * est * (3 - (v * est) * est).
;; The m4 helper invoked below (its body lives outside this chunk) is
;; expected to apply the 4-wide intrinsic to both halves of %v and bind the
;; 8-wide estimate to %est.
define internal <8 x float> @__rsqrt_varying_float(<8 x float> %v) nounwind readonly alwaysinline {
  unary4to8(est, float, @llvm.x86.sse.rsqrt.ps, %v)
  ; refinement step
  %v_est = fmul <8 x float> %v, %est
  %v_est_sq = fmul <8 x float> %v_est, %est
  %corr = fsub <8 x float> <float 3., float 3., float 3., float 3.,
                            float 3., float 3., float 3., float 3.>, %v_est_sq
  %scaled = fmul <8 x float> %est, %corr
  %refined = fmul <8 x float> <float 0.5, float 0.5, float 0.5, float 0.5,
                               float 0.5, float 0.5, float 0.5, float 0.5>, %scaled
  ret <8 x float> %refined
}
|
||||
|
||||
;; Uniform reciprocal square root: rsqrtss estimate y0 followed by one
;; Newton-Raphson step,  y1 = 0.5 * y0 * (3 - (x * y0) * y0).
define internal float @__rsqrt_uniform_float(float) nounwind readonly alwaysinline {
  ; the intrinsic operates on element 0 of a 4-wide vector
  %x_vec = insertelement <4 x float> undef, float %0, i32 0
  %est_vec = call <4 x float> @llvm.x86.sse.rsqrt.ss(<4 x float> %x_vec)
  %est = extractelement <4 x float> %est_vec, i32 0

  ; Newton-Raphson refinement
  %x_y = fmul float %0, %est
  %x_y_y = fmul float %x_y, %est
  %corr = fsub float 3.0, %x_y_y
  %y_corr = fmul float %est, %corr
  %refined = fmul float 0.5, %y_corr
  ret float %refined
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; sqrt
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.sqrt.ps(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.sqrt.ss(<4 x float>) nounwind readnone
|
||||
|
||||
;; Varying single-precision square root, computed with the 4-wide
;; sqrtps intrinsic over the 8-wide input.
define internal <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysinline {
  unary4to8(root, float, @llvm.x86.sse.sqrt.ps, %0)
  ret <8 x float> %root
}
|
||||
|
||||
;; Uniform single-precision square root via the scalar sqrtss intrinsic.
define internal float @__sqrt_uniform_float(float) nounwind readonly alwaysinline {
  sse_unary_scalar(root, 4, float, @llvm.x86.sse.sqrt.ss, %0)
  ret float %root
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; fast math
|
||||
|
||||
declare void @llvm.x86.sse.stmxcsr(i32 *) nounwind
|
||||
declare void @llvm.x86.sse.ldmxcsr(i32 *) nounwind
|
||||
|
||||
;; Switch the SSE unit to its fast denormal handling by setting the
;; DAZ and FTZ bits in MXCSR.  All other MXCSR bits are left as they were.
define internal void @__fastmath() nounwind alwaysinline {
  ; read the current MXCSR value through a stack slot
  %mxcsr_slot = alloca i32
  call void @llvm.x86.sse.stmxcsr(i32 * %mxcsr_slot)
  %mxcsr = load i32 * %mxcsr_slot

  ; DAZ is 64 and FTZ is 32768; together 32832
  %mxcsr_fast = or i32 %mxcsr, 32832

  ; write the updated value back
  store i32 %mxcsr_fast, i32 * %mxcsr_slot
  call void @llvm.x86.sse.ldmxcsr(i32 * %mxcsr_slot)
  ret void
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
|
||||
; Returns the sines and writes the cosines through its pointer argument,
; so it writes memory and must not be declared readnone.
declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind
|
||||
declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
;; 8-wide sine built on the 4-wide SVML routine.
define internal <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
  unary4to8(res, float, @__svml_sinf4, %0)
  ret <8 x float> %res
}
|
||||
|
||||
;; 8-wide cosine built on the 4-wide SVML routine.
define internal <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
  unary4to8(res, float, @__svml_cosf4, %0)
  ret <8 x float> %res
}
|
||||
|
||||
;; 8-wide sincos built from two calls to the 4-wide SVML routine.
;;   %0: input angles
;;   %1: destination for the 8 sines
;;   %2: destination for the 8 cosines
;; Each 4-wide call returns the sines and writes the cosines through its
;; pointer argument.  This function stores through %1 and %2, so it is
;; deliberately NOT marked readnone; that attribute would permit the
;; optimizer to discard the stores.
define internal void @__svml_sincos(<8 x float>, <8 x float> *,
                                    <8 x float> *) nounwind alwaysinline {
  ; split the input into lanes 0-3 and lanes 4-7
  %x_lo = shufflevector <8 x float> %0, <8 x float> undef,
          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %x_hi = shufflevector <8 x float> %0, <8 x float> undef,
          <4 x i32> <i32 4, i32 5, i32 6, i32 7>

  ; lanes 0-3: sines come back by value, cosines land in the stack slot
  %cos_lo_ptr = alloca <4 x float>
  %sin_lo = call <4 x float> @__svml_sincosf4(<4 x float> * %cos_lo_ptr, <4 x float> %x_lo)

  ; lanes 4-7
  %cos_hi_ptr = alloca <4 x float>
  %sin_hi = call <4 x float> @__svml_sincosf4(<4 x float> * %cos_hi_ptr, <4 x float> %x_hi)

  ; rejoin the sines and store them
  %sin8 = shufflevector <4 x float> %sin_lo, <4 x float> %sin_hi,
          <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                     i32 4, i32 5, i32 6, i32 7>
  store <8 x float> %sin8, <8 x float> * %1

  ; read back the cosines, rejoin and store them
  %cos_lo = load <4 x float> * %cos_lo_ptr
  %cos_hi = load <4 x float> * %cos_hi_ptr
  %cos8 = shufflevector <4 x float> %cos_lo, <4 x float> %cos_hi,
          <8 x i32> <i32 0, i32 1, i32 2, i32 3,
                     i32 4, i32 5, i32 6, i32 7>
  store <8 x float> %cos8, <8 x float> * %2

  ret void
}
|
||||
|
||||
;; 8-wide tangent built on the 4-wide SVML routine.
define internal <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
  unary4to8(res, float, @__svml_tanf4, %0)
  ret <8 x float> %res
}
|
||||
|
||||
;; 8-wide arctangent built on the 4-wide SVML routine.
define internal <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
  unary4to8(res, float, @__svml_atanf4, %0)
  ret <8 x float> %res
}
|
||||
|
||||
;; 8-wide two-argument arctangent built on the 4-wide SVML routine.
;; The operands are forwarded in the order given (%0 then %1).
define internal <8 x float> @__svml_atan2(<8 x float>, <8 x float>) nounwind readnone alwaysinline {
  binary4to8(res, float, @__svml_atan2f4, %0, %1)
  ret <8 x float> %res
}
|
||||
|
||||
;; 8-wide exponential built on the 4-wide SVML routine.
define internal <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
  unary4to8(res, float, @__svml_expf4, %0)
  ret <8 x float> %res
}
|
||||
|
||||
;; 8-wide logarithm built on the 4-wide SVML routine.
define internal <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
  unary4to8(res, float, @__svml_logf4, %0)
  ret <8 x float> %res
}
|
||||
|
||||
;; 8-wide power function built on the 4-wide SVML routine.
;; The operands are forwarded in the order given (%0 then %1).
define internal <8 x float> @__svml_pow(<8 x float>, <8 x float>) nounwind readnone alwaysinline {
  binary4to8(res, float, @__svml_powf4, %0, %1)
  ret <8 x float> %res
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
declare <4 x float> @llvm.x86.sse.max.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.max.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.min.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse.min.ss(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
;; Per-lane float maximum of two 8-wide vectors, using the 4-wide
;; maxps intrinsic.
define internal <8 x float> @__max_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
  binary4to8(vmax, float, @llvm.x86.sse.max.ps, %0, %1)
  ret <8 x float> %vmax
}
|
||||
|
||||
;; Scalar float maximum via the maxss intrinsic.
define internal float @__max_uniform_float(float, float) nounwind readonly alwaysinline {
  sse_binary_scalar(smax, 4, float, @llvm.x86.sse.max.ss, %0, %1)
  ret float %smax
}
|
||||
|
||||
;; Per-lane float minimum of two 8-wide vectors, using the 4-wide
;; minps intrinsic.
define internal <8 x float> @__min_varying_float(<8 x float>, <8 x float>) nounwind readonly alwaysinline {
  binary4to8(vmin, float, @llvm.x86.sse.min.ps, %0, %1)
  ret <8 x float> %vmin
}
|
||||
|
||||
;; Scalar float minimum via the minss intrinsic.
define internal float @__min_uniform_float(float, float) nounwind readonly alwaysinline {
  sse_binary_scalar(smin, 4, float, @llvm.x86.sse.min.ss, %0, %1)
  ret float %smin
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; int min/max
|
||||
|
||||
declare <4 x i32> @llvm.x86.sse41.pminsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxsd(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
;; Per-lane signed 32-bit minimum of two 8-wide vectors, using the
;; 4-wide pminsd intrinsic.
define internal <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  binary4to8(vmin, i32, @llvm.x86.sse41.pminsd, %0, %1)
  ret <8 x i32> %vmin
}
|
||||
|
||||
;; Scalar signed 32-bit minimum.  The pminsd intrinsic is only available
;; in 4-wide form here, so the scalar helper macro is handed the packed op.
define internal i32 @__min_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(smin, 4, i32, @llvm.x86.sse41.pminsd, %0, %1)
  ret i32 %smin
}
|
||||
|
||||
;; Per-lane signed 32-bit maximum of two 8-wide vectors, using the
;; 4-wide pmaxsd intrinsic.
define internal <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  binary4to8(vmax, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
  ret <8 x i32> %vmax
}
|
||||
|
||||
;; Scalar signed 32-bit maximum.  The pmaxsd intrinsic is only available
;; in 4-wide form here, so the scalar helper macro is handed the packed op.
define internal i32 @__max_uniform_int32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(smax, 4, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
  ret i32 %smax
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; unsigned int min/max
|
||||
|
||||
declare <4 x i32> @llvm.x86.sse41.pminud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
declare <4 x i32> @llvm.x86.sse41.pmaxud(<4 x i32>, <4 x i32>) nounwind readnone
|
||||
|
||||
;; Per-lane unsigned 32-bit minimum of two 8-wide vectors, using the
;; 4-wide pminud intrinsic.
define internal <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  binary4to8(vmin, i32, @llvm.x86.sse41.pminud, %0, %1)
  ret <8 x i32> %vmin
}
|
||||
|
||||
;; Scalar unsigned 32-bit minimum, computed with the packed pminud
;; intrinsic through the scalar helper macro.
define internal i32 @__min_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(smin, 4, i32, @llvm.x86.sse41.pminud, %0, %1)
  ret i32 %smin
}
|
||||
|
||||
;; Per-lane unsigned 32-bit maximum of two 8-wide vectors, using the
;; 4-wide pmaxud intrinsic.
define internal <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
  binary4to8(vmax, i32, @llvm.x86.sse41.pmaxud, %0, %1)
  ret <8 x i32> %vmax
}
|
||||
|
||||
;; Scalar unsigned 32-bit maximum, computed with the packed pmaxud
;; intrinsic through the scalar helper macro.
define internal i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {
  sse_binary_scalar(smax, 4, i32, @llvm.x86.sse41.pmaxud, %0, %1)
  ret i32 %smax
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone
|
||||
|
||||
;; Collapse an 8-wide mask into an i32 with one bit per lane: the
;; movmskps result for lanes 0-3 fills bits 0-3 and the result for
;; lanes 4-7 fills bits 4-7.
define internal i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
  ; movmskps takes floats, so reinterpret the mask
  %mask_f = bitcast <8 x i32> %0 to <8 x float>

  ; lanes 0-3
  %lo = shufflevector <8 x float> %mask_f, <8 x float> undef,
        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %lo_bits = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %lo) nounwind readnone

  ; lanes 4-7
  %hi = shufflevector <8 x float> %mask_f, <8 x float> undef,
        <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %hi_bits = call i32 @llvm.x86.sse.movmsk.ps(<4 x float> %hi) nounwind readnone

  ; move the bits for lanes 4-7 up by four positions, then merge them
  ; with the bits for lanes 0-3
  %hi_moved = shl i32 %hi_bits, 4
  %bits = or i32 %lo_bits, %hi_moved
  ret i32 %bits
}
|
||||
|
||||
;; Minimum across the 8 float lanes of %0.  The reduction macro emits the
;; whole body, including the return; it is given the 4-wide min intrinsic
;; and the scalar min function to combine with.
define internal float @__reduce_min_float(<8 x float>) nounwind readnone alwaysinline {
  reduce8by4(float, @llvm.x86.sse.min.ps, @__min_uniform_float)
}
|
||||
|
||||
;; Maximum across the 8 float lanes of %0.  The reduction macro emits the
;; whole body, including the return; it is given the 4-wide max intrinsic
;; and the scalar max function to combine with.
define internal float @__reduce_max_float(<8 x float>) nounwind readnone alwaysinline {
  reduce8by4(float, @llvm.x86.sse.max.ps, @__max_uniform_float)
}
|
||||
|
||||
;; 4-wide integer add, wrapped as a function so it can be passed as the
;; vector operation when building __reduce_add_int32.
define internal <4 x i32> @__vec4_add_int32(<4 x i32> %v0,
                                            <4 x i32> %v1) nounwind readnone alwaysinline {
  %sum = add <4 x i32> %v0, %v1
  ret <4 x i32> %sum
}
|
||||
|
||||
;; Scalar integer add, wrapped as a function so it can be passed as the
;; scalar operation when building __reduce_add_int32.
define internal i32 @__add_int32(i32, i32) nounwind readnone alwaysinline {
  %sum = add i32 %0, %1
  ret i32 %sum
}
|
||||
|
||||
;; Sum of the 8 i32 lanes of %0.  The reduction macro emits the whole
;; body, including the return; it is given the 4-wide and scalar add
;; helper functions.
define internal i32 @__reduce_add_int32(<8 x i32>) nounwind readnone alwaysinline {
  reduce8by4(i32, @__vec4_add_int32, @__add_int32)
}
|
||||
|
||||
;; Signed minimum across the 8 i32 lanes of %0.  The reduction macro
;; emits the whole body, including the return.
define internal i32 @__reduce_min_int32(<8 x i32>) nounwind readnone alwaysinline {
  reduce8by4(i32, @llvm.x86.sse41.pminsd, @__min_uniform_int32)
}
|
||||
|
||||
;; Signed maximum across the 8 i32 lanes of %0.  The reduction macro
;; emits the whole body, including the return.
define internal i32 @__reduce_max_int32(<8 x i32>) nounwind readnone alwaysinline {
  reduce8by4(i32, @llvm.x86.sse41.pmaxsd, @__max_uniform_int32)
}
|
||||
|
||||
;; Sum of the 8 unsigned i32 lanes.  The add instruction produces the same
;; bits for signed and unsigned operands, so this simply forwards to the
;; signed reduction.
define internal i32 @__reduce_add_uint32(<8 x i32> %v) nounwind readnone alwaysinline {
  %sum = call i32 @__reduce_add_int32(<8 x i32> %v)
  ret i32 %sum
}
|
||||
|
||||
;; Unsigned minimum across the 8 i32 lanes of %0.  The reduction macro
;; emits the whole body, including the return.
define internal i32 @__reduce_min_uint32(<8 x i32>) nounwind readnone alwaysinline {
  reduce8by4(i32, @llvm.x86.sse41.pminud, @__min_uniform_uint32)
}
|
||||
|
||||
;; Unsigned maximum across the 8 i32 lanes of %0.  The reduction macro
;; emits the whole body, including the return.
define internal i32 @__reduce_max_uint32(<8 x i32>) nounwind readnone alwaysinline {
  reduce8by4(i32, @llvm.x86.sse41.pmaxud, @__max_uniform_uint32)
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
;; Masked store of 8 x i32.
;;   %0: destination vector in memory
;;   %1: values to store
;;   %2: mask
;; The per-lane macro is given the mask and the code to run for one
;; lane (NOTE(review): presumably it runs that code only for lanes whose
;; mask is on; confirm against the macro definition).
define void @__masked_store_32(<8 x i32>* nocapture, <8 x i32>,
                               <8 x i32>) nounwind alwaysinline {
  per_lane(8, <8 x i32> %2, `
      ; address of this element within the destination vector
      %dst_ID = getelementptr <8 x i32> * %0, i32 0, i32 LANE
      %elt_ID = extractelement <8 x i32> %1, i32 LANE
      store i32 %elt_ID, i32 * %dst_ID')
  ret void
}
|
||||
|
||||
|
||||
;; Masked store of 8 x i64.
;;   %0: destination vector in memory
;;   %1: values to store
;;   %2: mask (32-bit per lane)
;; The per-lane macro is given the mask and the code to run for one
;; lane (NOTE(review): presumably it runs that code only for lanes whose
;; mask is on; confirm against the macro definition).
define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
                               <8 x i32>) nounwind alwaysinline {
  per_lane(8, <8 x i32> %2, `
      ; address of this element within the destination vector
      %dst_ID = getelementptr <8 x i64> * %0, i32 0, i32 LANE
      %elt_ID = extractelement <8 x i64> %1, i32 LANE
      store i64 %elt_ID, i64 * %dst_ID')
  ret void
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; unaligned loads/loads+broadcasts
|
||||
|
||||
; FIXME: I think this and the next one need to verify that the mask isn't
|
||||
; all off before doing the load!!! (See e.g. stdlib-sse.ll)
|
||||
|
||||
;; Load one 32-bit value from %0 and replicate it into all 8 lanes.
;; The load is unconditional: %mask is accepted but never consulted.
define <8 x i32> @__load_and_broadcast_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  %src = bitcast i8 * %0 to i32 *
  %scalar = load i32 * %src

  ; fill the result one lane at a time
  %fill0 = insertelement <8 x i32> undef,  i32 %scalar, i32 0
  %fill1 = insertelement <8 x i32> %fill0, i32 %scalar, i32 1
  %fill2 = insertelement <8 x i32> %fill1, i32 %scalar, i32 2
  %fill3 = insertelement <8 x i32> %fill2, i32 %scalar, i32 3
  %fill4 = insertelement <8 x i32> %fill3, i32 %scalar, i32 4
  %fill5 = insertelement <8 x i32> %fill4, i32 %scalar, i32 5
  %fill6 = insertelement <8 x i32> %fill5, i32 %scalar, i32 6
  %fill7 = insertelement <8 x i32> %fill6, i32 %scalar, i32 7
  ret <8 x i32> %fill7
}
|
||||
|
||||
|
||||
;; Load one 64-bit value from %0 and replicate it into all 8 lanes.
;; The load is unconditional: %mask is accepted but never consulted.
define <8 x i64> @__load_and_broadcast_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  %src = bitcast i8 * %0 to i64 *
  %scalar = load i64 * %src

  ; fill the result one lane at a time
  %fill0 = insertelement <8 x i64> undef,  i64 %scalar, i32 0
  %fill1 = insertelement <8 x i64> %fill0, i64 %scalar, i32 1
  %fill2 = insertelement <8 x i64> %fill1, i64 %scalar, i32 2
  %fill3 = insertelement <8 x i64> %fill2, i64 %scalar, i32 3
  %fill4 = insertelement <8 x i64> %fill3, i64 %scalar, i32 4
  %fill5 = insertelement <8 x i64> %fill4, i64 %scalar, i32 5
  %fill6 = insertelement <8 x i64> %fill5, i64 %scalar, i32 6
  %fill7 = insertelement <8 x i64> %fill6, i64 %scalar, i32 7
  ret <8 x i64> %fill7
}
|
||||
|
||||
|
||||
;; Load a full 8 x i32 vector from %0, assuming only 4-byte alignment.
;; All 8 lanes are read regardless of %mask, which is never consulted.
define <8 x i32> @__load_masked_32(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  %vec_ptr = bitcast i8 * %0 to <8 x i32> *
  %loaded = load <8 x i32> * %vec_ptr, align 4
  ret <8 x i32> %loaded
}
|
||||
|
||||
|
||||
;; Load a full 8 x i64 vector from %0, assuming only 8-byte alignment.
;; All 8 lanes are read regardless of %mask, which is never consulted.
define <8 x i64> @__load_masked_64(i8 *, <8 x i32> %mask) nounwind alwaysinline {
  %vec_ptr = bitcast i8 * %0 to <8 x i64> *
  %loaded = load <8 x i64> * %vec_ptr, align 8
  ret <8 x i64> %loaded
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather/scatter
|
||||
|
||||
; Instantiate the gather and scatter code for 8-wide vectors with i32 and
; i64 elements.  NOTE(review): the macro bodies live elsewhere; confirm
; there exactly which functions each instantiation emits.
gen_gather(8, i32)
|
||||
gen_gather(8, i64)
|
||||
gen_scatter(8, i32)
|
||||
gen_scatter(8, i64)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; math
|
||||
|
||||
declare <4 x float> @llvm.x86.sse41.round.ps(<4 x float>, i32) nounwind readnone
|
||||
declare <4 x float> @llvm.x86.sse41.round.ss(<4 x float>, <4 x float>, i32) nounwind readnone
|
||||
|
||||
;; Varying round-to-nearest via roundps.
;; Immediate 8 = rounding mode 0b00 (nearest) combined with 0b1000
;; (do not signal precision exceptions).  The rounding macro emits the
;; whole body, including the return.
define internal <8 x float> @__round_varying_float(<8 x float>) nounwind readonly alwaysinline {
  round4to8(%0, 8)
}
|
||||
|
||||
;; Uniform round-to-nearest via roundss.
;; Immediate 8 = rounding mode 0b00 (nearest) combined with 0b1000
;; (do not signal precision exceptions).
;;
;; About the roundss intrinsic: it mirrors
;;     __m128 _mm_round_ss (__m128 a, __m128 b, const int c)
;; whose result is
;;     r0 = RND(b0),  r1 = a1,  r2 = a2,  r3 = a3
;; so element 0 is the rounded low element of b and the upper three
;; elements are copied from a.  Only r0 is needed here, so the value
;; passed for a is irrelevant and the same vector is passed for both.
define internal float @__round_uniform_float(float) nounwind readonly alwaysinline {
  %x_vec = insertelement <4 x float> undef, float %0, i32 0
  %rounded_vec = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %x_vec, <4 x float> %x_vec, i32 8)
  %rounded = extractelement <4 x float> %rounded_vec, i32 0
  ret float %rounded
}
|
||||
|
||||
;; Varying floor via roundps.
;; Immediate 9 = rounding mode 0b01 (round down) combined with 0b1000
;; (do not signal precision exceptions).  The rounding macro emits the
;; whole body, including the return.
define internal <8 x float> @__floor_varying_float(<8 x float>) nounwind readonly alwaysinline {
  round4to8(%0, 9)
}
|
||||
|
||||
;; Uniform floor via roundss.
;; Immediate 9 = rounding mode 0b01 (round down) combined with 0b1000
;; (do not signal precision exceptions).  Only element 0 of the result
;; matters, so the same vector is passed for both vector operands (see
;; the discussion of the intrinsic at __round_uniform_float).
define internal float @__floor_uniform_float(float) nounwind readonly alwaysinline {
  %x_vec = insertelement <4 x float> undef, float %0, i32 0
  %rounded_vec = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %x_vec, <4 x float> %x_vec, i32 9)
  %rounded = extractelement <4 x float> %rounded_vec, i32 0
  ret float %rounded
}
|
||||
|
||||
;; Varying ceiling via roundps.
;; Immediate 10 = rounding mode 0b10 (round up) combined with 0b1000
;; (do not signal precision exceptions).  The rounding macro emits the
;; whole body, including the return.
define internal <8 x float> @__ceil_varying_float(<8 x float>) nounwind readonly alwaysinline {
  round4to8(%0, 10)
}
|
||||
|
||||
;; Uniform ceiling via roundss.
;; Immediate 10 = rounding mode 0b10 (round up) combined with 0b1000
;; (do not signal precision exceptions).  Only element 0 of the result
;; matters, so the same vector is passed for both vector operands (see
;; the discussion of the intrinsic at __round_uniform_float).
define internal float @__ceil_uniform_float(float) nounwind readonly alwaysinline {
  %x_vec = insertelement <4 x float> undef, float %0, i32 0
  %rounded_vec = call <4 x float> @llvm.x86.sse41.round.ss(<4 x float> %x_vec, <4 x float> %x_vec, i32 10)
  %rounded = extractelement <4 x float> %rounded_vec, i32 0
  ret float %rounded
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
declare i32 @llvm.ctpop.i32(i32) nounwind readnone
|
||||
|
||||
;; Number of set bits in a 32-bit value, via the generic ctpop intrinsic.
define internal i32 @__popcnt(i32) nounwind readonly alwaysinline {
  %bits_set = call i32 @llvm.ctpop.i32(i32 %0)
  ret i32 %bits_set
}
|
||||
|
||||
declare <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
;; Sum of the 8 float lanes of %0.
define internal float @__reduce_add_float(<8 x float>) nounwind readonly alwaysinline {
  ; add lanes 4-7 onto lanes 0-3, leaving four partial sums p0..p3
  %lo = shufflevector <8 x float> %0, <8 x float> undef,
        <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %hi = shufflevector <8 x float> %0, <8 x float> undef,
        <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %partial = fadd <4 x float> %lo, %hi

  ; haddps of the vector with itself puts p0+p1 in element 0 and
  ; p2+p3 in element 1
  %pairs = call <4 x float> @llvm.x86.sse3.hadd.ps(<4 x float> %partial, <4 x float> %partial)
  %p01 = extractelement <4 x float> %pairs, i32 0
  %p23 = extractelement <4 x float> %pairs, i32 1

  %total = fadd float %p01, %p23
  ret float %total
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; masked store
|
||||
|
||||
declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>,
|
||||
<4 x float>) nounwind readnone
|
||||
|
||||
;; Masked store of 8 x i32 done as load / blend / store.
;;   %0:    destination vector in memory (read and fully rewritten)
;;   %1:    new values
;;   %mask: per-lane mask handed to blendvps as the selector
;; blendvps is 4-wide and typed on floats, so everything is bitcast to
;; float and handled as two halves.
define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
                                     <8 x i32> %mask) nounwind alwaysinline {
  ; selector halves
  %sel_f = bitcast <8 x i32> %mask to <8 x float>
  %sel_lo = shufflevector <8 x float> %sel_f, <8 x float> undef,
            <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %sel_hi = shufflevector <8 x float> %sel_f, <8 x float> undef,
            <4 x i32> <i32 4, i32 5, i32 6, i32 7>

  ; current memory contents and incoming values, viewed as floats
  %cur = load <8 x i32>* %0
  %cur_f = bitcast <8 x i32> %cur to <8 x float>
  %in_f = bitcast <8 x i32> %1 to <8 x float>

  %cur_lo = shufflevector <8 x float> %cur_f, <8 x float> undef,
            <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %cur_hi = shufflevector <8 x float> %cur_f, <8 x float> undef,
            <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %in_lo = shufflevector <8 x float> %in_f, <8 x float> undef,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %in_hi = shufflevector <8 x float> %in_f, <8 x float> undef,
           <4 x i32> <i32 4, i32 5, i32 6, i32 7>

  ; blend each half: operands are (current, incoming, selector)
  %out_lo = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur_lo, <4 x float> %in_lo,
                                                      <4 x float> %sel_lo)
  %out_hi = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur_hi, <4 x float> %in_hi,
                                                      <4 x float> %sel_hi)

  ; rejoin the halves and write the whole vector back
  %out_f = shufflevector <4 x float> %out_lo, <4 x float> %out_hi,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  %out = bitcast <8 x float> %out_f to <8 x i32>
  store <8 x i32> %out, <8 x i32>* %0
  ret void
}
|
||||
|
||||
;; Masked store of 8 x i64 done as load / blend / store.
;; blendvps selects 32-bit elements four at a time, so the 64-bit data is
;; processed as four pairs of i64, each pair bitcast to <4 x float>.  The
;; mask has one 32-bit entry per 64-bit element, so each mask entry is
;; duplicated to cover both 32-bit halves of its element.
define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
                                     <8 x i32> %mask) nounwind alwaysinline {
  %sel_f = bitcast <8 x i32> %mask to <8 x float>
  %cur = load <8 x i64>* %ptr

  ; ---- elements 0 and 1 ----
  %cur01 = shufflevector <8 x i64> %cur, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %cur01f = bitcast <2 x i64> %cur01 to <4 x float>
  %in01 = shufflevector <8 x i64> %new, <8 x i64> undef, <2 x i32> <i32 0, i32 1>
  %in01f = bitcast <2 x i64> %in01 to <4 x float>
  ; selector entries 0 and 1, each doubled
  %sel01 = shufflevector <8 x float> %sel_f, <8 x float> undef,
           <4 x i32> <i32 0, i32 0, i32 1, i32 1>
  %out01f = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur01f,
                                                      <4 x float> %in01f,
                                                      <4 x float> %sel01)
  %out01 = bitcast <4 x float> %out01f to <2 x i64>

  ; ---- elements 2 and 3 ----
  %cur23 = shufflevector <8 x i64> %cur, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
  %cur23f = bitcast <2 x i64> %cur23 to <4 x float>
  %in23 = shufflevector <8 x i64> %new, <8 x i64> undef, <2 x i32> <i32 2, i32 3>
  %in23f = bitcast <2 x i64> %in23 to <4 x float>
  %sel23 = shufflevector <8 x float> %sel_f, <8 x float> undef,
           <4 x i32> <i32 2, i32 2, i32 3, i32 3>
  %out23f = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur23f,
                                                      <4 x float> %in23f,
                                                      <4 x float> %sel23)
  %out23 = bitcast <4 x float> %out23f to <2 x i64>

  ; ---- elements 4 and 5 ----
  %cur45 = shufflevector <8 x i64> %cur, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
  %cur45f = bitcast <2 x i64> %cur45 to <4 x float>
  %in45 = shufflevector <8 x i64> %new, <8 x i64> undef, <2 x i32> <i32 4, i32 5>
  %in45f = bitcast <2 x i64> %in45 to <4 x float>
  %sel45 = shufflevector <8 x float> %sel_f, <8 x float> undef,
           <4 x i32> <i32 4, i32 4, i32 5, i32 5>
  %out45f = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur45f,
                                                      <4 x float> %in45f,
                                                      <4 x float> %sel45)
  %out45 = bitcast <4 x float> %out45f to <2 x i64>

  ; ---- elements 6 and 7 ----
  %cur67 = shufflevector <8 x i64> %cur, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
  %cur67f = bitcast <2 x i64> %cur67 to <4 x float>
  %in67 = shufflevector <8 x i64> %new, <8 x i64> undef, <2 x i32> <i32 6, i32 7>
  %in67f = bitcast <2 x i64> %in67 to <4 x float>
  %sel67 = shufflevector <8 x float> %sel_f, <8 x float> undef,
           <4 x i32> <i32 6, i32 6, i32 7, i32 7>
  %out67f = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %cur67f,
                                                      <4 x float> %in67f,
                                                      <4 x float> %sel67)
  %out67 = bitcast <4 x float> %out67f to <2 x i64>

  ; ---- stitch the four pairs back into one 8-wide vector and store ----
  %out0123 = shufflevector <2 x i64> %out01, <2 x i64> %out23,
             <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %out4567 = shufflevector <2 x i64> %out45, <2 x i64> %out67,
             <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %out = shufflevector <4 x i64> %out0123, <4 x i64> %out4567,
         <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  store <8 x i64> %out, <8 x i64> * %ptr
  ret void
}
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision sqrt
|
||||
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.pd(<2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.sqrt.sd(<2 x double>) nounwind readnone
|
||||
|
||||
;; Varying double-precision square root, computed with the 2-wide
;; sqrtpd intrinsic over the 8-wide input.
define internal <8 x double> @__sqrt_varying_double(<8 x double>) nounwind alwaysinline {
  unary2to8(root, double, @llvm.x86.sse2.sqrt.pd, %0)
  ret <8 x double> %root
}
|
||||
|
||||
|
||||
;; Uniform double-precision square root.
;; Uses the scalar sqrtsd intrinsic, matching the float version which uses
;; sqrtss.  Only element 0 of the vector handed to the intrinsic holds a
;; real value, so the packed sqrtpd form would also take the square root
;; of an undefined upper element for no benefit.
define internal double @__sqrt_uniform_double(double) nounwind alwaysinline {
  sse_unary_scalar(ret, 2, double, @llvm.x86.sse2.sqrt.sd, %0)
  ret double %ret
}
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; double precision float min/max
|
||||
|
||||
declare <2 x double> @llvm.x86.sse2.max.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.max.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.min.pd(<2 x double>, <2 x double>) nounwind readnone
|
||||
declare <2 x double> @llvm.x86.sse2.min.sd(<2 x double>, <2 x double>) nounwind readnone
|
||||
|
||||
;; Per-lane double minimum of two 8-wide vectors, using the 2-wide
;; minpd intrinsic.
define internal <8 x double> @__min_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
  binary2to8(vmin, double, @llvm.x86.sse2.min.pd, %0, %1)
  ret <8 x double> %vmin
}
|
||||
|
||||
;; Scalar double minimum.
;; Uses the scalar minsd intrinsic, matching the float version which uses
;; minss.  Only element 0 of each operand vector holds a real value, so
;; the packed minpd form would also compare undefined upper elements.
define internal double @__min_uniform_double(double, double) nounwind readnone alwaysinline {
  sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.min.sd, %0, %1)
  ret double %ret
}
|
||||
|
||||
;; Per-lane double maximum of two 8-wide vectors, using the 2-wide
;; maxpd intrinsic.
define internal <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind readnone alwaysinline {
  binary2to8(vmax, double, @llvm.x86.sse2.max.pd, %0, %1)
  ret <8 x double> %vmax
}
|
||||
|
||||
;; Scalar double maximum.
;; Uses the scalar maxsd intrinsic, matching the float version which uses
;; maxss.  Only element 0 of each operand vector holds a real value, so
;; the packed maxpd form would also compare undefined upper elements.
define internal double @__max_uniform_double(double, double) nounwind readnone alwaysinline {
  sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
  ret double %ret
}
|
||||
1605
stdlib.ispc
Normal file
1605
stdlib.ispc
Normal file
File diff suppressed because it is too large
Load Diff
835
stdlib.m4
Normal file
835
stdlib.m4
Normal file
@@ -0,0 +1,835 @@
|
||||
;; Copyright (c) 2010-2011, Intel Corporation
|
||||
;; All rights reserved.
|
||||
;;
|
||||
;; Redistribution and use in source and binary forms, with or without
|
||||
;; modification, are permitted provided that the following conditions are
|
||||
;; met:
|
||||
;;
|
||||
;; * Redistributions of source code must retain the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer.
|
||||
;;
|
||||
;; * Redistributions in binary form must reproduce the above copyright
|
||||
;; notice, this list of conditions and the following disclaimer in the
|
||||
;; documentation and/or other materials provided with the distribution.
|
||||
;;
|
||||
;; * Neither the name of Intel Corporation nor the names of its
|
||||
;; contributors may be used to endorse or promote products derived from
|
||||
;; this software without specific prior written permission.
|
||||
;;
|
||||
;;
|
||||
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
|
||||
;; This file provides a variety of macros used to generate LLVM bitcode
|
||||
;; parametrized in various ways. Implementations of the standard library
|
||||
;; builtins for various targets can use macros from this file to simplify
|
||||
;; generating code for their implementations of those builtins.
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
|
||||
;; Helper macro for calling various SSE instructions for scalar values
|
||||
;; but where the instruction takes a vector parameter.
|
||||
;; $1 : name of variable to put the final value in
|
||||
;; $2 : vector width of the target
|
||||
;; $3 : scalar type of the operand
|
||||
;; $4 : SSE intrinsic name
|
||||
;; $5 : variable name that has the scalar value
|
||||
;; For example, the following call causes the variable %ret to have
|
||||
;; the result of a call to sqrtss with the scalar value in %0
|
||||
;; sse_unary_scalar(ret, 4, float, @llvm.x86.sse.sqrt.ss, %0)
|
||||
|
||||
define(`sse_unary_scalar', `
  ; put the scalar in lane 0; the remaining lanes stay undef
  %$1_in = insertelement <$2 x $3> undef, $3 $5, i32 0
  ; run the vector intrinsic on the padded value
  %$1_out = call <$2 x $3> $4(<$2 x $3> %$1_in)
  ; lane 0 of the result is the scalar answer
  %$1 = extractelement <$2 x $3> %$1_out, i32 0
')
|
||||
|
||||
;; Similar to `sse_unary_scalar', this helper macro is for calling binary
|
||||
;; SSE instructions with scalar values.
|
||||
;; $1: name of variable to put the result in
|
||||
;; $2: vector width of the target
|
||||
;; $3: scalar type of the operand
|
||||
;; $4 : SSE intrinsic name
|
||||
;; $5 : variable name that has the first scalar operand
|
||||
;; $6 : variable name that has the second scalar operand
|
||||
|
||||
define(`sse_binary_scalar', `
  ; put each scalar operand in lane 0 of its own vector; other lanes stay undef
  %$1_lhs = insertelement <$2 x $3> undef, $3 $5, i32 0
  %$1_rhs = insertelement <$2 x $3> undef, $3 $6, i32 0
  ; run the vector intrinsic on the two padded values
  %$1_out = call <$2 x $3> $4(<$2 x $3> %$1_lhs, <$2 x $3> %$1_rhs)
  ; lane 0 of the result is the scalar answer
  %$1 = extractelement <$2 x $3> %$1_out, i32 0
')
|
||||
|
||||
;; Do a reduction over a 4-wide vector
|
||||
;; $1: type of final scalar result
|
||||
;; $2: 4-wide function that takes 2 4-wide operands and returns the
|
||||
;; element-wise reduction
|
||||
;; $3: scalar function that takes two scalar operands and returns
|
||||
;; the final reduction
|
||||
|
||||
define(`reduce4', `
  ; bring lanes 2 and 3 down to lanes 0 and 1
  %upper = shufflevector <4 x $1> %0, <4 x $1> undef,
                         <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  ; lanes 0 and 1 now hold the pairwise results of (2 with 0) and (3 with 1)
  %pairs = call <4 x $1> $2(<4 x $1> %upper, <4 x $1> %0)
  %pair0 = extractelement <4 x $1> %pairs, i32 0
  %pair1 = extractelement <4 x $1> %pairs, i32 1
  ; combine the two partial results with the scalar function
  %reduced = call $1 $3($1 %pair0, $1 %pair1)
  ret $1 %reduced
'
)
|
||||
|
||||
;; Similar to `reduce4', do a reduction over an 8-wide vector
|
||||
;; $1: type of final scalar result
|
||||
;; $2: 8-wide function that takes 2 8-wide operands and returns the
|
||||
;; element-wise reduction
|
||||
;; $3: scalar function that takes two scalar operands and returns
|
||||
;; the final reduction
|
||||
|
||||
define(`reduce8', `
  ; step 1: combine lanes 4..7 with lanes 0..3
  %upper4 = shufflevector <8 x $1> %0, <8 x $1> undef,
      <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 undef, i32 undef, i32 undef, i32 undef>
  %quad = call <8 x $1> $2(<8 x $1> %upper4, <8 x $1> %0)
  ; step 2: combine lanes 2..3 of that result with its lanes 0..1
  %upper2 = shufflevector <8 x $1> %quad, <8 x $1> undef,
      <8 x i32> <i32 2, i32 3, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef, i32 undef>
  %pairs = call <8 x $1> $2(<8 x $1> %upper2, <8 x $1> %quad)
  ; step 3: finish with the scalar function on the two remaining lanes
  %pair0 = extractelement <8 x $1> %pairs, i32 0
  %pair1 = extractelement <8 x $1> %pairs, i32 1
  %reduced = call $1 $3($1 %pair0, $1 %pair1)
  ret $1 %reduced
'
)
|
||||
|
||||
;; Do a reduction over an 8-wide vector, using a vector reduction function
|
||||
;; that only takes 4-wide vectors
|
||||
;; $1: type of final scalar result
|
||||
;; $2: 4-wide function that takes 2 4-wide operands and returns the
|
||||
;; element-wise reduction
|
||||
;; $3: scalar function that takes two scalar operands and returns
|
||||
;; the final reduction
|
||||
|
||||
define(`reduce8by4', `
  ; split the 8-wide operand into its low and high 4-wide halves
  %lower4 = shufflevector <8 x $1> %0, <8 x $1> undef,
                          <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %upper4 = shufflevector <8 x $1> %0, <8 x $1> undef,
                          <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; combine the two halves element-wise
  %quad = call <4 x $1> $2(<4 x $1> %lower4, <4 x $1> %upper4)
  ; combine lanes 2..3 of that result with its lanes 0..1
  %upper2 = shufflevector <4 x $1> %quad, <4 x $1> undef,
                          <4 x i32> <i32 2, i32 3, i32 undef, i32 undef>
  %pairs = call <4 x $1> $2(<4 x $1> %upper2, <4 x $1> %quad)
  ; finish with the scalar function on the two remaining lanes
  %pair0 = extractelement <4 x $1> %pairs, i32 0
  %pair1 = extractelement <4 x $1> %pairs, i32 1
  %reduced = call $1 $3($1 %pair0, $1 %pair1)
  ret $1 %reduced
'
)
|
||||
|
||||
|
||||
;; Given a unary function that takes a 2-wide vector and a 4-wide vector
|
||||
;; that we'd like to apply it to, extract 2 2-wide vectors from the 4-wide
|
||||
;; vector, apply it, and return the corresponding 4-wide vector result
|
||||
;; $1: name of variable into which the final result should go
|
||||
;; $2: scalar type of the vector elements
|
||||
;; $3: 2-wide unary vector function to apply
|
||||
;; $4: 4-wide operand value
|
||||
|
||||
define(`unary2to4', `
  ; split the operand into its low and high 2-wide halves
  %$1_lo = shufflevector <4 x $2> $4, <4 x $2> undef, <2 x i32> <i32 0, i32 1>
  %$1_hi = shufflevector <4 x $2> $4, <4 x $2> undef, <2 x i32> <i32 2, i32 3>
  ; apply the 2-wide function to each half
  %$1_lo_res = call <2 x $2> $3(<2 x $2> %$1_lo)
  %$1_hi_res = call <2 x $2> $3(<2 x $2> %$1_hi)
  ; concatenate the two results
  %$1 = shufflevector <2 x $2> %$1_lo_res, <2 x $2> %$1_hi_res,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
'
)
|
||||
|
||||
;; Similar to `unary2to4', this applies a 2-wide binary function to two 4-wide
|
||||
;; vector operands
|
||||
;; $1: name of variable into which the final result should go
|
||||
;; $2: scalar type of the vector elements
|
||||
;; $3: 2-wide binary vector function to apply
|
||||
;; $4: First 4-wide operand value
|
||||
;; $5: Second 4-wide operand value
|
||||
|
||||
define(`binary2to4', `
  ; low halves of both operands
  %$1_lo_a = shufflevector <4 x $2> $4, <4 x $2> undef, <2 x i32> <i32 0, i32 1>
  %$1_lo_b = shufflevector <4 x $2> $5, <4 x $2> undef, <2 x i32> <i32 0, i32 1>
  ; high halves of both operands
  %$1_hi_a = shufflevector <4 x $2> $4, <4 x $2> undef, <2 x i32> <i32 2, i32 3>
  %$1_hi_b = shufflevector <4 x $2> $5, <4 x $2> undef, <2 x i32> <i32 2, i32 3>
  ; apply the 2-wide function to each pair of halves
  %$1_lo_res = call <2 x $2> $3(<2 x $2> %$1_lo_a, <2 x $2> %$1_lo_b)
  %$1_hi_res = call <2 x $2> $3(<2 x $2> %$1_hi_a, <2 x $2> %$1_hi_b)
  ; concatenate the two results
  %$1 = shufflevector <2 x $2> %$1_lo_res, <2 x $2> %$1_hi_res,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
'
)
|
||||
|
||||
;; Similar to `unary2to4', this maps a 4-wide unary function to an 8-wide
|
||||
;; vector operand
|
||||
;; $1: name of variable into which the final result should go
|
||||
;; $2: scalar type of the vector elements
|
||||
;; $3: 4-wide unary vector function to apply
|
||||
;; $4: 8-wide operand value
|
||||
|
||||
define(`unary4to8', `
  ; split the operand into its low and high 4-wide halves
  %$1_lo = shufflevector <8 x $2> $4, <8 x $2> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %$1_hi = shufflevector <8 x $2> $4, <8 x $2> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; apply the 4-wide function to each half
  %$1_lo_res = call <4 x $2> $3(<4 x $2> %$1_lo)
  %$1_hi_res = call <4 x $2> $3(<4 x $2> %$1_hi)
  ; concatenate the two results
  %$1 = shufflevector <4 x $2> %$1_lo_res, <4 x $2> %$1_hi_res,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
'
)
|
||||
|
||||
;; And along the lines of `binary2to4', this maps a 4-wide binary function to
|
||||
;; two 8-wide vector operands
|
||||
;; $1: name of variable into which the final result should go
|
||||
;; $2: scalar type of the vector elements
|
||||
;; $3: 4-wide binary vector function to apply
|
||||
;; $4: First 8-wide operand value
|
||||
;; $5: Second 8-wide operand value
|
||||
|
||||
define(`binary4to8', `
  ; low halves of both operands
  %$1_lo_a = shufflevector <8 x $2> $4, <8 x $2> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %$1_lo_b = shufflevector <8 x $2> $5, <8 x $2> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  ; high halves of both operands
  %$1_hi_a = shufflevector <8 x $2> $4, <8 x $2> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  %$1_hi_b = shufflevector <8 x $2> $5, <8 x $2> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; apply the 4-wide function to each pair of halves
  %$1_lo_res = call <4 x $2> $3(<4 x $2> %$1_lo_a, <4 x $2> %$1_lo_b)
  %$1_hi_res = call <4 x $2> $3(<4 x $2> %$1_hi_a, <4 x $2> %$1_hi_b)
  ; concatenate the two results
  %$1 = shufflevector <4 x $2> %$1_lo_res, <4 x $2> %$1_hi_res,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
'
)
|
||||
|
||||
|
||||
;; Maps a 2-wide unary function to an 8-wide vector operand, returning an
|
||||
;; 8-wide vector result
|
||||
;; $1: name of variable into which the final result should go
|
||||
;; $2: scalar type of the vector elements
|
||||
;; $3: 2-wide unary vector function to apply
|
||||
;; $4: 8-wide operand value
|
||||
|
||||
define(`unary2to8', `
  ; carve the 8-wide operand into four 2-wide quarters
  %$1_q0 = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 0, i32 1>
  %$1_q1 = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 2, i32 3>
  %$1_q2 = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 4, i32 5>
  %$1_q3 = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 6, i32 7>
  ; apply the 2-wide function to each quarter
  %$1_r0 = call <2 x $2> $3(<2 x $2> %$1_q0)
  %$1_r1 = call <2 x $2> $3(<2 x $2> %$1_q1)
  %$1_r2 = call <2 x $2> $3(<2 x $2> %$1_q2)
  %$1_r3 = call <2 x $2> $3(<2 x $2> %$1_q3)
  ; rebuild the 8-wide result: quarters to halves, then halves to the whole
  %$1_lo = shufflevector <2 x $2> %$1_r0, <2 x $2> %$1_r1,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %$1_hi = shufflevector <2 x $2> %$1_r2, <2 x $2> %$1_r3,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %$1 = shufflevector <4 x $2> %$1_lo, <4 x $2> %$1_hi,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
'
)
|
||||
|
||||
;; Maps a 2-wide binary function to two 8-wide vector operands
|
||||
;; $1: name of variable into which the final result should go
|
||||
;; $2: scalar type of the vector elements
|
||||
;; $3: 2-wide binary vector function to apply
|
||||
;; $4: First 8-wide operand value
|
||||
;; $5: Second 8-wide operand value
|
||||
|
||||
define(`binary2to8', `
  ; carve both 8-wide operands into four 2-wide quarters each
  %$1_q0_a = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 0, i32 1>
  %$1_q0_b = shufflevector <8 x $2> $5, <8 x $2> undef, <2 x i32> <i32 0, i32 1>
  %$1_q1_a = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 2, i32 3>
  %$1_q1_b = shufflevector <8 x $2> $5, <8 x $2> undef, <2 x i32> <i32 2, i32 3>
  %$1_q2_a = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 4, i32 5>
  %$1_q2_b = shufflevector <8 x $2> $5, <8 x $2> undef, <2 x i32> <i32 4, i32 5>
  %$1_q3_a = shufflevector <8 x $2> $4, <8 x $2> undef, <2 x i32> <i32 6, i32 7>
  %$1_q3_b = shufflevector <8 x $2> $5, <8 x $2> undef, <2 x i32> <i32 6, i32 7>
  ; apply the 2-wide function to each pair of quarters
  %$1_r0 = call <2 x $2> $3(<2 x $2> %$1_q0_a, <2 x $2> %$1_q0_b)
  %$1_r1 = call <2 x $2> $3(<2 x $2> %$1_q1_a, <2 x $2> %$1_q1_b)
  %$1_r2 = call <2 x $2> $3(<2 x $2> %$1_q2_a, <2 x $2> %$1_q2_b)
  %$1_r3 = call <2 x $2> $3(<2 x $2> %$1_q3_a, <2 x $2> %$1_q3_b)
  ; rebuild the 8-wide result: quarters to halves, then halves to the whole
  %$1_lo = shufflevector <2 x $2> %$1_r0, <2 x $2> %$1_r1,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %$1_hi = shufflevector <2 x $2> %$1_r2, <2 x $2> %$1_r3,
           <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %$1 = shufflevector <4 x $2> %$1_lo, <4 x $2> %$1_hi,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
'
)
|
||||
|
||||
;; The unary SSE round intrinsic takes a second argument that encodes the
|
||||
;; rounding mode. This macro makes it easier to apply the 4-wide roundps
|
||||
;; to 8-wide vector operands
|
||||
;; $1: value to be rounded
|
||||
;; $2: integer encoding of rounding mode
|
||||
;; FIXME: this just has a ret statement at the end to return the result,
|
||||
;; which is inconsistent with the macros above
|
||||
|
||||
define(`round4to8', `
  ; split the 8-wide float operand into two 4-wide halves
  %lower = shufflevector <8 x float> $1, <8 x float> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 3>
  %upper = shufflevector <8 x float> $1, <8 x float> undef, <4 x i32> <i32 4, i32 5, i32 6, i32 7>
  ; round each half with the requested rounding mode
  %lower_rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %lower, i32 $2)
  %upper_rounded = call <4 x float> @llvm.x86.sse41.round.ps(<4 x float> %upper, i32 $2)
  ; concatenate the halves and return the 8-wide result directly
  %rounded = shufflevector <4 x float> %lower_rounded, <4 x float> %upper_rounded,
           <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
  ret <8 x float> %rounded
'
)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; stdlib_core
|
||||
;;
|
||||
;; This macro defines a bunch of helper routines that only depend on the
|
||||
;; target's vector width, which it takes as its first parameter.
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
|
||||
define(`stdlib_core', `

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector ops

;; Returns the element of the vector %0 at the lane number given in %1.
define internal float @__extract(<$1 x float>, i32) nounwind readnone alwaysinline {
  %extract = extractelement <$1 x float> %0, i32 %1
  ret float %extract
}

;; Returns a copy of the vector %0 with the lane numbered %1 replaced by the
;; scalar %2.
define internal <$1 x float> @__insert(<$1 x float>, i32,
                                       float) nounwind readnone alwaysinline {
  %insert = insertelement <$1 x float> %0, float %2, i32 %1
  ret <$1 x float> %insert
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; various bitcasts from one type to another
;;
;; Each of these is a plain bitcast: the bits are reinterpreted as the other
;; type and no numeric conversion takes place.

define internal <$1 x i32> @__intbits_varying_float(<$1 x float>) nounwind readnone alwaysinline {
  %float_to_int_bitcast = bitcast <$1 x float> %0 to <$1 x i32>
  ret <$1 x i32> %float_to_int_bitcast
}

define internal i32 @__intbits_uniform_float(float) nounwind readnone alwaysinline {
  %float_to_int_bitcast = bitcast float %0 to i32
  ret i32 %float_to_int_bitcast
}

define internal <$1 x i64> @__intbits_varying_double(<$1 x double>) nounwind readnone alwaysinline {
  %double_to_int_bitcast = bitcast <$1 x double> %0 to <$1 x i64>
  ret <$1 x i64> %double_to_int_bitcast
}

define internal i64 @__intbits_uniform_double(double) nounwind readnone alwaysinline {
  %double_to_int_bitcast = bitcast double %0 to i64
  ret i64 %double_to_int_bitcast
}

define internal <$1 x float> @__floatbits_varying_int32(<$1 x i32>) nounwind readnone alwaysinline {
  %int_to_float_bitcast = bitcast <$1 x i32> %0 to <$1 x float>
  ret <$1 x float> %int_to_float_bitcast
}

define internal float @__floatbits_uniform_int32(i32) nounwind readnone alwaysinline {
  %int_to_float_bitcast = bitcast i32 %0 to float
  ret float %int_to_float_bitcast
}

define internal <$1 x double> @__doublebits_varying_int64(<$1 x i64>) nounwind readnone alwaysinline {
  %int_to_double_bitcast = bitcast <$1 x i64> %0 to <$1 x double>
  ret <$1 x double> %int_to_double_bitcast
}

define internal double @__doublebits_uniform_int64(i64) nounwind readnone alwaysinline {
  %int_to_double_bitcast = bitcast i64 %0 to double
  ret double %int_to_double_bitcast
}

;; Return an undef float vector / an undef float scalar.
define internal <$1 x float> @__undef_varying() nounwind readnone alwaysinline {
  ret <$1 x float> undef
}

define internal float @__undef_uniform() nounwind readnone alwaysinline {
  ret float undef
}


;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; stdlib transcendentals
;;
;; These functions provide entrypoints that call out to the libm
;; implementations of the transcendental functions
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;

declare float @sinf(float) nounwind readnone
declare float @cosf(float) nounwind readnone
;; sincosf hands back its two results by writing through the pointer
;; arguments, so it must not be marked readnone: a call to a readnone
;; function whose return value is unused may be removed by the optimizer.
declare void @sincosf(float, float *, float *) nounwind
declare float @tanf(float) nounwind readnone
declare float @atanf(float) nounwind readnone
declare float @atan2f(float, float) nounwind readnone
declare float @expf(float) nounwind readnone
declare float @logf(float) nounwind readnone
declare float @powf(float, float) nounwind readnone

define internal float @__stdlib_sin(float) nounwind readnone alwaysinline {
  %r = call float @sinf(float %0)
  ret float %r
}

define internal float @__stdlib_cos(float) nounwind readnone alwaysinline {
  %r = call float @cosf(float %0)
  ret float %r
}

;; Forwards to sincosf, which stores results through %1 and %2.  This
;; wrapper writes memory through those pointers and therefore carries no
;; readnone attribute (see the declaration of sincosf above).
define internal void @__stdlib_sincos(float, float *, float *) nounwind alwaysinline {
  call void @sincosf(float %0, float *%1, float *%2)
  ret void
}

define internal float @__stdlib_tan(float) nounwind readnone alwaysinline {
  %r = call float @tanf(float %0)
  ret float %r
}

define internal float @__stdlib_atan(float) nounwind readnone alwaysinline {
  %r = call float @atanf(float %0)
  ret float %r
}

define internal float @__stdlib_atan2(float, float) nounwind readnone alwaysinline {
  %r = call float @atan2f(float %0, float %1)
  ret float %r
}

define internal float @__stdlib_log(float) nounwind readnone alwaysinline {
  %r = call float @logf(float %0)
  ret float %r
}

define internal float @__stdlib_exp(float) nounwind readnone alwaysinline {
  %r = call float @expf(float %0)
  ret float %r
}

define internal float @__stdlib_pow(float, float) nounwind readnone alwaysinline {
  %r = call float @powf(float %0, float %1)
  ret float %r
}
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; Definitions of 8 and 16-bit load and store functions
|
||||
;;
|
||||
;; The `int8_16' macro defines functions related to loading and storing 8 and
|
||||
;; 16-bit values in memory, converting to and from i32. (This is a workaround
|
||||
;; to be able to use in-memory values of these types in ispc programs, since the
|
||||
;; compiler doesn't yet support 8 and 16-bit datatypes...)
|
||||
;;
|
||||
;; Arguments to pass to `int8_16':
|
||||
;; $1: vector width of the target
|
||||
|
||||
define(`int8_16', `
;; Loads one 8-bit value per lane starting %offset bytes past the base
;; pointer and widens each one to i32.  All lanes are fetched with a single
;; integer load that is then reinterpreted as a vector of i8.
define internal <$1 x i32> @__load_uint8([0 x i32] *, i32 %offset) nounwind alwaysinline {
  %ptr8 = bitcast [0 x i32] *%0 to i8 *
  %ptr = getelementptr i8 * %ptr8, i32 %offset
  %ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *
  %val = load i`'eval(8*$1) * %ptr64, align 1

  %vval = bitcast i`'eval(8*$1) %val to <$1 x i8>
  ; the values are treated as unsigned, so zero-extend to i32...
  %ret = zext <$1 x i8> %vval to <$1 x i32>
  ret <$1 x i32> %ret
}


;; Loads one 16-bit value per lane starting %offset i16 elements past the
;; base pointer and widens each one to i32.
define internal <$1 x i32> @__load_uint16([0 x i32] *, i32 %offset) nounwind alwaysinline {
  %ptr16 = bitcast [0 x i32] *%0 to i16 *
  %ptr = getelementptr i16 * %ptr16, i32 %offset
  %ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *
  %val = load i`'eval(16*$1) * %ptr64, align 1

  %vval = bitcast i`'eval(16*$1) %val to <$1 x i16>
  ; unsigned, so use zero-extend...
  %ret = zext <$1 x i16> %vval to <$1 x i32>
  ret <$1 x i32> %ret
}

;; Masked store of the low 8 bits of each lane of %val32, starting %offset
;; bytes past the base pointer.  The mask is truncated per lane and used as
;; a bit mask, so this relies on each mask lane being all ones or all zeros.
define internal void @__store_uint8([0 x i32] *, i32 %offset, <$1 x i32> %val32,
                                    <$1 x i32> %mask) nounwind alwaysinline {
  %val = trunc <$1 x i32> %val32 to <$1 x i8>
  %val64 = bitcast <$1 x i8> %val to i`'eval(8*$1)

  %mask8 = trunc <$1 x i32> %mask to <$1 x i8>
  %mask64 = bitcast <$1 x i8> %mask8 to i`'eval(8*$1)
  %notmask = xor i`'eval(8*$1) %mask64, -1

  %ptr8 = bitcast [0 x i32] *%0 to i8 *
  %ptr = getelementptr i8 * %ptr8, i32 %offset
  %ptr64 = bitcast i8 * %ptr to i`'eval(8*$1) *

  ;; load the old value, use logical ops to blend based on the mask, then
  ;; store the result back
  %old = load i`'eval(8*$1) * %ptr64, align 1
  %oldmasked = and i`'eval(8*$1) %old, %notmask
  %newmasked = and i`'eval(8*$1) %val64, %mask64
  %final = or i`'eval(8*$1) %oldmasked, %newmasked
  ;; the address is only byte-granular, so the store needs the same explicit
  ;; align 1 as the load above; with no align given the default alignment of
  ;; the wide integer type would be assumed
  store i`'eval(8*$1) %final, i`'eval(8*$1) * %ptr64, align 1

  ret void
}

;; Masked store of the low 16 bits of each lane of %val32, starting %offset
;; i16 elements past the base pointer.  Same blending scheme as the 8-bit
;; store above.
define internal void @__store_uint16([0 x i32] *, i32 %offset, <$1 x i32> %val32,
                                     <$1 x i32> %mask) nounwind alwaysinline {
  %val = trunc <$1 x i32> %val32 to <$1 x i16>
  %val64 = bitcast <$1 x i16> %val to i`'eval(16*$1)

  %mask16 = trunc <$1 x i32> %mask to <$1 x i16>
  %mask64 = bitcast <$1 x i16> %mask16 to i`'eval(16*$1)
  %notmask = xor i`'eval(16*$1) %mask64, -1

  %ptr16 = bitcast [0 x i32] *%0 to i16 *
  %ptr = getelementptr i16 * %ptr16, i32 %offset
  %ptr64 = bitcast i16 * %ptr to i`'eval(16*$1) *

  ;; as above, use mask to do blending with logical ops...
  %old = load i`'eval(16*$1) * %ptr64, align 1
  %oldmasked = and i`'eval(16*$1) %old, %notmask
  %newmasked = and i`'eval(16*$1) %val64, %mask64
  %final = or i`'eval(16*$1) %oldmasked, %newmasked
  ;; explicit align 1 to match the load; the address is only known to be
  ;; offset by a count of i16 elements
  store i`'eval(16*$1) %final, i`'eval(16*$1) * %ptr64, align 1

  ret void
}
'
)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; packed load and store functions
|
||||
;;
|
||||
;; These define functions to emulate those nice packed load and packed store
|
||||
;; instructions. For packed store, given a pointer to destination array and
|
||||
;; an offset into the array, for each lane where the mask is on, the
|
||||
;; corresponding value for that lane is stored into packed locations in the
|
||||
;; destination array. For packed load, each lane that has an active mask
|
||||
;; loads a sequential value from the array.
|
||||
;;
|
||||
;; $1: vector width of the target
|
||||
;;
|
||||
;; FIXME: use the per_lane macro, defined below, to implement these!
|
||||
|
||||
define(`packed_load_and_store', `
declare i1 @__is_compile_time_constant_mask(<$1 x i32> %mask)

;; Packed load: reads consecutive i32 values beginning at element
;; %start_offset of the array and writes them into the lanes of the vector
;; at %val_ptr whose mask is on.  Lanes that are off are left untouched.
;; Returns the number of values consumed from the array.
define i32 @__packed_load_active([0 x i32] *, i32 %start_offset, <$1 x i32> * %val_ptr,
                                 <$1 x i32> %full_mask) nounwind alwaysinline {
entry:
  %bits = call i32 @__movmsk(<$1 x i32> %full_mask)
  %base = bitcast [0 x i32] * %0 to i32 *
  %src = getelementptr i32 * %base, i32 %start_offset
  %const_mask = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask)
  br i1 %const_mask, label %mask_is_const, label %general

mask_is_const:
  %every_lane = icmp eq i32 %bits, eval((1 << $1) -1)
  br i1 %every_lane, label %full_width, label %check_empty

full_width:
  ;; all lanes are on: one vector load covers the whole request
  %src_vec = bitcast i32 *%src to <$1 x i32> *
  %whole = load <$1 x i32> *%src_vec, align 4
  store <$1 x i32> %whole, <$1 x i32> * %val_ptr
  ret i32 $1

check_empty:
  %no_lane = icmp eq i32 %bits, 0
  br i1 %no_lane, label %nothing, label %general

nothing:
  ;; all lanes are off: nothing is read
  ret i32 0

general:
  br label %scan

scan:
  ;; %lane counts lanes, %bit is the mask bit of that lane and %count is
  ;; the number of array elements consumed so far
  %lane = phi i32 [ 0, %general ], [ %lane_next, %advance ]
  %bit = phi i32 [ 1, %general ], [ %bit_next, %advance ]
  %count = phi i32 [ 0, %general ], [ %count_next, %advance ]

  %hit = and i32 %bits, %bit
  %active = icmp eq i32 %hit, %bit
  br i1 %active, label %copy, label %advance

copy:
  %srcptr = getelementptr i32 *%src, i32 %count
  %elt = load i32 *%srcptr
  %dst_base = bitcast <$1 x i32> * %val_ptr to i32 *
  %dstptr = getelementptr i32 *%dst_base, i32 %lane
  store i32 %elt, i32 *%dstptr
  %count_inc = add i32 %count, 1
  br label %advance

advance:
  %count_next = phi i32 [ %count_inc, %copy ], [ %count, %scan ]
  %lane_next = add i32 %lane, 1
  %bit_next = shl i32 %bit, 1

  %more = icmp ne i32 %lane_next, $1
  br i1 %more, label %scan, label %finished

finished:
  ret i32 %count_next
}

;; Packed store: writes the values of the lanes of %vals whose mask is on to
;; consecutive i32 slots beginning at element %start_offset of the array.
;; Returns the number of values written.
define i32 @__packed_store_active([0 x i32] *, i32 %start_offset, <$1 x i32> %vals,
                                  <$1 x i32> %full_mask) nounwind alwaysinline {
entry:
  %bits = call i32 @__movmsk(<$1 x i32> %full_mask)
  %base = bitcast [0 x i32] * %0 to i32 *
  %dst = getelementptr i32 * %base, i32 %start_offset
  %const_mask = call i1 @__is_compile_time_constant_mask(<$1 x i32> %full_mask)
  br i1 %const_mask, label %mask_is_const, label %general

mask_is_const:
  %every_lane = icmp eq i32 %bits, eval((1 << $1) -1)
  br i1 %every_lane, label %full_width, label %check_empty

full_width:
  ;; all lanes are on: one vector store covers the whole request
  %dst_vec = bitcast i32 *%dst to <$1 x i32> *
  store <$1 x i32> %vals, <$1 x i32> * %dst_vec, align 4
  ret i32 $1

check_empty:
  %no_lane = icmp eq i32 %bits, 0
  br i1 %no_lane, label %nothing, label %general

nothing:
  ;; all lanes are off: nothing is written
  ret i32 0

general:
  br label %scan

scan:
  ;; %lane counts lanes, %bit is the mask bit of that lane and %count is
  ;; the number of array elements written so far
  %lane = phi i32 [ 0, %general ], [ %lane_next, %advance ]
  %bit = phi i32 [ 1, %general ], [ %bit_next, %advance ]
  %count = phi i32 [ 0, %general ], [ %count_next, %advance ]

  %hit = and i32 %bits, %bit
  %active = icmp eq i32 %hit, %bit
  br i1 %active, label %copy, label %advance

copy:
  %elt = extractelement <$1 x i32> %vals, i32 %lane
  %dstptr = getelementptr i32 *%dst, i32 %count
  store i32 %elt, i32 *%dstptr
  %count_inc = add i32 %count, 1
  br label %advance

advance:
  %count_next = phi i32 [ %count_inc, %copy ], [ %count, %scan ]
  %lane_next = add i32 %lane, 1
  %bit_next = shl i32 %bit, 1

  %more = icmp ne i32 %lane_next, $1
  br i1 %more, label %scan, label %finished

finished:
  ret i32 %count_next
}
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; per_lane
|
||||
;;
|
||||
;; The scary macro below encapsulates the 'scalarization' idiom--i.e. we have
|
||||
;; some operation that we'd like to perform only for the lanes where the
|
||||
;; mask is on
|
||||
;; $1: vector width of the target
|
||||
;; $2: variable that holds the mask
|
||||
;; $3: block of code to run for each lane that is on
|
||||
;; Inside this code, any instances of the text "LANE" are replaced
|
||||
;; with an i32 value that represents the current lane number
|
||||
|
||||
divert(`-1')
|
||||
# forloop(var, from, to, stmt) - improved version:
|
||||
# works even if VAR is not a strict macro name
|
||||
# performs sanity check that FROM is not larger than TO
|
||||
# allows complex numerical expressions in TO and FROM
|
||||
# forloop: when TO >= FROM, temporarily defines VAR (via pushdef) to the
# evaluated FROM, hands VAR, the evaluated TO and STMT to the helper below,
# then restores VAR with popdef.  Expands to nothing when TO < FROM.
define(`forloop', `ifelse(eval(`($3) >= ($2)'), `1',
|
||||
`pushdef(`$1', eval(`$2'))_$0(`$1',
|
||||
eval(`$3'), `$4')popdef(`$1')')')
|
||||
# _forloop(var, to, stmt): emits STMT once, then stops if the current value
# of VAR equals TO; otherwise redefines VAR to its value plus one and
# recurses with the same arguments.
define(`_forloop',
|
||||
`$3`'ifelse(indir(`$1'), `$2', `',
|
||||
`define(`$1', incr(indir(`$1')))$0($@)')')
|
||||
divert`'dnl
|
||||
|
||||
; num lanes, mask, code block to do per lane
|
||||
;; Expansion outline (all labels and values introduced here are prefixed
;; with pl_):
;;  * The mask in $2 is converted to an i32 bit mask with @__movmsk.  $2 is
;;    pasted as the complete call argument, so it presumably has to include
;;    its type -- confirm against callers.
;;  * If @__is_compile_time_constant_mask reports the mask as known and every
;;    lane is on, the code block is emitted once per lane, straight-line,
;;    with each occurrence of the text ID or LANE replaced by the lane
;;    number (so LANE_ID becomes for example 0_0).
;;  * If the mask is known and all off, control goes directly to pl_done.
;;  * Otherwise a run-time loop visits each lane and runs the code block for
;;    the lanes whose mask bit is set; in that copy LANE_ID is replaced by
;;    _id and LANE by %pl_lane.
;;  * The expansion ends with the label pl_done and nothing after it, so
;;    whatever the caller writes next becomes the body of that block.
define(`per_lane', `
|
||||
br label %pl_entry
|
||||
|
||||
pl_entry:
|
||||
%pl_mask = call i32 @__movmsk($2)
|
||||
%pl_mask_known = call i1 @__is_compile_time_constant_mask($2)
|
||||
br i1 %pl_mask_known, label %pl_known_mask, label %pl_unknown_mask
|
||||
|
||||
pl_known_mask:
|
||||
;; the mask is known at compile time; see if it is something we can
|
||||
;; handle more efficiently
|
||||
%pl_is_allon = icmp eq i32 %pl_mask, eval((1<<$1)-1)
|
||||
br i1 %pl_is_allon, label %pl_all_on, label %pl_not_all_on
|
||||
|
||||
pl_all_on:
|
||||
;; the mask is all on--just expand the code for each lane sequentially
|
||||
forloop(i, 0, eval($1-1),
|
||||
`patsubst(`$3', `ID\|LANE', i)')
|
||||
br label %pl_done
|
||||
|
||||
pl_not_all_on:
|
||||
;; not all on--see if it is all off or mixed
|
||||
;; for the mixed case, we just run the general case, though we could
|
||||
;; try to be smart and just emit the code based on what it actually is,
|
||||
;; for example by emitting the code straight-line without a loop and doing
|
||||
;; the lane tests explicitly, leaving later optimization passes to eliminate
|
||||
;; the stuff that is definitely not needed. Not clear if we will frequently
|
||||
;; encounter a mask that is known at compile-time but is not either all on or
|
||||
;; all off...
|
||||
%pl_alloff = icmp eq i32 %pl_mask, 0
|
||||
br i1 %pl_alloff, label %pl_done, label %pl_unknown_mask
|
||||
|
||||
pl_unknown_mask:
|
||||
br label %pl_loop
|
||||
|
||||
pl_loop:
|
||||
;; Loop over each lane and see if we want to do the work for this lane
|
||||
%pl_lane = phi i32 [ 0, %pl_unknown_mask ], [ %pl_nextlane, %pl_loopend ]
|
||||
%pl_lanemask = phi i32 [ 1, %pl_unknown_mask ], [ %pl_nextlanemask, %pl_loopend ]
|
||||
|
||||
; is the current lane on? if so, goto do work, otherwise to end of loop
|
||||
%pl_and = and i32 %pl_mask, %pl_lanemask
|
||||
%pl_doit = icmp eq i32 %pl_and, %pl_lanemask
|
||||
br i1 %pl_doit, label %pl_dolane, label %pl_loopend
|
||||
|
||||
pl_dolane:
|
||||
;; If so, substitute in the code from the caller and replace the LANE
|
||||
;; stuff with the current lane number
|
||||
patsubst(`patsubst(`$3', `LANE_ID', `_id')', `LANE', `%pl_lane')
|
||||
br label %pl_loopend
|
||||
|
||||
pl_loopend:
|
||||
%pl_nextlane = add i32 %pl_lane, 1
|
||||
%pl_nextlanemask = mul i32 %pl_lanemask, 2
|
||||
|
||||
; are we done yet?
|
||||
%pl_test = icmp ne i32 %pl_nextlane, $1
|
||||
br i1 %pl_test, label %pl_loop, label %pl_done
|
||||
|
||||
pl_done:
|
||||
')
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
;;
|
||||
;; $1: vector width of the target
|
||||
;; $2: scalar type for which to generate functions to do gathers
|
||||
|
||||
; vec width, type
|
||||
define(`gen_gather', `
|
||||
;; Define the utility function to do the gather operation for a single element
|
||||
;; of the type
;; The function returns %ret with element %lane replaced by the value loaded
;; from address %ptr64 + offsets[%lane]; the offset is added to the address
;; directly and so is measured in bytes.
|
||||
define internal <$1 x $2> @__gather_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %ret,
|
||||
i32 %lane) nounwind readonly alwaysinline {
|
||||
; compute address for this one from the base
; NOTE(review): zext treats the 32-bit offset as unsigned so a negative offset
; would become a large positive displacement -- confirm offsets are never negative
|
||||
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
||||
%offset64 = zext i32 %offset32 to i64
|
||||
%ptrdelta = add i64 %ptr64, %offset64
|
||||
%ptr = inttoptr i64 %ptrdelta to $2 *
|
||||
|
||||
; load value and insert into returned value
|
||||
%val = load $2 *%ptr
|
||||
%updatedret = insertelement <$1 x $2> %ret, $2 %val, i32 %lane
|
||||
ret <$1 x $2> %updatedret
|
||||
}
|
||||
|
||||
|
||||
;; Gather entry point: loads one value per lane from the base pointer (the
;; unnamed first argument) plus the per-lane offsets and returns them as a vector.
define <$1 x $2> @__gather_base_offsets_$2(i8*, <$1 x i32> %offsets,
|
||||
<$1 x i32> %vecmask) nounwind readonly alwaysinline {
|
||||
entry:
|
||||
%mask = call i32 @__movmsk(<$1 x i32> %vecmask)
|
||||
%ptr64 = ptrtoint i8 * %0 to i64
|
||||
|
||||
%maskKnown = call i1 @__is_compile_time_constant_mask(<$1 x i32> %vecmask)
|
||||
br i1 %maskKnown, label %known_mask, label %unknown_mask
|
||||
|
||||
known_mask:
; the mask is known at compile time: if every lane is off then no loads are needed
|
||||
%alloff = icmp eq i32 %mask, 0
|
||||
br i1 %alloff, label %gather_all_off, label %unknown_mask
|
||||
|
||||
gather_all_off:
; nothing is loaded so the returned vector is left undefined
|
||||
ret <$1 x $2> undef
|
||||
|
||||
unknown_mask:
|
||||
; We can be clever and avoid the per-lane stuff for gathers if we are willing
|
||||
; to require that the 0th element of the array being gathered from is always
|
||||
; legal to read from (and we do indeed require that, given the benefits!)
|
||||
;
|
||||
; Set the offset to zero for lanes that are off
|
||||
%offsetsPtr = alloca <$1 x i32>
|
||||
store <$1 x i32> zeroinitializer, <$1 x i32> * %offsetsPtr
|
||||
call void @__masked_store_blend_32(<$1 x i32> * %offsetsPtr, <$1 x i32> %offsets,
|
||||
<$1 x i32> %vecmask)
|
||||
%newOffsets = load <$1 x i32> * %offsetsPtr
|
||||
|
||||
%ret0 = call <$1 x $2> @__gather_elt_$2(i64 %ptr64, <$1 x i32> %newOffsets,
|
||||
<$1 x $2> undef, i32 0)
|
||||
; the remaining elements are chained: each call inserts one more element into
; the vector returned by the call for the previous element
forloop(lane, 1, eval($1-1),
|
||||
`patsubst(patsubst(`%retLANE = call <$1 x $2> @__gather_elt_$2(i64 %ptr64,
|
||||
<$1 x i32> %newOffsets, <$1 x $2> %retPREV, i32 LANE)
|
||||
', `LANE', lane), `PREV', eval(lane-1))')
|
||||
ret <$1 x $2> %ret`'eval($1-1)
|
||||
}
|
||||
'
|
||||
)
|
||||
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gen_scatter
|
||||
;; Emit a function declaration for a scalarized scatter.
|
||||
;;
|
||||
;; $1: target vector width
|
||||
;; $2: scalar type for which we want to generate code to scatter
|
||||
|
||||
define(`gen_scatter', `
|
||||
;; Define the function that describes the work to do to scatter a single
|
||||
;; value
;; The function stores values[%lane] to address %ptr64 + offsets[%lane]; the
;; offset is added to the address directly and so is measured in bytes.
|
||||
define internal void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values,
|
||||
i32 %lane) nounwind alwaysinline {
|
||||
; NOTE(review): zext treats the 32-bit offset as unsigned so a negative offset
; would become a large positive displacement -- confirm offsets are never negative
%offset32 = extractelement <$1 x i32> %offsets, i32 %lane
|
||||
%offset64 = zext i32 %offset32 to i64
|
||||
%ptrdelta = add i64 %ptr64, %offset64
|
||||
%ptr = inttoptr i64 %ptrdelta to $2 *
|
||||
%storeval = extractelement <$1 x $2> %values, i32 %lane
|
||||
store $2 %storeval, $2 * %ptr
|
||||
ret void
|
||||
}
|
||||
|
||||
;; Scatter entry point: converts the base pointer to an integer address and then
;; issues the single-element store above once for each element that is processed.
define void @__scatter_base_offsets_$2(i8* %base, <$1 x i32> %offsets, <$1 x $2> %values,
|
||||
<$1 x i32> %mask) nounwind alwaysinline {
|
||||
;; And use the `per_lane' macro to do all of the per-lane work for scatter...
|
||||
%ptr64 = ptrtoint i8 * %base to i64
|
||||
per_lane($1, <$1 x i32> %mask, `
|
||||
call void @__scatter_elt_$2(i64 %ptr64, <$1 x i32> %offsets, <$1 x $2> %values, i32 LANE)')
|
||||
ret void
|
||||
}
|
||||
'
|
||||
)
|
||||
11
stdlib2cpp.py
Executable file
11
stdlib2cpp.py
Executable file
@@ -0,0 +1,11 @@
|
||||
#!/usr/bin/python
"""Convert text read from standard input into a C string definition.

The output defines ``const char *stdlib_code`` as a sequence of adjacent
string literals, one per input line, each ending in an escaped newline.
"""

import sys


def c_string_literal(line):
    """Return `line` as a double-quoted C string literal ending in ``\\n``.

    Trailing whitespace (including the line terminator) is stripped first.
    Backslashes are escaped before double quotes so that the backslashes
    introduced by the quote escaping are not themselves doubled.
    """
    text = line.rstrip()
    text = text.replace('\\', '\\\\').replace('"', '\\"')
    return '"' + text + '\\n"'


def main():
    """Read stdin line by line and write the C definition to stdout."""
    out = sys.stdout
    # sys.stdout.write is used instead of the print statement so that the
    # script runs under both Python 2 and Python 3.
    out.write("const char *stdlib_code = \n")
    for line in sys.stdin:
        out.write(c_string_literal(line) + "\n")
    out.write(";\n")


if __name__ == "__main__":
    main()
|
||||
302
stmt.h
Normal file
302
stmt.h
Normal file
@@ -0,0 +1,302 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file stmt.h
|
||||
@brief File with declarations for classes related to statements in the language
|
||||
*/
|
||||
|
||||
#ifndef ISPC_STMT_H
|
||||
#define ISPC_STMT_H 1
|
||||
|
||||
#include "ispc.h"
|
||||
|
||||
/** @brief Interface class for statements in the ispc language.
|
||||
|
||||
This abstract base-class encapsulates methods that AST nodes for
|
||||
statements in the language must implement.
|
||||
*/
|
||||
class Stmt : public ASTNode {
|
||||
public:
|
||||
Stmt(SourcePos p) : ASTNode(p) { }
|
||||
|
||||
/** Emit LLVM IR for the statement, using the FunctionEmitContext to create the
|
||||
necessary instructions.
|
||||
*/
|
||||
virtual void EmitCode(FunctionEmitContext *ctx) const = 0;
|
||||
|
||||
/** Print a representation of the statement (and any children AST
|
||||
nodes) to standard output. This method is used for debuggins. */
|
||||
virtual void Print(int indent) const = 0;
|
||||
|
||||
// Redeclare these methods with Stmt * return values, rather than
|
||||
// ASTNode *s, as in the original ASTNode declarations of them.
|
||||
virtual Stmt *Optimize() = 0;
|
||||
virtual Stmt *TypeCheck() = 0;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement representing a single expression */
|
||||
class ExprStmt : public Stmt {
|
||||
public:
|
||||
ExprStmt(Expr *expr, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
Expr *expr;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement representing a single declaration (which in turn may declare
|
||||
a number of variables. */
|
||||
class DeclStmt : public Stmt {
|
||||
public:
|
||||
DeclStmt(SourcePos pos, Declaration *declaration, SymbolTable *symbolTable);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
Declaration *declaration;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement representing a single if statement, possibly with an
|
||||
else clause. */
|
||||
class IfStmt : public Stmt {
|
||||
public:
|
||||
IfStmt(Expr *testExpr, Stmt *trueStmts, Stmt *falseStmts,
|
||||
bool doCoherentCheck, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
// @todo these are only public for lHasVaryingBreakOrContinue(); would
|
||||
// be nice to clean that up...
|
||||
/** Expression giving the 'if' test. */
|
||||
Expr *test;
|
||||
/** Statements to run if the 'if' test returns a true value */
|
||||
Stmt *trueStmts;
|
||||
/** Statements to run if the 'if' test returns a false value */
|
||||
Stmt *falseStmts;
|
||||
|
||||
private:
|
||||
/** This value records if this was a 'coherent' if statement in the
|
||||
source and thus, if the emitted code should check to see if all
|
||||
active program instances want to follow just one of the 'true' or
|
||||
'false' blocks. */
|
||||
const bool doCoherentCheck;
|
||||
|
||||
void emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
llvm::Value *test) const;
|
||||
void emitCoherentTests(FunctionEmitContext *ctx, llvm::Value *test) const;
|
||||
void emitMaskAllOn(FunctionEmitContext *ctx,
|
||||
llvm::Value *test, llvm::BasicBlock *bDone) const;
|
||||
void emitMaskMixed(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
||||
llvm::Value *test, llvm::BasicBlock *bDone) const;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement implementation representing a 'do' statement in the
|
||||
program.
|
||||
*/
|
||||
class DoStmt : public Stmt {
|
||||
public:
|
||||
DoStmt(Expr *testExpr, Stmt *bodyStmts, bool doCoherentCheck,
|
||||
SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
Expr *testExpr;
|
||||
Stmt *bodyStmts;
|
||||
const bool doCoherentCheck;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement implementation for 'for' loops (as well as for 'while'
|
||||
loops).
|
||||
*/
|
||||
class ForStmt : public Stmt {
|
||||
public:
|
||||
ForStmt(Stmt *initializer, Expr *testExpr, Stmt *stepStatements,
|
||||
Stmt *bodyStatements, bool doCoherentCheck, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
/** 'for' statment initializer; may be NULL, indicating no intitializer */
|
||||
Stmt *init;
|
||||
/** expression that returns a value indicating whether the loop should
|
||||
continue for the next iteration */
|
||||
Expr *test;
|
||||
/** Statements to run at the end of the loop for the loop step, before
|
||||
the test expression is evaluated. */
|
||||
Stmt *step;
|
||||
/** Loop body statements */
|
||||
Stmt *stmts;
|
||||
const bool doCoherentCheck;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement implementation for a break or 'coherent' break
|
||||
statement in the program. */
|
||||
class BreakStmt : public Stmt {
|
||||
public:
|
||||
BreakStmt(bool doCoherenceCheck, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
/** This indicates whether the generated code will check to see if no
|
||||
more program instances are currently running after the break, in
|
||||
which case the code can have a jump to the end of the current
|
||||
loop. */
|
||||
const bool doCoherenceCheck;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement implementation for a continue or 'coherent' continue
|
||||
statement in the program. */
|
||||
class ContinueStmt : public Stmt {
|
||||
public:
|
||||
ContinueStmt(bool doCoherenceCheck, SourcePos pos);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
/** This indicates whether the generated code will check to see if no
|
||||
more program instances are currently running after the continue, in
|
||||
which case the code can have a jump to the end of the current
|
||||
loop. */
|
||||
const bool doCoherenceCheck;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Statement implementation for a 'return' or 'coherent' return
|
||||
statement in the program. */
|
||||
class ReturnStmt : public Stmt {
|
||||
public:
|
||||
ReturnStmt(Expr *v, bool cc, SourcePos p);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
Expr *val;
|
||||
/** This indicates whether the generated code will check to see if no
|
||||
more program instances are currently running after the return, in
|
||||
which case the code can possibly jump to the end of the current
|
||||
function. */
|
||||
const bool doCoherenceCheck;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Representation of a list of statements in the program.
|
||||
*/
|
||||
class StmtList : public Stmt {
|
||||
public:
|
||||
StmtList(SourcePos p) : Stmt(p) { }
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
void Add(Stmt *s) { if (s) stmts.push_back(s); }
|
||||
const std::vector<Stmt *> &GetStatements() { return stmts; }
|
||||
|
||||
private:
|
||||
std::vector<Stmt *> stmts;
|
||||
};
|
||||
|
||||
|
||||
/** @brief Representation of a print() statement in the program.
|
||||
|
||||
It's currently necessary to have a special statement type for print()
|
||||
since strings aren't supported as first-class types in the language,
|
||||
but we need to be able to pass a formatting string as the first
|
||||
argument to print(). We also need this to be a variable argument
|
||||
function, which also isn't supported. Representing print() as a
|
||||
statement lets us work around both of those ugly little issues...
|
||||
*/
|
||||
class PrintStmt : public Stmt {
|
||||
public:
|
||||
PrintStmt(const std::string &f, Expr *v, SourcePos p);
|
||||
|
||||
void EmitCode(FunctionEmitContext *ctx) const;
|
||||
void Print(int indent) const;
|
||||
|
||||
Stmt *Optimize();
|
||||
Stmt *TypeCheck();
|
||||
|
||||
private:
|
||||
/** Format string for the print() statement. */
|
||||
const std::string format;
|
||||
/** This holds the arguments passed to the print() statement. If more
|
||||
than one was provided, this will be an ExprList. */
|
||||
Expr *values;
|
||||
};
|
||||
|
||||
|
||||
#endif // ISPC_STMT_H
|
||||
326
sym.cpp
Normal file
326
sym.cpp
Normal file
@@ -0,0 +1,326 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file sym.cpp
|
||||
@brief file with definitions for symbol and symbol table classes.
|
||||
*/
|
||||
|
||||
#include "sym.h"
|
||||
#include "type.h"
|
||||
#include "util.h"
|
||||
#include <stdio.h>
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// Symbol
|
||||
|
||||
// Builds a symbol with the given name, source position, and type.  All
// remaining members start out cleared (NULL / false / 0).  The initializer
// list follows the member declaration order in sym.h.
Symbol::Symbol(const std::string &n, SourcePos p, const Type *t)
    : pos(p),
      name(n),
      storagePtr(NULL),
      function(NULL),
      type(t),
      constValue(NULL),
      isStatic(false),
      varyingCFDepth(0) {
}
|
||||
|
||||
|
||||
std::string
|
||||
Symbol::MangledName() const {
|
||||
return name + type->Mangle();
|
||||
}
|
||||
|
||||
///////////////////////////////////////////////////////////////////////////
|
||||
// SymbolTable
|
||||
|
||||
SymbolTable::SymbolTable() {
    // Start with a single scope already open, so that there is always an
    // outermost scope to add symbols and types to.
    this->PushScope();
}
|
||||
|
||||
|
||||
// Releases the outermost scope that was opened by the constructor.
SymbolTable::~SymbolTable() {
    // Otherwise we have mismatched push/pop scopes
    assert(variables.size() == 1 && types.size() == 1);

    // The last scope is released here directly rather than via PopScope():
    // PopScope() asserts that more than one scope is present, which can
    // never hold at this point given the assertion above.
    delete variables.back();
    variables.pop_back();
    delete types.back();
    types.pop_back();
}
|
||||
|
||||
void
|
||||
SymbolTable::PushScope() {
|
||||
variables.push_back(new std::vector<Symbol *>);
|
||||
types.push_back(new TypeMapType);
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
SymbolTable::PopScope() {
|
||||
// FIXME: delete Symbols in variables vector<>...
|
||||
assert(variables.size() > 1);
|
||||
delete variables.back();
|
||||
variables.pop_back();
|
||||
assert(types.size() > 1);
|
||||
delete types.back();
|
||||
types.pop_back();
|
||||
}
|
||||
|
||||
|
||||
bool
|
||||
SymbolTable::AddVariable(Symbol *symbol) {
|
||||
assert(symbol != NULL);
|
||||
|
||||
// Check to see if a symbol of the same name has already been declared.
|
||||
for (int i = (int)variables.size() - 1; i >= 0; --i) {
|
||||
std::vector<Symbol *> &sv = *(variables[i]);
|
||||
for (int j = (int)sv.size() - 1; j >= 0; --j) {
|
||||
if (sv[j]->name == symbol->name) {
|
||||
if (i == (int)variables.size()-1) {
|
||||
// If a symbol of the same name was declared in the
|
||||
// same scope, it's an error.
|
||||
Error(symbol->pos, "Ignoring redeclaration of symbol \"%s\".",
|
||||
symbol->name.c_str());
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
// Otherwise it's just shadowing something else, which
|
||||
// is legal but dangerous..
|
||||
Warning(symbol->pos,
|
||||
"Symbol \"%s\" shadows symbol declared in outer scope.",
|
||||
symbol->name.c_str());
|
||||
variables.back()->push_back(symbol);
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// No matches, so go ahead and add it...
|
||||
variables.back()->push_back(symbol);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Returns the variable symbol called `name`, or NULL if there is none.
Symbol *
SymbolTable::LookupVariable(const char *name) {
    // Scopes are visited from innermost to outermost (i.e. from the back
    // of the vector to the front) so that, when variables in different
    // scopes shadow one another, the innermost one is the one found.
    for (size_t scope = variables.size(); scope-- > 0; ) {
        const std::vector<Symbol *> &scopeSyms = *variables[scope];
        for (size_t k = scopeSyms.size(); k-- > 0; ) {
            Symbol *candidate = scopeSyms[k];
            if (candidate->name == name)
                return candidate;
        }
    }
    return NULL;
}
|
||||
|
||||
|
||||
bool
|
||||
SymbolTable::AddFunction(Symbol *symbol) {
|
||||
const FunctionType *ft = dynamic_cast<const FunctionType *>(symbol->type);
|
||||
assert(ft != NULL);
|
||||
if (LookupFunction(symbol->name.c_str(), ft) != NULL)
|
||||
// A function of the same name and type has already been added to
|
||||
// the symbol table
|
||||
return false;
|
||||
|
||||
functions[symbol->name].push_back(symbol);
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
std::vector<Symbol *> *
|
||||
SymbolTable::LookupFunction(const char *name) {
|
||||
if (functions.find(name) != functions.end())
|
||||
return &functions[name];
|
||||
return NULL;
|
||||
}
|
||||
|
||||
|
||||
// Returns the function symbol called `name` whose type equals `type`, or
// NULL if there is no such function.
Symbol *
SymbolTable::LookupFunction(const char *name, const FunctionType *type) {
    // A single find() both tests for presence and yields the entry; this
    // avoids a second map lookup (and a second temporary key string).
    std::map<std::string, std::vector<Symbol *> >::iterator iter =
        functions.find(name);
    if (iter == functions.end())
        return NULL;

    const std::vector<Symbol *> &overloads = iter->second;
    for (unsigned int i = 0; i < overloads.size(); ++i)
        if (Type::Equal(overloads[i]->type, type))
            return overloads[i];
    return NULL;
}
|
||||
|
||||
|
||||
bool
|
||||
SymbolTable::AddType(const char *name, const Type *type, SourcePos pos) {
|
||||
// Like AddVariable(), we go backwards through the type maps, working
|
||||
// from innermost scope to outermost.
|
||||
for (int i = types.size()-1; i >= 0; --i) {
|
||||
TypeMapType &sm = *(types[i]);
|
||||
if (sm.find(name) != sm.end()) {
|
||||
if (i == (int)types.size() - 1) {
|
||||
Error(pos, "Ignoring redefinition of type \"%s\".", name);
|
||||
return false;
|
||||
}
|
||||
else {
|
||||
Warning(pos, "Type \"%s\" shadows type declared in outer scope.", name);
|
||||
TypeMapType &sm = *(types.back());
|
||||
sm[name] = type;
|
||||
return true;
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
TypeMapType &sm = *(types.back());
|
||||
sm[name] = type;
|
||||
return true;
|
||||
}
|
||||
|
||||
|
||||
// Returns the type registered under `name`, or NULL if there is none.
const Type *
SymbolTable::LookupType(const char *name) const {
    // Build the key once instead of once per map operation.
    const std::string key(name);

    // Again, search through the type maps backward to get scoping right.
    for (size_t scope = types.size(); scope-- > 0; ) {
        const TypeMapType &scopeTypes = *types[scope];
        // One find() per scope yields both the presence test and the
        // value, rather than find() followed by operator[].
        TypeMapType::const_iterator iter = scopeTypes.find(key);
        if (iter != scopeTypes.end())
            return iter->second;
    }
    return NULL;
}
|
||||
|
||||
|
||||
std::vector<std::string>
|
||||
SymbolTable::ClosestVariableOrFunctionMatch(const char *str) const {
|
||||
// This is a little wasteful, but we'll look through all of the
|
||||
// variable and function symbols and compute the edit distance from the
|
||||
// given string to them. If the edit distance is under maxDelta, then
|
||||
// it goes in the entry of the matches[] array corresponding to its
|
||||
// edit distance.
|
||||
const int maxDelta = 2;
|
||||
std::vector<std::string> matches[maxDelta+1];
|
||||
|
||||
for (int i = 0; i < (int)variables.size(); ++i) {
|
||||
std::vector<Symbol *> &sv = *(variables[i]);
|
||||
for (int j = 0; j < (int)sv.size(); ++j) {
|
||||
int dist = StringEditDistance(str, sv[j]->name, maxDelta+1);
|
||||
if (dist <= maxDelta)
|
||||
matches[dist].push_back(sv[j]->name);
|
||||
}
|
||||
}
|
||||
|
||||
std::map<std::string, std::vector<Symbol *> >::const_iterator iter;
|
||||
for (iter = functions.begin(); iter != functions.end(); ++iter) {
|
||||
int dist = StringEditDistance(str, iter->first, maxDelta+1);
|
||||
if (dist <= maxDelta)
|
||||
matches[dist].push_back(iter->first);
|
||||
}
|
||||
|
||||
// Now, return the first entry of matches[] that is non-empty, if any.
|
||||
for (int i = 0; i <= maxDelta; ++i) {
|
||||
if (matches[i].size())
|
||||
return matches[i];
|
||||
}
|
||||
|
||||
// Otherwise, no joy.
|
||||
return std::vector<std::string>();
|
||||
}
|
||||
|
||||
|
||||
std::vector<std::string>
|
||||
SymbolTable::ClosestTypeMatch(const char *str) const {
|
||||
// This follows the same approach as ClosestVariableOrFunctionmatch()
|
||||
// above; compute all edit distances, keep the ones shorter than
|
||||
// maxDelta, return the first non-empty vector of one or more sets of
|
||||
// alternatives with minimal edit distance.
|
||||
const int maxDelta = 2;
|
||||
std::vector<std::string> matches[maxDelta+1];
|
||||
|
||||
for (unsigned int i = 0; i < types.size(); ++i) {
|
||||
TypeMapType::const_iterator iter;
|
||||
for (iter = types[i]->begin(); iter != types[i]->end(); ++iter) {
|
||||
int dist = StringEditDistance(str, iter->first, maxDelta+1);
|
||||
if (dist <= maxDelta)
|
||||
matches[dist].push_back(iter->first);
|
||||
}
|
||||
}
|
||||
|
||||
for (int i = 0; i <= maxDelta; ++i) {
|
||||
if (matches[i].size())
|
||||
return matches[i];
|
||||
}
|
||||
return std::vector<std::string>();
|
||||
}
|
||||
|
||||
|
||||
void
|
||||
SymbolTable::Print() {
|
||||
int depth = 0;
|
||||
fprintf(stderr, "Variables:\n----------------\n");
|
||||
std::vector<std::vector<Symbol *> *>::iterator liter = variables.begin();
|
||||
while (liter != variables.end()) {
|
||||
fprintf(stderr, "%*c", depth, ' ');
|
||||
std::vector<Symbol *>::iterator siter = (*liter)->begin();
|
||||
while (siter != (*liter)->end()) {
|
||||
fprintf(stderr, "%s [%s]", (*siter)->name.c_str(),
|
||||
(*siter)->type->GetString().c_str());
|
||||
++siter;
|
||||
}
|
||||
++liter;
|
||||
fprintf(stderr, "\n");
|
||||
depth += 4;
|
||||
}
|
||||
|
||||
fprintf(stderr, "Functions:\n----------------\n");
|
||||
std::map<std::string, std::vector<Symbol *> >::iterator fiter;
|
||||
fiter = functions.begin();
|
||||
while (fiter != functions.end()) {
|
||||
fprintf(stderr, "%s\n", fiter->first.c_str());
|
||||
std::vector<Symbol *> &syms = fiter->second;
|
||||
for (unsigned int i = 0; i < syms.size(); ++i)
|
||||
fprintf(stderr, " %s\n", syms[i]->type->GetString().c_str());
|
||||
++fiter;
|
||||
}
|
||||
|
||||
depth = 0;
|
||||
fprintf(stderr, "Named types:\n---------------\n");
|
||||
for (unsigned int i = 0; i < types.size(); ++i) {
|
||||
TypeMapType &sm = *types[i];
|
||||
TypeMapType::iterator siter = sm.begin();
|
||||
while (siter != sm.end()) {
|
||||
fprintf(stderr, "%*c", depth, ' ');
|
||||
fprintf(stderr, "%s -> %s\n", siter->first.c_str(),
|
||||
siter->second->GetString().c_str());
|
||||
++siter;
|
||||
}
|
||||
depth += 4;
|
||||
}
|
||||
}
|
||||
264
sym.h
Normal file
264
sym.h
Normal file
@@ -0,0 +1,264 @@
|
||||
/*
|
||||
Copyright (c) 2010-2011, Intel Corporation
|
||||
All rights reserved.
|
||||
|
||||
Redistribution and use in source and binary forms, with or without
|
||||
modification, are permitted provided that the following conditions are
|
||||
met:
|
||||
|
||||
* Redistributions of source code must retain the above copyright
|
||||
notice, this list of conditions and the following disclaimer.
|
||||
|
||||
* Redistributions in binary form must reproduce the above copyright
|
||||
notice, this list of conditions and the following disclaimer in the
|
||||
documentation and/or other materials provided with the distribution.
|
||||
|
||||
* Neither the name of Intel Corporation nor the names of its
|
||||
contributors may be used to endorse or promote products derived from
|
||||
this software without specific prior written permission.
|
||||
|
||||
|
||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||
IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||
TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||
PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||
OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||
EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||
PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||
PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||
LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||
NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/** @file sym.h
|
||||
|
||||
@brief header file with declarations for symbol and symbol table
|
||||
classes.
|
||||
*/
|
||||
|
||||
#ifndef ISPC_SYM_H
|
||||
#define ISPC_SYM_H
|
||||
|
||||
#include "ispc.h"
|
||||
#include <map>
|
||||
|
||||
class StructType;
|
||||
class ConstExpr;
|
||||
|
||||
/**
|
||||
@brief Representation of a program symbol.
|
||||
|
||||
The Symbol class represents a symbol in an ispc program. Symbols can
|
||||
include variables, functions, and named types. Note that all of the
|
||||
members are publically accessible; other code throughout the system
|
||||
accesses and modifies the members directly.
|
||||
|
||||
@todo Should we break function symbols into a separate FunctionSymbol
|
||||
class and then not have these members that are not applicable for
|
||||
function symbols (and vice versa, for non-function symbols)?
|
||||
*/
|
||||
|
||||
class Symbol {
|
||||
public:
|
||||
/** The Symbol constructor takes the name of the symbol, its
|
||||
position in a source file, and its type (if known). */
|
||||
Symbol(const std::string &name, SourcePos pos, const Type *t = NULL);
|
||||
|
||||
/** This method should only be called for function symbols; for them,
|
||||
it returns a mangled version of the function name with the argument
|
||||
types encoded into the returned name. This is used to generate
|
||||
unique symbols in object files for overloaded functions.
|
||||
*/
|
||||
std::string MangledName() const;
|
||||
|
||||
SourcePos pos; /*!< Source file position where the symbol was defined */
|
||||
const std::string name; /*!< Symbol's name */
|
||||
llvm::Value *storagePtr; /*!< For symbols with storage associated with
|
||||
them (i.e. variables but not functions),
|
||||
this member stores a pointer to its
|
||||
location in memory.) */
|
||||
llvm::Function *function; /*!< For symbols that represent functions,
|
||||
this stores the LLVM Function value for
|
||||
the symbol once it has been created. */
|
||||
const Type *type; /*!< The type of the symbol; if not set by the
|
||||
constructor, this is set after the
|
||||
declaration around the symbol has been parsed. */
|
||||
ConstExpr *constValue; /*!< For symbols with const-qualified types, this may store
|
||||
the symbol's compile-time constant value. This value may
|
||||
validly be NULL for a const-qualified type, however; for
|
||||
example, the ConstExpr class can't currently represent
|
||||
struct types. For cases like these, ConstExpr is NULL,
|
||||
though for all const symbols, the value pointed to by the
|
||||
storagePtr member will be its constant value. (This
|
||||
messiness is due to needing an ispc ConstExpr for the early
|
||||
constant folding optimizations). */
|
||||
bool isStatic; /*!< Records whether this symbol had a static qualifier in
|
||||
its declaration. */
|
||||
int varyingCFDepth; /*!< This member records the number of levels of nested 'varying'
|
||||
control flow within which the symbol was declared. Having
|
||||
this value available makes it possible to avoid performing
|
||||
masked stores when modifying the symbol's value when the
|
||||
store is done at the same 'varying' control flow depth as
|
||||
the one where the symbol was originally declared. */
|
||||
};
|
||||
|
||||
|
||||
/** @brief Symbol table that holds all known symbols during parsing and compilation.
|
||||
|
||||
A single instance of a SymbolTable is stored in the Module class
|
||||
(Module::symbolTable); it is created in the Module::Module()
|
||||
constructor. It is then accessed via the global variable Module *\ref m
|
||||
throughout the ispc implementation.
|
||||
*/
|
||||
|
||||
class SymbolTable {
|
||||
public:
|
||||
SymbolTable();
|
||||
~SymbolTable();
|
||||
|
||||
/** The parser calls this method when it enters a new scope in the
|
||||
program; this allows us to track variables that shadow others in
|
||||
outer scopes with the same name as well as to efficiently discard all
|
||||
of the variables declared in a particular scope when we exit that
|
||||
scope. */
|
||||
void PushScope();
|
||||
|
||||
/** For each scope started by a call to SymbolTable::PushScope(), there
|
||||
must be a matching call to SymbolTable::PopScope() at the end of
|
||||
that scope. */
|
||||
void PopScope();
|
||||
|
||||
/** Adds the given variable symbol to the symbol table.
|
||||
@param symbol The symbol to be added
|
||||
|
||||
@return true if successful; false if the provided symbol clashes
|
||||
with a symbol defined at the same scope. (Symbols may shadow
|
||||
symbols in outer scopes; a warning is issued in this case, but this
|
||||
method still returns true.) */
|
||||
bool AddVariable(Symbol *symbol);
|
||||
|
||||
/** Looks for a variable with the given name in the symbol table. This
|
||||
method searches outward from the innermost scope to the outermost,
|
||||
returning the first match found.
|
||||
|
||||
@param name The name of the variable to be searched for.
|
||||
@return A pointer to the Symbol, if a match is found. NULL if no
|
||||
Symbol with the given name is in the symbol table. */
|
||||
Symbol *LookupVariable(const char *name);
|
||||
|
||||
/** Adds the given function symbol to the symbol table.
|
||||
@param symbol The function symbol to be added.
|
||||
|
||||
@return true if the symbol has been added. False if another
|
||||
function symbol with the same name and function signature is
|
||||
already present in the symbol table. */
|
||||
bool AddFunction(Symbol *symbol);
|
||||
|
||||
/** Looks for the function or functions with the given name in the
|
||||
symbol table. If a function has been overloaded and multiple
|
||||
definitions are present for a given function name, all of them will
|
||||
be returned and it's up to the caller to resolve which one (if
|
||||
any) to use.
|
||||
|
||||
@return vector of Symbol pointers to functions with the given name. */
|
||||
std::vector<Symbol *> *LookupFunction(const char *name);
|
||||
|
||||
/** Looks for a function with the given name and type
|
||||
in the symbol table.
|
||||
|
||||
@return pointer to matching Symbol; NULL if none is found. */
|
||||
Symbol *LookupFunction(const char *name, const FunctionType *type);
|
||||
|
||||
/** Returns all of the functions in the symbol table that match the given
|
||||
predicate.
|
||||
|
||||
@param pred A unary predicate that returns true or false, given a Symbol
|
||||
pointer, based on whether the symbol should be included in the returned
|
||||
set of matches. It can either be a function, with signature
|
||||
<tt>bool pred(const Symbol *s)</tt>, or a unary predicate object with
|
||||
a <tt>bool operator()(const Symbol *)</tt> method.
|
||||
|
||||
@param matches Pointer to a vector in which to return the matching
|
||||
symbols.
|
||||
*/
|
||||
template <typename Predicate>
|
||||
void GetMatchingFunctions(Predicate pred,
|
||||
std::vector<Symbol *> *matches) const;
|
||||
|
||||
/** Adds the named type to the symbol table. This is used for both
|
||||
struct definitions (where <tt>struct Foo</tt> causes type \c Foo to
|
||||
be added to the symbol table) as well as for <tt>typedef</tt>s.
|
||||
|
||||
@param name Name of the type to be added
|
||||
@param type Type that \c name represents
|
||||
@param pos Position in source file where the type was named
|
||||
@return true if the named type was successfully added. False if a type
|
||||
with the same name has already been defined.
|
||||
|
||||
*/
|
||||
bool AddType(const char *name, const Type *type, SourcePos pos);
|
||||
|
||||
/** Looks for a type of the given name in the symbol table.
|
||||
|
||||
@return Pointer to the Type, if found; otherwise NULL is returned.
|
||||
*/
|
||||
const Type *LookupType(const char *name) const;
|
||||
|
||||
/** This method returns zero or more strings with the names of symbols
|
||||
in the symbol table that nearly (but not exactly) match the given
|
||||
name. This is useful for issuing informative error messages when
|
||||
misspelled identifiers are found in programs.
|
||||
|
||||
@param name String to compare variable and function symbol names against.
|
||||
@return vector of zero or more strings that approximately match \c name.
|
||||
*/
|
||||
std::vector<std::string> ClosestVariableOrFunctionMatch(const char *name) const;
|
||||
/** This method returns zero or more strings with the names of types
|
||||
in the symbol table that nearly (but not exactly) match the given
|
||||
name. */
|
||||
std::vector<std::string> ClosestTypeMatch(const char *name) const;
|
||||
|
||||
/** Prints out the entire contents of the symbol table to standard error.
|
||||
(Debugging method). */
|
||||
void Print();
|
||||
|
||||
private:
|
||||
/** This member variable holds one \c vector of Symbol pointers for
|
||||
each of the current active scopes as the program is being parsed.
|
||||
New vectors of symbols are added and removed from the end of the
|
||||
main vector, so searches for symbols start looking at the end of \c
|
||||
variables and work backwards.
|
||||
*/
|
||||
std::vector<std::vector<Symbol *> *> variables;
|
||||
/** Because there is no scoping for function symbols, functions are
|
||||
represented with a single STL \c map from names to symbols. An STL
|
||||
\c vector is used to store the function symbols for a given name
|
||||
since, due to function overloading, a name can have multiple
|
||||
function symbols associated with it. */
|
||||
std::map<std::string, std::vector<Symbol *> > functions;
|
||||
/** Map from a type's name to the Type it denotes; one such map is kept
per scope in the \c types member. */
typedef std::map<std::string, const Type *> TypeMapType;
|
||||
/** Like variables, type definitions can be scoped. A new \c TypeMapType
|
||||
is added to the back of the \c types \c vector each time a new scope
|
||||
is entered. (And it's removed when the scope exits).
|
||||
*/
|
||||
std::vector<TypeMapType *> types;
|
||||
};
|
||||
|
||||
|
||||
template <typename Predicate>
|
||||
void SymbolTable::GetMatchingFunctions(Predicate pred,
|
||||
std::vector<Symbol *> *matches) const {
|
||||
// Iterate through all function symbols and apply the given predicate.
|
||||
// If it returns true, add the Symbol * to the provided vector.
|
||||
std::map<std::string, std::vector<Symbol *> >::const_iterator iter;
|
||||
for (iter = functions.begin(); iter != functions.end(); ++iter) {
|
||||
const std::vector<Symbol *> &syms = iter->second;
|
||||
for (unsigned int i = 0; i < syms.size(); ++i) {
|
||||
if (pred(syms[i]))
|
||||
matches->push_back(syms[i]);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
#endif // ISPC_SYM_H
|
||||
14
tests/array-1.ispc
Normal file
14
tests/array-1.ispc
Normal file
@@ -0,0 +1,14 @@
|
||||
|
||||
// Returns programCount to the caller.
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
|
||||
// File-scope array written and then read back by f_f() below.
static float x[2][1];
|
||||
|
||||
// Stores each program instance's aFOO value into x[0][1] and then writes the
// value read back from x[0][1] into RET[programIndex].
// NOTE(review): the inner dimension of x is 1, so index [0][1] is past the
// declared inner extent -- presumably intentional for this test; confirm.
export void f_f(uniform float RET[], uniform float aFOO[]) {
|
||||
float a = aFOO[programIndex];
|
||||
x[0][1] = a;
|
||||
RET[programIndex] = x[0][1];
|
||||
}
|
||||
|
||||
// Fills RET[programIndex] with 1+programIndex.
// Presumably these are the values the test harness expects f_f() to
// produce -- TODO confirm against the harness.
export void result(uniform float RET[]) { RET[programIndex] = 1+programIndex; }
|
||||
24
tests/array-assignment-varying-control.ispc
Normal file
24
tests/array-assignment-varying-control.ispc
Normal file
@@ -0,0 +1,24 @@
|
||||
|
||||
// Returns programCount to the caller.
export uniform int width() { return programCount; }
|
||||
|
||||
|
||||
|
||||
struct Foo { float f; };
|
||||
|
||||
// Increments the f member of the foo[] element selected by a.
// Not called by any code visible in this file.
void f(reference uniform Foo foo[], float a) {
|
||||
++foo[a].f;
|
||||
}
|
||||
|
||||
// Declares two 40-element arrays initialized from a and b, assigns g to f
// only where a < 2, and then writes f[a] to RET[programIndex].
// Note that the local array f shadows the function f() declared above.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float b) {
|
||||
float a = aFOO[programIndex];
|
||||
float f[40] = a;
|
||||
float g[40] = b;
|
||||
// Whole-array assignment guarded by a condition on a, which differs per
// program instance.
if (a < 2)
|
||||
f = g;
|
||||
RET[programIndex] = f[a];
|
||||
}
|
||||
|
||||
// Fills RET[programIndex] with 1+programIndex, then overwrites RET[0] with 5.
// Presumably these are the values the test harness expects f_fu() to
// produce -- TODO confirm against the harness and its aFOO / b inputs.
export void result(uniform float RET[]) {
|
||||
RET[programIndex] = 1+programIndex;
|
||||
RET[0] = 5;
|
||||
}
|
||||
Some files were not shown because too many files have changed in this diff Show More
Reference in New Issue
Block a user