Add "generic" 4, 8, and 16-wide targets.

When used, these targets end up with calls to undefined functions for all
of the various special vector operations that ispc needs in order to compile
ispc programs (masked store, gather, min/max, sqrt, etc.).

These targets are not yet useful for anything, but they are a step toward
having an option to emit C++ code with calls out to intrinsics.
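
As context for where this is headed, here is a minimal, hypothetical sketch of what
an externally supplied implementation of one of these currently-undefined builtins
could look like once ispc can emit C++ with calls out to intrinsics. The struct
layouts, function signature, and calling convention below are assumptions for
illustration; this commit only declares the functions (in
builtins/target-generic-common.ll) and does not define them.

// Hypothetical sketch, not part of this commit: an application-provided
// implementation of the generic target's masked 32-bit store for WIDTH=4.
// The vector/mask struct layouts and the C calling convention are assumptions.
#include <cstdint>

struct vec4_i32  { int32_t v[4]; };
struct vec4_mask { bool    v[4]; };  // maskBitCount == 1 on the generic targets

extern "C" void __masked_store_32(vec4_i32 *ptr, vec4_i32 val, vec4_mask mask) {
    // Scalar fallback; a real target would call a masked-store intrinsic here.
    for (int i = 0; i < 4; ++i)
        if (mask.v[i])
            ptr->v[i] = val.v[i];
}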

Reorganized the directory structure a bit and moved the LLVM bitcode used
to define target-specific functionality (as well as some generic built-ins)
into a builtins/ directory.

Note that for building on Windows, it's now necessary to set an LLVM_VERSION
environment variable (with values like LLVM_2_9, LLVM_3_0, LLVM_3_1svn, etc.).
Matt Pharr
2011-12-19 13:46:50 -08:00
parent 6dbb15027a
commit 1d9201fe3d
31 changed files with 1249 additions and 649 deletions

View File

@@ -62,14 +62,17 @@ CXX_SRC=ast.cpp builtins.cpp ctx.cpp decl.cpp expr.cpp func.cpp ispc.cpp \
util.cpp
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
BUILTINS_SRC=builtins-avx.ll builtins-avx-x2.ll builtins-sse2.ll builtins-sse2-x2.ll \
builtins-sse4.ll builtins-sse4-x2.ll builtins-dispatch.ll
TARGETS=avx avx-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 generic-16
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
builtins/dispatch.ll
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
builtins-c-32.cpp builtins-c-64.cpp
BISON_SRC=parse.yy
FLEX_SRC=lex.ll
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_SRC:.ll=.o) \
builtins-c-32.o builtins-c-64.o stdlib_ispc.o $(BISON_SRC:.yy=.o) \
$(FLEX_SRC:.ll=.o))
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
stdlib_generic_ispc.o stdlib_x86_ispc.o \
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
default: ispc
@@ -104,6 +107,10 @@ objs/%.o: %.cpp
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/%.o: objs/%.cpp
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/parse.cc: parse.yy
@echo Running bison on $<
@$(YACC) -o $@ $<
@@ -120,41 +127,24 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/builtins-%.cpp: builtins-%.ll
@echo Creating C++ source from builtin definitions file $<
@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
objs/builtins-%.o: objs/builtins-%.cpp
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/builtins-c-32.cpp: builtins-c.c
objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
@echo Creating C++ source from builtins definition file $<
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-32.c > $@
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) $< | ./bitcode2cpp.py $< > $@
objs/builtins-c-32.o: objs/builtins-c-32.cpp
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/builtins-c-64.cpp: builtins-c.c
objs/builtins-c-32.cpp: builtins/builtins.c
@echo Creating C++ source from builtins definition file $<
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-64.c > $@
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py c-32 > $@
objs/builtins-c-64.o: objs/builtins-c-64.cpp
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/builtins-c-64.cpp: builtins/builtins.c
@echo Creating C++ source from builtins definition file $<
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py c-64 > $@
objs/stdlib_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $<
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | ./stdlib2cpp.py > $@
objs/stdlib_generic_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for generic
@$(CLANG) -E -x c -DISPC_TARGET_GENERIC=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
./stdlib2cpp.py generic > $@
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
@echo Compiling $<
@$(CXX) $(CXXFLAGS) -o $@ -c $<
objs/builtins-sse2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2.ll
objs/builtins-sse2-x2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2-x2.ll
objs/builtins-sse4.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4.ll
objs/builtins-sse4-x2.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4-x2.ll
objs/builtins-avx.cpp: builtins.m4 builtins-avx-common.ll builtins-avx.ll
objs/builtins-avx-x2.cpp: builtins.m4 builtins-avx-common.ll builtins-avx-x2.ll
objs/stdlib_x86_ispc.cpp: stdlib.ispc
@echo Creating C++ source from $< for x86
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | \
./stdlib2cpp.py x86 > $@

View File

@@ -11,7 +11,8 @@ length=0
src=str(sys.argv[1])
target = re.sub(".*builtins-", "", src)
target = re.sub("builtins/target-", "", src)
target = re.sub("builtins/", "", target)
target = re.sub("\.ll$", "", target)
target = re.sub("\.c$", "", target)
target = re.sub("-", "_", target)

View File

@@ -99,6 +99,9 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
// varying
if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
t == LLVMTypes::MaskType)
return AtomicType::VaryingBool;
else if (t == LLVMTypes::Int8VectorType)
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
else if (t == LLVMTypes::Int16VectorType)
@@ -194,7 +197,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
// symbol creation code below assumes that any LLVM vector of i32s is a
// varying int32. Here, we need that to be interpreted as a varying
// bool, so just have a one-off override for that one...
if (name == "__sext_varying_bool") {
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
const Type *returnType = AtomicType::VaryingInt32;
std::vector<const Type *> argTypes;
argTypes.push_back(AtomicType::VaryingBool);
@@ -556,7 +559,7 @@ lSetInternalFunctions(llvm::Module *module) {
int count = sizeof(names) / sizeof(names[0]);
for (int i = 0; i < count; ++i) {
llvm::Function *f = module->getFunction(names[i]);
if (f != NULL)
if (f != NULL && f->empty() == false)
f->setLinkage(llvm::GlobalValue::InternalLinkage);
}
}
@@ -744,6 +747,33 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
FATAL("logic error in DefineStdlib");
}
break;
case Target::GENERIC:
switch (g->target.vectorWidth) {
case 4:
extern unsigned char builtins_bitcode_generic_4[];
extern int builtins_bitcode_generic_4_length;
AddBitcodeToModule(builtins_bitcode_generic_4,
builtins_bitcode_generic_4_length,
module, symbolTable);
break;
case 8:
extern unsigned char builtins_bitcode_generic_8[];
extern int builtins_bitcode_generic_8_length;
AddBitcodeToModule(builtins_bitcode_generic_8,
builtins_bitcode_generic_8_length,
module, symbolTable);
break;
case 16:
extern unsigned char builtins_bitcode_generic_16[];
extern int builtins_bitcode_generic_16_length;
AddBitcodeToModule(builtins_bitcode_generic_16,
builtins_bitcode_generic_16_length,
module, symbolTable);
break;
default:
FATAL("logic error in DefineStdlib");
}
break;
default:
FATAL("logic error");
}
@@ -771,11 +801,16 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
if (includeStdlibISPC) {
// If the user wants the standard library to be included, parse the
// serialized version of the stdlib.ispc file to get its
// definitions added. Disable emission of performance warnings for
// now, since the user doesn't care about any of that in the stdlib
// implementation...
extern char stdlib_code[];
yy_scan_string(stdlib_code);
yyparse();
// definitions added.
if (g->target.isa == Target::GENERIC) {
extern char stdlib_generic_code[];
yy_scan_string(stdlib_generic_code);
yyparse();
}
else {
extern char stdlib_x86_code[];
yy_scan_string(stdlib_x86_code);
yyparse();
}
}
}

View File

@@ -32,6 +32,9 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; AVX target implementation.
ctlztz()
define_prefetches()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp

View File

@@ -32,12 +32,16 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Basic 16-wide definitions
stdlib_core(16)
packed_load_and_store(16)
scans(16)
int64minmax(16)
define(`WIDTH',`16')
define(`MASK',`i32')
include(`util.m4')
include(`builtins-avx-common.ll')
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
include(`target-avx-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp

View File

@@ -32,12 +32,16 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Basic 8-wide definitions
stdlib_core(8)
packed_load_and_store(8)
scans(8)
int64minmax(8)
define(`WIDTH',`8')
define(`MASK',`i32')
include(`util.m4')
include(`builtins-avx-common.ll')
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
include(`target-avx-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp

View File

@@ -0,0 +1,34 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
define(`WIDTH',`16')
include(`target-generic-common.ll')

View File

@@ -0,0 +1,34 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
define(`WIDTH',`4')
include(`target-generic-common.ll')

View File

@@ -0,0 +1,34 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
define(`WIDTH',`8')
include(`target-generic-common.ll')

View File

@@ -0,0 +1,277 @@
;; Copyright (c) 2010-2011, Intel Corporation
;; All rights reserved.
;;
;; Redistribution and use in source and binary forms, with or without
;; modification, are permitted provided that the following conditions are
;; met:
;;
;; * Redistributions of source code must retain the above copyright
;; notice, this list of conditions and the following disclaimer.
;;
;; * Redistributions in binary form must reproduce the above copyright
;; notice, this list of conditions and the following disclaimer in the
;; documentation and/or other materials provided with the distribution.
;;
;; * Neither the name of Intel Corporation nor the names of its
;; contributors may be used to endorse or promote products derived from
;; this software without specific prior written permission.
;;
;;
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
define(`MASK',`i1')
include(`util.m4')
stdlib_core()
scans()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; math
declare void @__fastmath() nounwind
;; round/floor/ceil
declare float @__round_uniform_float(float) nounwind readnone
declare float @__floor_uniform_float(float) nounwind readnone
declare float @__ceil_uniform_float(float) nounwind readnone
declare double @__round_uniform_double(double) nounwind readnone
declare double @__floor_uniform_double(double) nounwind readnone
declare double @__ceil_uniform_double(double) nounwind readnone
declare <WIDTH x float> @__round_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__floor_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__ceil_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x double> @__round_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__floor_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__ceil_varying_double(<WIDTH x double>) nounwind readnone
;; min/max
declare float @__max_uniform_float(float, float) nounwind readnone
declare float @__min_uniform_float(float, float) nounwind readnone
declare i32 @__min_uniform_int32(i32, i32) nounwind readnone
declare i32 @__max_uniform_int32(i32, i32) nounwind readnone
declare i32 @__min_uniform_uint32(i32, i32) nounwind readnone
declare i32 @__max_uniform_uint32(i32, i32) nounwind readnone
declare i64 @__min_uniform_int64(i64, i64) nounwind readnone
declare i64 @__max_uniform_int64(i64, i64) nounwind readnone
declare i64 @__min_uniform_uint64(i64, i64) nounwind readnone
declare i64 @__max_uniform_uint64(i64, i64) nounwind readnone
declare double @__min_uniform_double(double, double) nounwind readnone
declare double @__max_uniform_double(double, double) nounwind readnone
declare <WIDTH x float> @__max_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__min_varying_float(<WIDTH x float>,
<WIDTH x float>) nounwind readnone
declare <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x double> @__min_varying_double(<WIDTH x double>,
<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
<WIDTH x double>) nounwind readnone
;; sqrt/rsqrt/rcp
declare float @__rsqrt_uniform_float(float) nounwind readnone
declare float @__rcp_uniform_float(float) nounwind readnone
declare float @__sqrt_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %v) nounwind readnone
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
declare double @__sqrt_uniform_double(double) nounwind readnone
declare <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone
;; bit ops
declare i32 @__popcnt_int32(i32) nounwind readnone
declare i64 @__popcnt_int64(i64) nounwind readnone
declare i32 @__count_trailing_zeros_i32(i32) nounwind readnone
declare i64 @__count_trailing_zeros_i64(i64) nounwind readnone
declare i32 @__count_leading_zeros_i32(i32) nounwind readnone
declare i64 @__count_leading_zeros_i64(i64) nounwind readnone
;; svml
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
; or, use the macro to call the 4-wide ones twice with our 8-wide
; vectors...
declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
declare <WIDTH x float> @__svml_log(<WIDTH x float>)
declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reductions
declare i32 @__movmsk(<WIDTH x i1>) nounwind readnone
declare float @__reduce_add_float(<WIDTH x float>) nounwind readnone
declare float @__reduce_min_float(<WIDTH x float>) nounwind readnone
declare float @__reduce_max_float(<WIDTH x float>) nounwind readnone
declare i32 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_add_uint32(<WIDTH x i32> %v) nounwind readnone
declare i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone
declare double @__reduce_add_double(<WIDTH x double>) nounwind readnone
declare double @__reduce_min_double(<WIDTH x double>) nounwind readnone
declare double @__reduce_max_double(<WIDTH x double>) nounwind readnone
declare i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_add_uint64(<WIDTH x i64> %v) nounwind readnone
declare i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone
declare i1 @__reduce_equal_int32(<WIDTH x i32> %v, i32 * nocapture %samevalue,
<WIDTH x i1> %mask) nounwind
declare i1 @__reduce_equal_float(<WIDTH x float> %v, float * nocapture %samevalue,
<WIDTH x i1> %mask) nounwind
declare i1 @__reduce_equal_int64(<WIDTH x i64> %v, i64 * nocapture %samevalue,
<WIDTH x i1> %mask) nounwind
declare i1 @__reduce_equal_double(<WIDTH x double> %v, double * nocapture %samevalue,
<WIDTH x i1> %mask) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
load_and_broadcast(WIDTH, i8, 8)
load_and_broadcast(WIDTH, i16, 16)
load_and_broadcast(WIDTH, i32, 32)
load_and_broadcast(WIDTH, i64, 64)
declare <WIDTH x i8> @__load_masked_8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i16> @__load_masked_16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i32> @__load_masked_32(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i64> @__load_masked_64(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare void @__masked_store_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
<WIDTH x i1>) nounwind
declare void @__masked_store_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
<WIDTH x i1>) nounwind
declare void @__masked_store_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
<WIDTH x i1>) nounwind
declare void @__masked_store_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
<WIDTH x i1> %mask) nounwind
ifelse(LLVM_VERSION,LLVM_3_1svn,`
define void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
<WIDTH x i1>) nounwind {
%v = load <WIDTH x i8> * %0
%v1 = select <WIDTH x i1> %2, <WIDTH x i8> %1, <WIDTH x i8> %v
store <WIDTH x i8> %v1, <WIDTH x i8> * %0
ret void
}
define void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
<WIDTH x i1>) nounwind {
%v = load <WIDTH x i16> * %0
%v1 = select <WIDTH x i1> %2, <WIDTH x i16> %1, <WIDTH x i16> %v
store <WIDTH x i16> %v1, <WIDTH x i16> * %0
ret void
}
define void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
<WIDTH x i1>) nounwind {
%v = load <WIDTH x i32> * %0
%v1 = select <WIDTH x i1> %2, <WIDTH x i32> %1, <WIDTH x i32> %v
store <WIDTH x i32> %v1, <WIDTH x i32> * %0
ret void
}
define void @__masked_store_blend_64(<WIDTH x i64>* nocapture,
<WIDTH x i64>, <WIDTH x i1>) nounwind {
%v = load <WIDTH x i64> * %0
%v1 = select <WIDTH x i1> %2, <WIDTH x i64> %1, <WIDTH x i64> %v
store <WIDTH x i64> %v1, <WIDTH x i64> * %0
ret void
}
',`
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
<WIDTH x i1>) nounwind
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture %ptr,
<WIDTH x i64> %new,
<WIDTH x i1> %mask) nounwind
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather/scatter
define(`gather_scatter', `
declare <WIDTH x $1> @__gather_base_offsets32_$1(i8 * nocapture %ptr, <WIDTH x i32> %offsets,
i32 %offset_scale, <WIDTH x i1> %vecmask) nounwind readonly
declare <WIDTH x $1> @__gather_base_offsets64_$1(i8 * nocapture %ptr, <WIDTH x i64> %offsets,
i32 %offset_scale, <WIDTH x i1> %vecmask) nounwind readonly
declare <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs,
<WIDTH x i1> %vecmask) nounwind readonly
declare <WIDTH x $1> @__gather64_$1(<WIDTH x i64> %ptrs,
<WIDTH x i1> %vecmask) nounwind readonly
declare void @__scatter_base_offsets32_$1(i8* nocapture %base, <WIDTH x i32> %offsets,
i32 %offset_scale, <WIDTH x $1> %values, <WIDTH x i1> %mask) nounwind
declare void @__scatter_base_offsets64_$1(i8* nocapture %base, <WIDTH x i64> %offsets,
i32 %offset_scale, <WIDTH x $1> %values, <WIDTH x i1> %mask) nounwind
declare void @__scatter32_$1(<WIDTH x i32> %ptrs, <WIDTH x $1> %values,
<WIDTH x i1> %mask) nounwind
declare void @__scatter64_$1(<WIDTH x i64> %ptrs, <WIDTH x $1> %values,
<WIDTH x i1> %mask) nounwind
')
gather_scatter(i8)
gather_scatter(i16)
gather_scatter(i32)
gather_scatter(i64)
declare i32 @__packed_load_active(i32 * nocapture %startptr, <WIDTH x i32> * nocapture %val_ptr,
<WIDTH x i1> %full_mask) nounwind
declare i32 @__packed_store_active(i32 * %startptr, <WIDTH x i32> %vals,
<WIDTH x i1> %full_mask) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; prefetch
declare void @__prefetch_read_uniform_1(i8 *) nounwind readnone
declare void @__prefetch_read_uniform_2(i8 *) nounwind readnone
declare void @__prefetch_read_uniform_3(i8 *) nounwind readnone
declare void @__prefetch_read_uniform_nt(i8 *) nounwind readnone

View File

@@ -29,6 +29,9 @@
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ctlztz()
define_prefetches()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp

View File

@@ -36,12 +36,16 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; standard 8-wide definitions from m4 macros
stdlib_core(8)
packed_load_and_store(8)
scans(8)
int64minmax(8)
define(`WIDTH',`8')
define(`MASK',`i32')
include(`util.m4')
include(`builtins-sse2-common.ll')
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp

View File

@@ -33,12 +33,16 @@
;; Define the standard library builtins for the SSE2 target
; Define some basics for a 4-wide target
stdlib_core(4)
packed_load_and_store(4)
scans(4)
int64minmax(4)
define(`WIDTH',`4')
define(`MASK',`i32')
include(`util.m4')
include(`builtins-sse2-common.ll')
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
include(`target-sse2-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding

View File

@@ -29,6 +29,9 @@
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
ctlztz()
define_prefetches()
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rounding floats

View File

@@ -36,12 +36,16 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; standard 8-wide definitions from m4 macros
stdlib_core(8)
packed_load_and_store(8)
scans(8)
int64minmax(8)
define(`WIDTH',`8')
define(`MASK',`i32')
include(`util.m4')
include(`builtins-sse4-common.ll')
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp

View File

@@ -33,12 +33,16 @@
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; Define common 4-wide stuff
stdlib_core(4)
packed_load_and_store(4)
scans(4)
int64minmax(4)
define(`WIDTH',`4')
define(`MASK',`i32')
include(`util.m4')
include(`builtins-sse4-common.ll')
stdlib_core()
packed_load_and_store()
scans()
int64minmax()
include(`target-sse4-common.ll')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; rcp

File diff suppressed because it is too large.

ctx.cpp (10 changed lines)
View File

@@ -875,8 +875,11 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
// into an i32 value
std::vector<Symbol *> mm;
m->symbolTable->LookupFunction("__movmsk", &mm);
// There should be one with signed int signature, one unsigned int.
Assert(mm.size() == 2);
if (g->target.maskBitCount == 1)
Assert(mm.size() == 1);
else
// There should be one with signed int signature, one unsigned int.
Assert(mm.size() == 2);
// We can actually call either one, since both are i32s as far as
// LLVM's type system is concerned...
llvm::Function *fmm = mm[0]->function;
@@ -929,6 +932,9 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
return NULL;
}
if (g->target.maskBitCount == 1)
return b;
LLVM_TYPE_CONST llvm::ArrayType *at =
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(b->getType());
if (at) {

View File

@@ -288,7 +288,10 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
bool checkMask = (type->isTask == true) ||
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
if (checkMask && g->opt.disableCoherentControlFlow == false) {
checkMask &= (g->target.maskingIsFree == false);
checkMask &= (g->opt.disableCoherentControlFlow == false);
if (checkMask) {
llvm::Value *mask = ctx->GetFunctionMask();
llvm::Value *allOn = ctx->All(mask);
llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on");

View File

@@ -129,24 +129,60 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse2-x2")) {
t->isa = Target::SSE2;
t->nativeVectorWidth = 4;
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4")) {
t->isa = Target::SSE4;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
t->isa = Target::SSE4;
t->nativeVectorWidth = 4;
t->vectorWidth = 8;
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "generic-4")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 4;
t->vectorWidth = 4;
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
}
else if (!strcasecmp(isa, "generic-8")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
}
else if (!strcasecmp(isa, "generic-16")) {
t->isa = Target::GENERIC;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->maskingIsFree = true;
t->allOffMaskIsSafe = true;
t->maskBitCount = 1;
}
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
else if (!strcasecmp(isa, "avx")) {
@@ -154,12 +190,18 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx-x2")) {
t->isa = Target::AVX;
t->nativeVectorWidth = 8;
t->vectorWidth = 16;
t->attributes = "+avx,+popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
#endif // LLVM 3.0+
#if defined(LLVM_3_1svn)
@@ -168,12 +210,18 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
t->nativeVectorWidth = 8;
t->vectorWidth = 8;
t->attributes = "+avx2,+popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
else if (!strcasecmp(isa, "avx2-x2")) {
t->isa = Target::AVX2;
t->nativeVectorWidth = 16;
t->vectorWidth = 16;
t->attributes = "+avx2,+popcnt,+cmov";
t->maskingIsFree = false;
t->allOffMaskIsSafe = false;
t->maskBitCount = 32;
}
#endif // LLVM 3.1
else {
@@ -221,7 +269,7 @@ Target::SupportedTargetISAs() {
#ifdef LLVM_3_1svn
", avx2, avx2-x2"
#endif // LLVM_3_1svn
;
", generic-4, generic-8, generic-16";
}
@@ -300,6 +348,8 @@ Target::GetISAString() const {
return "avx";
case Target::AVX2:
return "avx2";
case Target::GENERIC:
return "generic";
default:
FATAL("Unhandled target in GetISAString()");
}

ispc.h (19 changed lines)
View File

@@ -193,7 +193,7 @@ struct Target {
flexible/performant of them will appear last in the enumerant. Note
also that __best_available_isa() needs to be updated if ISAs are
added or the enumerant values are reordered. */
enum ISA { SSE2, SSE4, AVX, AVX2, NUM_ISAS };
enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS };
/** Instruction set being compiled to. */
ISA isa;
@@ -222,6 +222,23 @@ struct Target {
/** Indicates whether position independent code should be generated. */
bool generatePIC;
/** Is there overhead associated with masking on the target
architecture; e.g. there is on SSE, due to extra blends and the
like, but there isn't with an ISA that supports masking
natively. */
bool maskingIsFree;
/** Is it safe to run code with the mask all off: e.g. on SSE, the fast
gather trick assumes that at least one program instance is running
(so that it can safely assume that the array base pointer is
valid). */
bool allOffMaskIsSafe;
/** How many bits are used to store each element of the mask: e.g. this
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
the generic target. */
int maskBitCount;
};

View File

@@ -22,11 +22,15 @@
<ClCompile Include="gen-bitcode-c-32.cpp" />
<ClCompile Include="gen-bitcode-c-64.cpp" />
<ClCompile Include="gen-bitcode-dispatch.cpp" />
<ClCompile Include="gen-bitcode-generic-4.cpp" />
<ClCompile Include="gen-bitcode-generic-8.cpp" />
<ClCompile Include="gen-bitcode-generic-16.cpp" />
<ClCompile Include="gen-bitcode-sse2.cpp" />
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
<ClCompile Include="gen-bitcode-sse4.cpp" />
<ClCompile Include="gen-bitcode-sse4-x2.cpp" />
<ClCompile Include="gen-stdlib.cpp" />
<ClCompile Include="gen-stdlib-generic.cpp" />
<ClCompile Include="gen-stdlib-x86.cpp" />
<ClCompile Include="ispc.cpp" />
<ClCompile Include="lex.cc">
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4003;4018</DisableSpecificWarnings>
@@ -40,15 +44,15 @@
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
</ClCompile>
<CustomBuild Include="builtins-c.c">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c &gt; gen-bitcode-c-32.cpp;
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c &gt; gen-bitcode-c-64.cpp</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang builtins-c.c</Message>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c &gt; gen-bitcode-c-32.cpp;
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c &gt; gen-bitcode-c-64.cpp</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang builtins-c.c</Message>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
<CustomBuild Include="builtins\builtins.c">
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 &gt; gen-bitcode-c-32.cpp;
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 &gt; gen-bitcode-c-64.cpp</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building builtins.c</Message>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 &gt; gen-bitcode-c-32.cpp;
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 &gt; gen-bitcode-c-64.cpp</Command>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building builtins.c</Message>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
</CustomBuild>
<ClCompile Include="stmt.cpp" />
<ClCompile Include="sym.cpp" />
@@ -75,105 +79,148 @@
<ItemGroup>
<CustomBuild Include="stdlib.ispc">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py &gt; gen-stdlib.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py &gt; gen-stdlib.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 &gt; gen-stdlib-x86.cpp;
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic &gt; gen-stdlib-generic.cpp;
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 &gt; gen-stdlib-x86.cpp;
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic &gt; gen-stdlib-generic.cpp;
</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins-sse4.ll">
<CustomBuild Include="builtins\dispatch.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll &gt; gen-bitcode-sse4.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll &gt; gen-bitcode-sse4.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins-dispatch.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll &gt; gen-bitcode-dispatch.cpp</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll &gt; gen-bitcode-dispatch.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-dispatch.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll &gt; gen-bitcode-dispatch.cpp</Command>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll &gt; gen-bitcode-dispatch.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-dispatch.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4</AdditionalInputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-dispatch.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-dispatch.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins-sse4-x2.ll">
<CustomBuild Include="builtins\target-sse4.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll &gt; gen-bitcode-sse4-x2.cpp</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll &gt; gen-bitcode-sse4.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll &gt; gen-bitcode-sse4.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-sse4-x2.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll &gt; gen-bitcode-sse4-x2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll &gt; gen-bitcode-sse4-x2.cpp</Command>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll &gt; gen-bitcode-sse4-x2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins-sse2.ll">
<CustomBuild Include="builtins\target-sse2.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll &gt; gen-bitcode-sse2.cpp</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll &gt; gen-bitcode-sse2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll &gt; gen-bitcode-sse2.cpp</Command>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll &gt; gen-bitcode-sse2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins-sse2-x2.ll">
<CustomBuild Include="builtins\target-sse2-x2.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll &gt; gen-bitcode-sse2-x2.cpp</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll &gt; gen-bitcode-sse2-x2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll &gt; gen-bitcode-sse2-x2.cpp</Command>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll &gt; gen-bitcode-sse2-x2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins-avx.ll">
<CustomBuild Include="builtins\target-avx.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll &gt; gen-bitcode-avx.cpp</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py builtins\target-avx.ll &gt; gen-bitcode-avx.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll &gt; gen-bitcode-avx.cpp</Command>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py builtins\target-avx.ll &gt; gen-bitcode-avx.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins-avx-x2.ll">
<CustomBuild Include="builtins\target-avx-x2.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll &gt; gen-bitcode-avx-x2.cpp</Command>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll &gt; gen-bitcode-avx-x2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll &gt; gen-bitcode-avx-x2.cpp</Command>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll &gt; gen-bitcode-avx-x2.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-4.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll &gt; gen-bitcode-generic-4.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-4.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll &gt; gen-bitcode-generic-4.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-4.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-4.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-4.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-8.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll &gt; gen-bitcode-generic-8.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-8.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll &gt; gen-bitcode-generic-8.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-8.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-8.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-8.cpp</Message>
</CustomBuild>
</ItemGroup>
<ItemGroup>
<CustomBuild Include="builtins\target-generic-16.ll">
<FileType>Document</FileType>
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll &gt; gen-bitcode-generic-16.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-16.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll &gt; gen-bitcode-generic-16.cpp</Command>
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-16.cpp</Outputs>
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-16.cpp</Message>
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
</CustomBuild>
</ItemGroup>
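The generic-4/8/16 entries above follow the same pattern as the existing targets: each target-*.ll file is run through m4 (pulling in builtins\util.m4 and the shared target-generic-common.ll) and then through bitcode2cpp.py, which emits a C++ file embedding the builtins as a character array. As a rough sketch of how such an embedded array can later be turned back into an llvm::Module (the symbol and function names below are assumptions for illustration; the real loading code lives in builtins.cpp and may differ):

    // Hypothetical sketch, LLVM 3.0-era API: parse an embedded builtins array
    // (as produced by bitcode2cpp.py) back into an llvm::Module.
    #include <llvm/LLVMContext.h>
    #include <llvm/Module.h>
    #include <llvm/ADT/StringRef.h>
    #include <llvm/Support/IRReader.h>
    #include <llvm/Support/MemoryBuffer.h>
    #include <llvm/Support/SourceMgr.h>

    extern char builtins_bitcode_generic_16[];      // assumed name of the generated array
    extern int builtins_bitcode_generic_16_length;  // assumed length symbol

    static llvm::Module *lLoadGeneric16Builtins(llvm::LLVMContext &ctx) {
        llvm::StringRef text(builtins_bitcode_generic_16,
                             builtins_bitcode_generic_16_length);
        llvm::MemoryBuffer *buf =
            llvm::MemoryBuffer::getMemBuffer(text, "target-generic-16");
        llvm::SMDiagnostic err;
        // ParseIR accepts either textual IR or bitcode in the buffer.
        return llvm::ParseIR(buf, err, ctx);
    }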
<ItemGroup>
<CustomBuild Include="lex.ll">
<FileType>Document</FileType>

View File

@@ -105,11 +105,14 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
// Note that both the mask and bool vectors are vectors of int32s
// (not i1s). LLVM ends up generating much better SSE code with
// this representation.
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
if (target.maskBitCount == 1)
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
else {
assert(target.maskBitCount == 32);
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
}
LLVMTypes::Int1VectorType =
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
@@ -141,7 +144,11 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
std::vector<llvm::Constant *> maskOnes;
llvm::Constant *onMask = NULL;
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
if (target.maskBitCount == 1)
onMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1,
false /*unsigned*/); // 0x1
else
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
true /*signed*/); // 0xffffffff
for (int i = 0; i < target.vectorWidth; ++i)
@@ -150,8 +157,12 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
std::vector<llvm::Constant *> maskZeros;
llvm::Constant *offMask = NULL;
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
true /*signed*/);
if (target.maskBitCount == 1)
offMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 0,
true /*signed*/);
else
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
true /*signed*/);
for (int i = 0; i < target.vectorWidth; ++i)
maskZeros.push_back(offMask);
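The practical effect is that the in-memory mask representation now follows the target's maskBitCount instead of always being 32-bit lanes. A minimal sketch of the two layouts for an 8-wide target (the width here is chosen for illustration, not taken from llvmutil.cpp):

    // maskBitCount == 1 (the new generic targets): mask lanes are i1, "on" is 1.
    llvm::VectorType *i1Mask =
        llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), 8);
    llvm::Constant *i1On =
        llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1, false);

    // maskBitCount == 32 (e.g. sse4-x2): mask lanes are i32, "on" is 0xffffffff.
    llvm::VectorType *i32Mask =
        llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), 8);
    llvm::Constant *i32On =
        llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1, true /* signed */);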

View File

@@ -1158,22 +1158,14 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
opts.addMacroDef("PI=3.1415926535");
// Add #define for current compilation target
switch (g->target.isa) {
case Target::SSE2:
opts.addMacroDef("ISPC_TARGET_SSE2");
break;
case Target::SSE4:
opts.addMacroDef("ISPC_TARGET_SSE4");
break;
case Target::AVX:
opts.addMacroDef("ISPC_TARGET_AVX");
break;
case Target::AVX2:
opts.addMacroDef("ISPC_TARGET_AVX2");
break;
default:
FATAL("Unhandled target ISA in preprocessor symbol definition");
char targetMacro[128];
sprintf(targetMacro, "ISPC_TARGET_%s", g->target.GetISAString());
char *p = targetMacro;
while (*p) {
*p = toupper(*p);
++p;
}
opts.addMacroDef(targetMacro);
if (g->target.is32Bit)
opts.addMacroDef("ISPC_POINTER_SIZE=32");

View File

@@ -2444,7 +2444,7 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
int count = sizeof(names) / sizeof(names[0]);
for (int i = 0; i < count; ++i) {
llvm::Function *f = m->module->getFunction(names[i]);
if (f != NULL) {
if (f != NULL && f->empty() == false) {
f->setLinkage(llvm::GlobalValue::InternalLinkage);
modifiedAny = true;
}
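The added f->empty() check matters for targets whose builtins only declare, rather than define, these functions: only a function that actually has a body can safely be given internal linkage, while a bare declaration must stay external so it can be resolved later. A short sketch of the distinction (the function name is illustrative, not a specific ispc builtin; module is an llvm::Module reference):

    // f->empty() is true for a declaration (no basic blocks) and false for a
    // definition, so declarations keep their external linkage.
    llvm::Function *f = module.getFunction("__masked_store_example");
    if (f != NULL && !f->empty())
        f->setLinkage(llvm::GlobalValue::InternalLinkage);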

View File

@@ -1605,7 +1605,8 @@ lAddFunctionParams(Declarator *decl) {
/** Add a symbol for the built-in mask variable to the symbol table */
static void lAddMaskToSymbolTable(SourcePos pos) {
const Type *t = AtomicType::VaryingConstUInt32;
const Type *t = g->target.isa == Target::GENERIC ?
AtomicType::VaryingConstBool : AtomicType::VaryingConstUInt32;
Symbol *maskSymbol = new Symbol("__mask", pos, t);
m->symbolTable->AddVariable(maskSymbol);
}

View File

@@ -38,6 +38,14 @@
ispc code
*/
#ifdef ISPC_TARGET_GENERIC
#define IntMaskType bool
#define UIntMaskType bool
#else
#define IntMaskType int32
#define UIntMaskType unsigned int32
#endif
///////////////////////////////////////////////////////////////////////////
// Low level primitives
@@ -274,13 +282,21 @@ static inline int32 sign_extend(bool v) {
static inline uniform bool any(bool v) {
// We only care about whether "any" is true for the active program instances,
// so we have to mask v with the current program mask.
#ifdef ISPC_TARGET_GENERIC
return __movmsk(v & __mask) != 0;
#else
return __movmsk(__sext_varying_bool(v) & __mask) != 0;
#endif
}
static inline uniform bool all(bool v) {
// As with any(), we need to explicitly mask v with the current program mask
// so we're only looking at the current lanes
#ifdef ISPC_TARGET_GENERIC
bool match = ((v & __mask) == __mask);
#else
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
#endif
return __movmsk(match) == (1 << programCount) - 1;
}
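In both variants, __movmsk collapses the masked vector to one bit per program instance, and all() then compares that pattern against the value with every lane set. A rough scalar analogue of the final test for an 8-wide program (illustrative only; __movmsk itself is a per-target builtin):

    #include <cstdint>

    // Build a movmsk-style bit pattern, one bit per lane, then check that it
    // equals (1 << programCount) - 1, i.e. 0xff for programCount == 8.
    static bool all8(const bool lane[8]) {
        uint32_t bits = 0;
        for (int i = 0; i < 8; ++i)
            bits |= uint32_t(lane[i]) << i;
        return bits == ((1u << 8) - 1);
    }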
@@ -308,7 +324,11 @@ static inline int popcnt(int64 v) {
static inline uniform int popcnt(bool v) {
// As with any() and all(), only count across the active lanes
#ifdef ISPC_TARGET_GENERIC
return __popcnt_int32(__movmsk(v & __mask));
#else
return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask));
#endif
}
static inline uniform int lanemask() {
@@ -672,19 +692,19 @@ static inline uniform bool reduce_equal(TYPE v, uniform TYPE * uniform value) {
return __reduce_equal_##FUNCTYPE(v, value, (MASKTYPE)__mask); \
}
REDUCE_EQUAL(int32, int32, int32)
REDUCE_EQUAL(unsigned int32, int32, unsigned int32)
REDUCE_EQUAL(float, float, int32)
REDUCE_EQUAL(int64, int64, int32)
REDUCE_EQUAL(unsigned int64, int64, unsigned int32)
REDUCE_EQUAL(double, double, int32)
REDUCE_EQUAL(int32, int32, IntMaskType)
REDUCE_EQUAL(unsigned int32, int32, UIntMaskType)
REDUCE_EQUAL(float, float, IntMaskType)
REDUCE_EQUAL(int64, int64, IntMaskType)
REDUCE_EQUAL(unsigned int64, int64, UIntMaskType)
REDUCE_EQUAL(double, double, IntMaskType)
static int32 exclusive_scan_add(int32 v) {
return __exclusive_scan_add_i32(v, (int32)__mask);
return __exclusive_scan_add_i32(v, (IntMaskType)__mask);
}
static unsigned int32 exclusive_scan_add(unsigned int32 v) {
return __exclusive_scan_add_i32(v, __mask);
return __exclusive_scan_add_i32((int32)v, (IntMaskType)__mask);
}
static float exclusive_scan_add(float v) {
@@ -692,11 +712,11 @@ static float exclusive_scan_add(float v) {
}
static int64 exclusive_scan_add(int64 v) {
return __exclusive_scan_add_i64(v, (int32)__mask);
return __exclusive_scan_add_i64(v, (IntMaskType)__mask);
}
static unsigned int64 exclusive_scan_add(unsigned int64 v) {
return __exclusive_scan_add_i64(v, __mask);
return __exclusive_scan_add_i64(v, (UIntMaskType)__mask);
}
static double exclusive_scan_add(double v) {
@@ -704,35 +724,35 @@ static double exclusive_scan_add(double v) {
}
static int32 exclusive_scan_and(int32 v) {
return __exclusive_scan_and_i32(v, (int32)__mask);
return __exclusive_scan_and_i32(v, (IntMaskType)__mask);
}
static unsigned int32 exclusive_scan_and(unsigned int32 v) {
return __exclusive_scan_and_i32(v, __mask);
return __exclusive_scan_and_i32(v, (UIntMaskType)__mask);
}
static int64 exclusive_scan_and(int64 v) {
return __exclusive_scan_and_i64(v, (int32)__mask);
return __exclusive_scan_and_i64(v, (IntMaskType)__mask);
}
static unsigned int64 exclusive_scan_and(unsigned int64 v) {
return __exclusive_scan_and_i64(v, __mask);
return __exclusive_scan_and_i64(v, (UIntMaskType)__mask);
}
static int32 exclusive_scan_or(int32 v) {
return __exclusive_scan_or_i32(v, (int32)__mask);
return __exclusive_scan_or_i32(v, (IntMaskType)__mask);
}
static unsigned int32 exclusive_scan_or(unsigned int32 v) {
return __exclusive_scan_or_i32(v, __mask);
return __exclusive_scan_or_i32(v, (UIntMaskType)__mask);
}
static int64 exclusive_scan_or(int64 v) {
return __exclusive_scan_or_i64(v, (int32)__mask);
return __exclusive_scan_or_i64(v, (IntMaskType)__mask);
}
static unsigned int64 exclusive_scan_or(unsigned int64 v) {
return __exclusive_scan_or_i64(v, __mask);
return __exclusive_scan_or_i64(v, (UIntMaskType)__mask);
}
///////////////////////////////////////////////////////////////////////////
@@ -741,23 +761,23 @@ static unsigned int64 exclusive_scan_or(unsigned int64 v) {
static inline uniform int
packed_load_active(uniform unsigned int * uniform a,
unsigned int * uniform vals) {
return __packed_load_active(a, vals, (unsigned int32)__mask);
return __packed_load_active(a, vals, (UIntMaskType)__mask);
}
static inline uniform int
packed_store_active(uniform unsigned int * uniform a,
unsigned int vals) {
return __packed_store_active(a, vals, (unsigned int32)__mask);
return __packed_store_active(a, vals, (UIntMaskType)__mask);
}
static inline uniform int
packed_load_active(uniform int * uniform a, int * uniform vals) {
return __packed_load_active(a, vals, (int32)__mask);
return __packed_load_active(a, vals, (IntMaskType)__mask);
}
static inline uniform int
packed_store_active(uniform int * uniform a, int vals) {
return __packed_store_active(a, vals, (int32)__mask);
return __packed_store_active(a, vals, (IntMaskType)__mask);
}
///////////////////////////////////////////////////////////////////////////
@@ -848,49 +868,49 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
return ret; \
}
DEFINE_ATOMIC_OP(int32,int32,add,add,int32)
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,int32)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min,int32)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max,int32)
DEFINE_ATOMIC_OP(int32,int32,and,and,int32)
DEFINE_ATOMIC_OP(int32,int32,or,or,int32)
DEFINE_ATOMIC_OP(int32,int32,xor,xor,int32)
DEFINE_ATOMIC_OP(int32,int32,swap,swap,int32)
DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
DEFINE_ATOMIC_OP(int32,int32,swap,swap,IntMaskType)
// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,unsigned int32)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin,unsigned int32)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap,UIntMaskType)
DEFINE_ATOMIC_OP(float,float,swap,swap,int32)
DEFINE_ATOMIC_OP(float,float,swap,swap,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,add,add,int32)
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,int32)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min,int32)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max,int32)
DEFINE_ATOMIC_OP(int64,int64,and,and,int32)
DEFINE_ATOMIC_OP(int64,int64,or,or,int32)
DEFINE_ATOMIC_OP(int64,int64,xor,xor,int32)
DEFINE_ATOMIC_OP(int64,int64,swap,swap,int32)
DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min,IntMaskType)
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
DEFINE_ATOMIC_OP(int64,int64,swap,swap,IntMaskType)
// For everything but atomic min and max, we can use the same
// implementations for unsigned as for signed.
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,unsigned int32)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin,unsigned int32)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap,unsigned int32)
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin,UIntMaskType)
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap,UIntMaskType)
DEFINE_ATOMIC_OP(double,double,swap,swap,int32)
DEFINE_ATOMIC_OP(double,double,swap,swap,IntMaskType)
#undef DEFINE_ATOMIC_OP
@@ -913,12 +933,12 @@ static inline uniform TA atomic_compare_exchange_global( \
return ret; \
}
ATOMIC_DECL_CMPXCHG(int32, int32, int32)
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, unsigned int32)
ATOMIC_DECL_CMPXCHG(float, float, int32)
ATOMIC_DECL_CMPXCHG(int64, int64, int32)
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, unsigned int32)
ATOMIC_DECL_CMPXCHG(double, double, int32)
ATOMIC_DECL_CMPXCHG(int32, int32, IntMaskType)
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, UIntMaskType)
ATOMIC_DECL_CMPXCHG(float, float, IntMaskType)
ATOMIC_DECL_CMPXCHG(int64, int64, IntMaskType)
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, UIntMaskType)
ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
#undef ATOMIC_DECL_CMPXCHG

View File

@@ -2,7 +2,9 @@
import sys
print "char stdlib_code[] = { "
t=str(sys.argv[1])
print "char stdlib_" + t + "_code[] = { "
for line in sys.stdin:
for c in line:
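bitcode2cpp.py now takes a name on its command line and bakes it into the emitted array, so the per-target stdlib variants (stdlib_generic_ispc, stdlib_x86_ispc) and the builtins arrays can coexist in one binary. Assuming the script reduces its argument to a short identifier (the part of the script that does so is outside this hunk), the generated C++ has roughly this shape:

    // Illustrative shape of a generated file such as objs/stdlib_generic_ispc.cpp;
    // byte values are placeholders, one array entry per character read from stdin.
    char stdlib_generic_code[] = {
        0x2f, 0x2f, 0x20, /* ... */
    };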

View File

@@ -622,9 +622,6 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
/** Given an AST node, check to see if it's safe if we happen to run the
code for that node with the execution mask all off.
FIXME: this is actually a target-specific thing; for non SSE/AVX
targets with more complete masking support, some of this won't apply...
*/
static bool
lCheckAllOffSafety(ASTNode *node, void *data) {
@@ -648,6 +645,11 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
return false;
}
if (g->target.allOffMaskIsSafe == true)
// Don't worry about memory accesses if we have a target that can
// safely run them with the mask all off
return true;
IndexExpr *ie;
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
const Type *type = ie->baseExpr->GetType();