Add "generic" 4, 8, and 16-wide targets.
When used, these targets end up with calls to undefined functions for all of the various special vector stuff ispc needs to compile ispc programs (masked store, gather, min/max, sqrt, etc.). These targets are not yet useful for anything, but are a step toward having an option to emit C++ code with calls out to intrinsics. Reorganized the directory structure a bit and put the LLVM bitcode used to define target-specific stuff (as well as some generic built-ins stuff) into a builtins/ directory. Note that for building on Windows, it's now necessary to set an LLVM_VERSION environment variable (with values like LLVM_2_9, LLVM_3_0, LLVM_3_1svn, etc.).
This commit is contained in:
64
Makefile
64
Makefile
@@ -62,14 +62,17 @@ CXX_SRC=ast.cpp builtins.cpp ctx.cpp decl.cpp expr.cpp func.cpp ispc.cpp \
|
|||||||
util.cpp
|
util.cpp
|
||||||
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
|
||||||
opt.h stmt.h sym.h type.h util.h
|
opt.h stmt.h sym.h type.h util.h
|
||||||
BUILTINS_SRC=builtins-avx.ll builtins-avx-x2.ll builtins-sse2.ll builtins-sse2-x2.ll \
|
TARGETS=avx avx-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 generic-16
|
||||||
builtins-sse4.ll builtins-sse4-x2.ll builtins-dispatch.ll
|
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
|
||||||
|
builtins/dispatch.ll
|
||||||
|
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
|
||||||
|
builtins-c-32.cpp builtins-c-64.cpp
|
||||||
BISON_SRC=parse.yy
|
BISON_SRC=parse.yy
|
||||||
FLEX_SRC=lex.ll
|
FLEX_SRC=lex.ll
|
||||||
|
|
||||||
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_SRC:.ll=.o) \
|
OBJS=$(addprefix objs/, $(CXX_SRC:.cpp=.o) $(BUILTINS_OBJS) \
|
||||||
builtins-c-32.o builtins-c-64.o stdlib_ispc.o $(BISON_SRC:.yy=.o) \
|
stdlib_generic_ispc.o stdlib_x86_ispc.o \
|
||||||
$(FLEX_SRC:.ll=.o))
|
$(BISON_SRC:.yy=.o) $(FLEX_SRC:.ll=.o))
|
||||||
|
|
||||||
default: ispc
|
default: ispc
|
||||||
|
|
||||||
@@ -104,6 +107,10 @@ objs/%.o: %.cpp
|
|||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
|
objs/%.o: objs/%.cpp
|
||||||
|
@echo Compiling $<
|
||||||
|
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
objs/parse.cc: parse.yy
|
objs/parse.cc: parse.yy
|
||||||
@echo Running bison on $<
|
@echo Running bison on $<
|
||||||
@$(YACC) -o $@ $<
|
@$(YACC) -o $@ $<
|
||||||
@@ -120,41 +127,24 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
|||||||
@echo Compiling $<
|
@echo Compiling $<
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||||
|
|
||||||
objs/builtins-%.cpp: builtins-%.ll
|
objs/builtins-%.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
||||||
@echo Creating C++ source from builtin definitions file $<
|
|
||||||
@m4 -DLLVM_VERSION=$(LLVM_VERSION) builtins.m4 $< | ./bitcode2cpp.py $< > $@
|
|
||||||
|
|
||||||
objs/builtins-%.o: objs/builtins-%.cpp
|
|
||||||
@echo Compiling $<
|
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
objs/builtins-c-32.cpp: builtins-c.c
|
|
||||||
@echo Creating C++ source from builtins definition file $<
|
@echo Creating C++ source from builtins definition file $<
|
||||||
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-32.c > $@
|
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) $< | ./bitcode2cpp.py $< > $@
|
||||||
|
|
||||||
objs/builtins-c-32.o: objs/builtins-c-32.cpp
|
objs/builtins-c-32.cpp: builtins/builtins.c
|
||||||
@echo Compiling $<
|
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
|
||||||
|
|
||||||
objs/builtins-c-64.cpp: builtins-c.c
|
|
||||||
@echo Creating C++ source from builtins definition file $<
|
@echo Creating C++ source from builtins definition file $<
|
||||||
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py builtins-c-64.c > $@
|
@$(CLANG) -m32 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py c-32 > $@
|
||||||
|
|
||||||
objs/builtins-c-64.o: objs/builtins-c-64.cpp
|
objs/builtins-c-64.cpp: builtins/builtins.c
|
||||||
@echo Compiling $<
|
@echo Creating C++ source from builtins definition file $<
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
@$(CLANG) -m64 -emit-llvm -c $< -o - | llvm-dis - | ./bitcode2cpp.py c-64 > $@
|
||||||
|
|
||||||
objs/stdlib_ispc.cpp: stdlib.ispc
|
objs/stdlib_generic_ispc.cpp: stdlib.ispc
|
||||||
@echo Creating C++ source from $<
|
@echo Creating C++ source from $< for generic
|
||||||
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | ./stdlib2cpp.py > $@
|
@$(CLANG) -E -x c -DISPC_TARGET_GENERIC=1 -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
||||||
|
./stdlib2cpp.py generic > $@
|
||||||
|
|
||||||
objs/stdlib_ispc.o: objs/stdlib_ispc.cpp
|
objs/stdlib_x86_ispc.cpp: stdlib.ispc
|
||||||
@echo Compiling $<
|
@echo Creating C++ source from $< for x86
|
||||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
@$(CLANG) -E -x c -DISPC=1 -DPI=3.1415926536 $< -o - | \
|
||||||
|
./stdlib2cpp.py x86 > $@
|
||||||
objs/builtins-sse2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2.ll
|
|
||||||
objs/builtins-sse2-x2.cpp: builtins.m4 builtins-sse2-common.ll builtins-sse2-x2.ll
|
|
||||||
objs/builtins-sse4.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4.ll
|
|
||||||
objs/builtins-sse4-x2.cpp: builtins.m4 builtins-sse4-common.ll builtins-sse4-x2.ll
|
|
||||||
objs/builtins-avx.cpp: builtins.m4 builtins-avx-common.ll builtins-avx.ll
|
|
||||||
objs/builtins-avx-x2.cpp: builtins.m4 builtins-avx-common.ll builtins-avx-x2.ll
|
|
||||||
|
|||||||
@@ -11,7 +11,8 @@ length=0
|
|||||||
|
|
||||||
src=str(sys.argv[1])
|
src=str(sys.argv[1])
|
||||||
|
|
||||||
target = re.sub(".*builtins-", "", src)
|
target = re.sub("builtins/target-", "", src)
|
||||||
|
target = re.sub("builtins/", "", target)
|
||||||
target = re.sub("\.ll$", "", target)
|
target = re.sub("\.ll$", "", target)
|
||||||
target = re.sub("\.c$", "", target)
|
target = re.sub("\.c$", "", target)
|
||||||
target = re.sub("-", "_", target)
|
target = re.sub("-", "_", target)
|
||||||
|
|||||||
51
builtins.cpp
51
builtins.cpp
@@ -99,6 +99,9 @@ lLLVMTypeToISPCType(const llvm::Type *t, bool intAsUnsigned) {
|
|||||||
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
|
return intAsUnsigned ? AtomicType::UniformUInt64 : AtomicType::UniformInt64;
|
||||||
|
|
||||||
// varying
|
// varying
|
||||||
|
if (LLVMTypes::MaskType != LLVMTypes::Int32VectorType &&
|
||||||
|
t == LLVMTypes::MaskType)
|
||||||
|
return AtomicType::VaryingBool;
|
||||||
else if (t == LLVMTypes::Int8VectorType)
|
else if (t == LLVMTypes::Int8VectorType)
|
||||||
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
|
return intAsUnsigned ? AtomicType::VaryingUInt8 : AtomicType::VaryingInt8;
|
||||||
else if (t == LLVMTypes::Int16VectorType)
|
else if (t == LLVMTypes::Int16VectorType)
|
||||||
@@ -194,7 +197,7 @@ lCreateISPCSymbol(llvm::Function *func, SymbolTable *symbolTable) {
|
|||||||
// symbol creation code below assumes that any LLVM vector of i32s is a
|
// symbol creation code below assumes that any LLVM vector of i32s is a
|
||||||
// varying int32. Here, we need that to be interpreted as a varying
|
// varying int32. Here, we need that to be interpreted as a varying
|
||||||
// bool, so just have a one-off override for that one...
|
// bool, so just have a one-off override for that one...
|
||||||
if (name == "__sext_varying_bool") {
|
if (g->target.maskBitCount != 1 && name == "__sext_varying_bool") {
|
||||||
const Type *returnType = AtomicType::VaryingInt32;
|
const Type *returnType = AtomicType::VaryingInt32;
|
||||||
std::vector<const Type *> argTypes;
|
std::vector<const Type *> argTypes;
|
||||||
argTypes.push_back(AtomicType::VaryingBool);
|
argTypes.push_back(AtomicType::VaryingBool);
|
||||||
@@ -556,7 +559,7 @@ lSetInternalFunctions(llvm::Module *module) {
|
|||||||
int count = sizeof(names) / sizeof(names[0]);
|
int count = sizeof(names) / sizeof(names[0]);
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
llvm::Function *f = module->getFunction(names[i]);
|
llvm::Function *f = module->getFunction(names[i]);
|
||||||
if (f != NULL)
|
if (f != NULL && f->empty() == false)
|
||||||
f->setLinkage(llvm::GlobalValue::InternalLinkage);
|
f->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -744,6 +747,33 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
FATAL("logic error in DefineStdlib");
|
FATAL("logic error in DefineStdlib");
|
||||||
}
|
}
|
||||||
break;
|
break;
|
||||||
|
case Target::GENERIC:
|
||||||
|
switch (g->target.vectorWidth) {
|
||||||
|
case 4:
|
||||||
|
extern unsigned char builtins_bitcode_generic_4[];
|
||||||
|
extern int builtins_bitcode_generic_4_length;
|
||||||
|
AddBitcodeToModule(builtins_bitcode_generic_4,
|
||||||
|
builtins_bitcode_generic_4_length,
|
||||||
|
module, symbolTable);
|
||||||
|
break;
|
||||||
|
case 8:
|
||||||
|
extern unsigned char builtins_bitcode_generic_8[];
|
||||||
|
extern int builtins_bitcode_generic_8_length;
|
||||||
|
AddBitcodeToModule(builtins_bitcode_generic_8,
|
||||||
|
builtins_bitcode_generic_8_length,
|
||||||
|
module, symbolTable);
|
||||||
|
break;
|
||||||
|
case 16:
|
||||||
|
extern unsigned char builtins_bitcode_generic_16[];
|
||||||
|
extern int builtins_bitcode_generic_16_length;
|
||||||
|
AddBitcodeToModule(builtins_bitcode_generic_16,
|
||||||
|
builtins_bitcode_generic_16_length,
|
||||||
|
module, symbolTable);
|
||||||
|
break;
|
||||||
|
default:
|
||||||
|
FATAL("logic error in DefineStdlib");
|
||||||
|
}
|
||||||
|
break;
|
||||||
default:
|
default:
|
||||||
FATAL("logic error");
|
FATAL("logic error");
|
||||||
}
|
}
|
||||||
@@ -771,11 +801,16 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
|
|||||||
if (includeStdlibISPC) {
|
if (includeStdlibISPC) {
|
||||||
// If the user wants the standard library to be included, parse the
|
// If the user wants the standard library to be included, parse the
|
||||||
// serialized version of the stdlib.ispc file to get its
|
// serialized version of the stdlib.ispc file to get its
|
||||||
// definitions added. Disable emission of performance warnings for
|
// definitions added.
|
||||||
// now, since the user doesn't care about any of that in the stdlib
|
if (g->target.isa == Target::GENERIC) {
|
||||||
// implementation...
|
extern char stdlib_generic_code[];
|
||||||
extern char stdlib_code[];
|
yy_scan_string(stdlib_generic_code);
|
||||||
yy_scan_string(stdlib_code);
|
yyparse();
|
||||||
yyparse();
|
}
|
||||||
|
else {
|
||||||
|
extern char stdlib_x86_code[];
|
||||||
|
yy_scan_string(stdlib_x86_code);
|
||||||
|
yyparse();
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|||||||
@@ -32,6 +32,9 @@
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; AVX target implementation.
|
;; AVX target implementation.
|
||||||
|
|
||||||
|
ctlztz()
|
||||||
|
define_prefetches()
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
|
|
||||||
@@ -32,12 +32,16 @@
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Basic 16-wide definitions
|
;; Basic 16-wide definitions
|
||||||
|
|
||||||
stdlib_core(16)
|
define(`WIDTH',`16')
|
||||||
packed_load_and_store(16)
|
define(`MASK',`i32')
|
||||||
scans(16)
|
include(`util.m4')
|
||||||
int64minmax(16)
|
|
||||||
|
|
||||||
include(`builtins-avx-common.ll')
|
stdlib_core()
|
||||||
|
packed_load_and_store()
|
||||||
|
scans()
|
||||||
|
int64minmax()
|
||||||
|
|
||||||
|
include(`target-avx-common.ll')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
@@ -32,12 +32,16 @@
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; Basic 8-wide definitions
|
;; Basic 8-wide definitions
|
||||||
|
|
||||||
stdlib_core(8)
|
define(`WIDTH',`8')
|
||||||
packed_load_and_store(8)
|
define(`MASK',`i32')
|
||||||
scans(8)
|
include(`util.m4')
|
||||||
int64minmax(8)
|
|
||||||
|
|
||||||
include(`builtins-avx-common.ll')
|
stdlib_core()
|
||||||
|
packed_load_and_store()
|
||||||
|
scans()
|
||||||
|
int64minmax()
|
||||||
|
|
||||||
|
include(`target-avx-common.ll')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
34
builtins/target-generic-16.ll
Normal file
34
builtins/target-generic-16.ll
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
;; Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
define(`WIDTH',`16')
|
||||||
|
include(`target-generic-common.ll')
|
||||||
|
|
||||||
34
builtins/target-generic-4.ll
Normal file
34
builtins/target-generic-4.ll
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
;; Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
define(`WIDTH',`4')
|
||||||
|
include(`target-generic-common.ll')
|
||||||
|
|
||||||
34
builtins/target-generic-8.ll
Normal file
34
builtins/target-generic-8.ll
Normal file
@@ -0,0 +1,34 @@
|
|||||||
|
;; Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
define(`WIDTH',`8')
|
||||||
|
include(`target-generic-common.ll')
|
||||||
|
|
||||||
277
builtins/target-generic-common.ll
Normal file
277
builtins/target-generic-common.ll
Normal file
@@ -0,0 +1,277 @@
|
|||||||
|
;; Copyright (c) 2010-2011, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
define(`MASK',`i1')
|
||||||
|
include(`util.m4')
|
||||||
|
|
||||||
|
stdlib_core()
|
||||||
|
|
||||||
|
scans()
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; math
|
||||||
|
|
||||||
|
declare void @__fastmath() nounwind
|
||||||
|
|
||||||
|
;; round/floor/ceil
|
||||||
|
|
||||||
|
declare float @__round_uniform_float(float) nounwind readnone
|
||||||
|
declare float @__floor_uniform_float(float) nounwind readnone
|
||||||
|
declare float @__ceil_uniform_float(float) nounwind readnone
|
||||||
|
|
||||||
|
declare double @__round_uniform_double(double) nounwind readnone
|
||||||
|
declare double @__floor_uniform_double(double) nounwind readnone
|
||||||
|
declare double @__ceil_uniform_double(double) nounwind readnone
|
||||||
|
|
||||||
|
declare <WIDTH x float> @__round_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__floor_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__ceil_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
|
declare <WIDTH x double> @__round_varying_double(<WIDTH x double>) nounwind readnone
|
||||||
|
declare <WIDTH x double> @__floor_varying_double(<WIDTH x double>) nounwind readnone
|
||||||
|
declare <WIDTH x double> @__ceil_varying_double(<WIDTH x double>) nounwind readnone
|
||||||
|
|
||||||
|
;; min/max
|
||||||
|
|
||||||
|
declare float @__max_uniform_float(float, float) nounwind readnone
|
||||||
|
declare float @__min_uniform_float(float, float) nounwind readnone
|
||||||
|
declare i32 @__min_uniform_int32(i32, i32) nounwind readnone
|
||||||
|
declare i32 @__max_uniform_int32(i32, i32) nounwind readnone
|
||||||
|
declare i32 @__min_uniform_uint32(i32, i32) nounwind readnone
|
||||||
|
declare i32 @__max_uniform_uint32(i32, i32) nounwind readnone
|
||||||
|
declare i64 @__min_uniform_int64(i64, i64) nounwind readnone
|
||||||
|
declare i64 @__max_uniform_int64(i64, i64) nounwind readnone
|
||||||
|
declare i64 @__min_uniform_uint64(i64, i64) nounwind readnone
|
||||||
|
declare i64 @__max_uniform_uint64(i64, i64) nounwind readnone
|
||||||
|
declare double @__min_uniform_double(double, double) nounwind readnone
|
||||||
|
declare double @__max_uniform_double(double, double) nounwind readnone
|
||||||
|
|
||||||
|
declare <WIDTH x float> @__max_varying_float(<WIDTH x float>,
|
||||||
|
<WIDTH x float>) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__min_varying_float(<WIDTH x float>,
|
||||||
|
<WIDTH x float>) nounwind readnone
|
||||||
|
declare <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||||
|
declare <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||||
|
declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||||
|
declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
|
||||||
|
declare <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||||
|
declare <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||||
|
declare <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||||
|
declare <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
|
||||||
|
declare <WIDTH x double> @__min_varying_double(<WIDTH x double>,
|
||||||
|
<WIDTH x double>) nounwind readnone
|
||||||
|
declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
|
||||||
|
<WIDTH x double>) nounwind readnone
|
||||||
|
|
||||||
|
;; sqrt/rsqrt/rcp
|
||||||
|
|
||||||
|
declare float @__rsqrt_uniform_float(float) nounwind readnone
|
||||||
|
declare float @__rcp_uniform_float(float) nounwind readnone
|
||||||
|
declare float @__sqrt_uniform_float(float) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float> %v) nounwind readnone
|
||||||
|
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone
|
||||||
|
|
||||||
|
declare double @__sqrt_uniform_double(double) nounwind readnone
|
||||||
|
declare <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone
|
||||||
|
|
||||||
|
;; bit ops
|
||||||
|
|
||||||
|
declare i32 @__popcnt_int32(i32) nounwind readnone
|
||||||
|
declare i64 @__popcnt_int64(i64) nounwind readnone
|
||||||
|
|
||||||
|
declare i32 @__count_trailing_zeros_i32(i32) nounwind readnone
|
||||||
|
declare i64 @__count_trailing_zeros_i64(i64) nounwind readnone
|
||||||
|
declare i32 @__count_leading_zeros_i32(i32) nounwind readnone
|
||||||
|
declare i64 @__count_leading_zeros_i64(i64) nounwind readnone
|
||||||
|
|
||||||
|
;; svml
|
||||||
|
|
||||||
|
; FIXME: need either to wire these up to WIDTH-wide SVML entrypoints,
|
||||||
|
; or, use the macro to call the 4-wide ones as many times as needed for
|
||||||
|
; our WIDTH-wide vectors...
|
||||||
|
|
||||||
|
declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
|
||||||
|
declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
|
||||||
|
declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
|
||||||
|
declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
|
||||||
|
declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
|
||||||
|
declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
|
||||||
|
declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
|
||||||
|
declare <WIDTH x float> @__svml_log(<WIDTH x float>)
|
||||||
|
declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
|
;; reductions
|
||||||
|
|
||||||
|
declare i32 @__movmsk(<WIDTH x i1>) nounwind readnone
|
||||||
|
|
||||||
|
declare float @__reduce_add_float(<WIDTH x float>) nounwind readnone
|
||||||
|
declare float @__reduce_min_float(<WIDTH x float>) nounwind readnone
|
||||||
|
declare float @__reduce_max_float(<WIDTH x float>) nounwind readnone
|
||||||
|
|
||||||
|
declare i32 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone
|
||||||
|
declare i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone
|
||||||
|
declare i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone
|
||||||
|
|
||||||
|
declare i32 @__reduce_add_uint32(<WIDTH x i32> %v) nounwind readnone
|
||||||
|
declare i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone
|
||||||
|
declare i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone
|
||||||
|
|
||||||
|
declare double @__reduce_add_double(<WIDTH x double>) nounwind readnone
|
||||||
|
declare double @__reduce_min_double(<WIDTH x double>) nounwind readnone
|
||||||
|
declare double @__reduce_max_double(<WIDTH x double>) nounwind readnone
|
||||||
|
|
||||||
|
declare i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone
|
||||||
|
declare i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone
|
||||||
|
declare i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone
|
||||||
|
|
||||||
|
declare i64 @__reduce_add_uint64(<WIDTH x i64> %v) nounwind readnone
|
||||||
|
declare i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone
|
||||||
|
declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone
|
||||||
|
|
||||||
|
declare i1 @__reduce_equal_int32(<WIDTH x i32> %v, i32 * nocapture %samevalue,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
declare i1 @__reduce_equal_float(<WIDTH x float> %v, float * nocapture %samevalue,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
declare i1 @__reduce_equal_int64(<WIDTH x i64> %v, i64 * nocapture %samevalue,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
declare i1 @__reduce_equal_double(<WIDTH x double> %v, double * nocapture %samevalue,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; unaligned loads/loads+broadcasts
|
||||||
|
|
||||||
|
load_and_broadcast(WIDTH, i8, 8)
|
||||||
|
load_and_broadcast(WIDTH, i16, 16)
|
||||||
|
load_and_broadcast(WIDTH, i32, 32)
|
||||||
|
load_and_broadcast(WIDTH, i64, 64)
|
||||||
|
|
||||||
|
declare <WIDTH x i8> @__load_masked_8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||||
|
declare <WIDTH x i16> @__load_masked_16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||||
|
declare <WIDTH x i32> @__load_masked_32(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||||
|
declare <WIDTH x i64> @__load_masked_64(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
|
||||||
|
|
||||||
|
declare void @__masked_store_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||||
|
<WIDTH x i1>) nounwind
|
||||||
|
declare void @__masked_store_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||||
|
<WIDTH x i1>) nounwind
|
||||||
|
declare void @__masked_store_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||||
|
<WIDTH x i1>) nounwind
|
||||||
|
declare void @__masked_store_64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
|
||||||
|
ifelse(LLVM_VERSION,LLVM_3_1svn,`
|
||||||
|
define void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||||
|
<WIDTH x i1>) nounwind {
|
||||||
|
%v = load <WIDTH x i8> * %0
|
||||||
|
%v1 = select <WIDTH x i1> %2, <WIDTH x i8> %1, <WIDTH x i8> %v
|
||||||
|
store <WIDTH x i8> %v1, <WIDTH x i8> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||||
|
<WIDTH x i1>) nounwind {
|
||||||
|
%v = load <WIDTH x i16> * %0
|
||||||
|
%v1 = select <WIDTH x i1> %2, <WIDTH x i16> %1, <WIDTH x i16> %v
|
||||||
|
store <WIDTH x i16> %v1, <WIDTH x i16> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||||
|
<WIDTH x i1>) nounwind {
|
||||||
|
%v = load <WIDTH x i32> * %0
|
||||||
|
%v1 = select <WIDTH x i1> %2, <WIDTH x i32> %1, <WIDTH x i32> %v
|
||||||
|
store <WIDTH x i32> %v1, <WIDTH x i32> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
|
||||||
|
define void @__masked_store_blend_64(<WIDTH x i64>* nocapture,
|
||||||
|
<WIDTH x i64>, <WIDTH x i1>) nounwind {
|
||||||
|
%v = load <WIDTH x i64> * %0
|
||||||
|
%v1 = select <WIDTH x i1> %2, <WIDTH x i64> %1, <WIDTH x i64> %v
|
||||||
|
store <WIDTH x i64> %v1, <WIDTH x i64> * %0
|
||||||
|
ret void
|
||||||
|
}
|
||||||
|
',`
|
||||||
|
declare void @__masked_store_blend_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
|
||||||
|
<WIDTH x i1>) nounwind
|
||||||
|
declare void @__masked_store_blend_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
|
||||||
|
<WIDTH x i1>) nounwind
|
||||||
|
declare void @__masked_store_blend_32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
|
||||||
|
<WIDTH x i1>) nounwind
|
||||||
|
declare void @__masked_store_blend_64(<WIDTH x i64>* nocapture %ptr,
|
||||||
|
<WIDTH x i64> %new,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
')
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; gather/scatter
|
||||||
|
|
||||||
|
define(`gather_scatter', `
|
||||||
|
declare <WIDTH x $1> @__gather_base_offsets32_$1(i8 * nocapture %ptr, <WIDTH x i32> %offsets,
|
||||||
|
i32 %offset_scale, <WIDTH x i1> %vecmask) nounwind readonly
|
||||||
|
declare <WIDTH x $1> @__gather_base_offsets64_$1(i8 * nocapture %ptr, <WIDTH x i64> %offsets,
|
||||||
|
i32 %offset_scale, <WIDTH x i1> %vecmask) nounwind readonly
|
||||||
|
declare <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs,
|
||||||
|
<WIDTH x i1> %vecmask) nounwind readonly
|
||||||
|
declare <WIDTH x $1> @__gather64_$1(<WIDTH x i64> %ptrs,
|
||||||
|
<WIDTH x i1> %vecmask) nounwind readonly
|
||||||
|
|
||||||
|
declare void @__scatter_base_offsets32_$1(i8* nocapture %base, <WIDTH x i32> %offsets,
|
||||||
|
i32 %offset_scale, <WIDTH x $1> %values, <WIDTH x i1> %mask) nounwind
|
||||||
|
declare void @__scatter_base_offsets64_$1(i8* nocapture %base, <WIDTH x i64> %offsets,
|
||||||
|
i32 %offset_scale, <WIDTH x $1> %values, <WIDTH x i1> %mask) nounwind
|
||||||
|
declare void @__scatter32_$1(<WIDTH x i32> %ptrs, <WIDTH x $1> %values,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
declare void @__scatter64_$1(<WIDTH x i64> %ptrs, <WIDTH x $1> %values,
|
||||||
|
<WIDTH x i1> %mask) nounwind
|
||||||
|
')
|
||||||
|
|
||||||
|
gather_scatter(i8)
|
||||||
|
gather_scatter(i16)
|
||||||
|
gather_scatter(i32)
|
||||||
|
gather_scatter(i64)
|
||||||
|
|
||||||
|
declare i32 @__packed_load_active(i32 * nocapture %startptr, <WIDTH x i32> * nocapture %val_ptr,
|
||||||
|
<WIDTH x i1> %full_mask) nounwind
|
||||||
|
declare i32 @__packed_store_active(i32 * %startptr, <WIDTH x i32> %vals,
|
||||||
|
<WIDTH x i1> %full_mask) nounwind
|
||||||
|
|
||||||
|
|
||||||
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
;; prefetch
|
||||||
|
|
||||||
|
declare void @__prefetch_read_uniform_1(i8 *) nounwind readnone
|
||||||
|
declare void @__prefetch_read_uniform_2(i8 *) nounwind readnone
|
||||||
|
declare void @__prefetch_read_uniform_3(i8 *) nounwind readnone
|
||||||
|
declare void @__prefetch_read_uniform_nt(i8 *) nounwind readnone
|
||||||
|
|
||||||
@@ -29,6 +29,9 @@
|
|||||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
ctlztz()
|
||||||
|
define_prefetches()
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
|
|
||||||
@@ -36,12 +36,16 @@
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; standard 8-wide definitions from m4 macros
|
;; standard 8-wide definitions from m4 macros
|
||||||
|
|
||||||
stdlib_core(8)
|
define(`WIDTH',`8')
|
||||||
packed_load_and_store(8)
|
define(`MASK',`i32')
|
||||||
scans(8)
|
include(`util.m4')
|
||||||
int64minmax(8)
|
|
||||||
|
|
||||||
include(`builtins-sse2-common.ll')
|
stdlib_core()
|
||||||
|
packed_load_and_store()
|
||||||
|
scans()
|
||||||
|
int64minmax()
|
||||||
|
|
||||||
|
include(`target-sse2-common.ll')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
@@ -33,12 +33,16 @@
|
|||||||
;; Define the standard library builtins for the SSE2 target
|
;; Define the standard library builtins for the SSE2 target
|
||||||
|
|
||||||
; Define some basics for a 4-wide target
|
; Define some basics for a 4-wide target
|
||||||
stdlib_core(4)
|
define(`WIDTH',`4')
|
||||||
packed_load_and_store(4)
|
define(`MASK',`i32')
|
||||||
scans(4)
|
include(`util.m4')
|
||||||
int64minmax(4)
|
|
||||||
|
|
||||||
include(`builtins-sse2-common.ll')
|
stdlib_core()
|
||||||
|
packed_load_and_store()
|
||||||
|
scans()
|
||||||
|
int64minmax()
|
||||||
|
|
||||||
|
include(`target-sse2-common.ll')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rounding
|
;; rounding
|
||||||
@@ -29,6 +29,9 @@
|
|||||||
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
|
ctlztz()
|
||||||
|
define_prefetches()
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rounding floats
|
;; rounding floats
|
||||||
|
|
||||||
@@ -36,12 +36,16 @@
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; standard 8-wide definitions from m4 macros
|
;; standard 8-wide definitions from m4 macros
|
||||||
|
|
||||||
stdlib_core(8)
|
define(`WIDTH',`8')
|
||||||
packed_load_and_store(8)
|
define(`MASK',`i32')
|
||||||
scans(8)
|
include(`util.m4')
|
||||||
int64minmax(8)
|
|
||||||
|
|
||||||
include(`builtins-sse4-common.ll')
|
stdlib_core()
|
||||||
|
packed_load_and_store()
|
||||||
|
scans()
|
||||||
|
int64minmax()
|
||||||
|
|
||||||
|
include(`target-sse4-common.ll')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
@@ -33,12 +33,16 @@
|
|||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
|
|
||||||
; Define common 4-wide stuff
|
; Define common 4-wide stuff
|
||||||
stdlib_core(4)
|
define(`WIDTH',`4')
|
||||||
packed_load_and_store(4)
|
define(`MASK',`i32')
|
||||||
scans(4)
|
include(`util.m4')
|
||||||
int64minmax(4)
|
|
||||||
|
|
||||||
include(`builtins-sse4-common.ll')
|
stdlib_core()
|
||||||
|
packed_load_and_store()
|
||||||
|
scans()
|
||||||
|
int64minmax()
|
||||||
|
|
||||||
|
include(`target-sse4-common.ll')
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; rcp
|
;; rcp
|
||||||
File diff suppressed because it is too large
Load Diff
10
ctx.cpp
10
ctx.cpp
@@ -875,8 +875,11 @@ FunctionEmitContext::LaneMask(llvm::Value *v) {
|
|||||||
// into an i32 value
|
// into an i32 value
|
||||||
std::vector<Symbol *> mm;
|
std::vector<Symbol *> mm;
|
||||||
m->symbolTable->LookupFunction("__movmsk", &mm);
|
m->symbolTable->LookupFunction("__movmsk", &mm);
|
||||||
// There should be one with signed int signature, one unsigned int.
|
if (g->target.maskBitCount == 1)
|
||||||
Assert(mm.size() == 2);
|
Assert(mm.size() == 1);
|
||||||
|
else
|
||||||
|
// There should be one with signed int signature, one unsigned int.
|
||||||
|
Assert(mm.size() == 2);
|
||||||
// We can actually call either one, since both are i32s as far as
|
// We can actually call either one, since both are i32s as far as
|
||||||
// LLVM's type system is concerned...
|
// LLVM's type system is concerned...
|
||||||
llvm::Function *fmm = mm[0]->function;
|
llvm::Function *fmm = mm[0]->function;
|
||||||
@@ -929,6 +932,9 @@ FunctionEmitContext::I1VecToBoolVec(llvm::Value *b) {
|
|||||||
return NULL;
|
return NULL;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (g->target.maskBitCount == 1)
|
||||||
|
return b;
|
||||||
|
|
||||||
LLVM_TYPE_CONST llvm::ArrayType *at =
|
LLVM_TYPE_CONST llvm::ArrayType *at =
|
||||||
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(b->getType());
|
llvm::dyn_cast<LLVM_TYPE_CONST llvm::ArrayType>(b->getType());
|
||||||
if (at) {
|
if (at) {
|
||||||
|
|||||||
5
func.cpp
5
func.cpp
@@ -288,7 +288,10 @@ Function::emitCode(FunctionEmitContext *ctx, llvm::Function *function,
|
|||||||
bool checkMask = (type->isTask == true) ||
|
bool checkMask = (type->isTask == true) ||
|
||||||
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
|
((function->hasFnAttr(llvm::Attribute::AlwaysInline) == false) &&
|
||||||
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
costEstimate > CHECK_MASK_AT_FUNCTION_START_COST);
|
||||||
if (checkMask && g->opt.disableCoherentControlFlow == false) {
|
checkMask &= (g->target.maskingIsFree == false);
|
||||||
|
checkMask &= (g->opt.disableCoherentControlFlow == false);
|
||||||
|
|
||||||
|
if (checkMask) {
|
||||||
llvm::Value *mask = ctx->GetFunctionMask();
|
llvm::Value *mask = ctx->GetFunctionMask();
|
||||||
llvm::Value *allOn = ctx->All(mask);
|
llvm::Value *allOn = ctx->All(mask);
|
||||||
llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on");
|
llvm::BasicBlock *bbAllOn = ctx->CreateBasicBlock("all_on");
|
||||||
|
|||||||
52
ispc.cpp
52
ispc.cpp
@@ -129,24 +129,60 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->nativeVectorWidth = 4;
|
t->nativeVectorWidth = 4;
|
||||||
t->vectorWidth = 4;
|
t->vectorWidth = 4;
|
||||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse2-x2")) {
|
else if (!strcasecmp(isa, "sse2-x2")) {
|
||||||
t->isa = Target::SSE2;
|
t->isa = Target::SSE2;
|
||||||
t->nativeVectorWidth = 4;
|
t->nativeVectorWidth = 4;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
t->attributes = "+sse,+sse2,-sse3,-sse41,-sse42,-sse4a,-ssse3,-popcnt";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse4")) {
|
else if (!strcasecmp(isa, "sse4")) {
|
||||||
t->isa = Target::SSE4;
|
t->isa = Target::SSE4;
|
||||||
t->nativeVectorWidth = 4;
|
t->nativeVectorWidth = 4;
|
||||||
t->vectorWidth = 4;
|
t->vectorWidth = 4;
|
||||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
|
else if (!strcasecmp(isa, "sse4x2") || !strcasecmp(isa, "sse4-x2")) {
|
||||||
t->isa = Target::SSE4;
|
t->isa = Target::SSE4;
|
||||||
t->nativeVectorWidth = 4;
|
t->nativeVectorWidth = 4;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
t->attributes = "+sse,+sse2,+sse3,+sse41,-sse42,-sse4a,+ssse3,-popcnt,+cmov";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
|
}
|
||||||
|
else if (!strcasecmp(isa, "generic-4")) {
|
||||||
|
t->isa = Target::GENERIC;
|
||||||
|
t->nativeVectorWidth = 4;
|
||||||
|
t->vectorWidth = 4;
|
||||||
|
t->maskingIsFree = true;
|
||||||
|
t->allOffMaskIsSafe = true;
|
||||||
|
t->maskBitCount = 1;
|
||||||
|
}
|
||||||
|
else if (!strcasecmp(isa, "generic-8")) {
|
||||||
|
t->isa = Target::GENERIC;
|
||||||
|
t->nativeVectorWidth = 8;
|
||||||
|
t->vectorWidth = 8;
|
||||||
|
t->maskingIsFree = true;
|
||||||
|
t->allOffMaskIsSafe = true;
|
||||||
|
t->maskBitCount = 1;
|
||||||
|
}
|
||||||
|
else if (!strcasecmp(isa, "generic-16")) {
|
||||||
|
t->isa = Target::GENERIC;
|
||||||
|
t->nativeVectorWidth = 16;
|
||||||
|
t->vectorWidth = 16;
|
||||||
|
t->maskingIsFree = true;
|
||||||
|
t->allOffMaskIsSafe = true;
|
||||||
|
t->maskBitCount = 1;
|
||||||
}
|
}
|
||||||
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
#if defined(LLVM_3_0) || defined(LLVM_3_0svn) || defined(LLVM_3_1svn)
|
||||||
else if (!strcasecmp(isa, "avx")) {
|
else if (!strcasecmp(isa, "avx")) {
|
||||||
@@ -154,12 +190,18 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
t->attributes = "+avx,+popcnt,+cmov";
|
t->attributes = "+avx,+popcnt,+cmov";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx-x2")) {
|
else if (!strcasecmp(isa, "avx-x2")) {
|
||||||
t->isa = Target::AVX;
|
t->isa = Target::AVX;
|
||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 16;
|
t->vectorWidth = 16;
|
||||||
t->attributes = "+avx,+popcnt,+cmov";
|
t->attributes = "+avx,+popcnt,+cmov";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
#endif // LLVM 3.0+
|
#endif // LLVM 3.0+
|
||||||
#if defined(LLVM_3_1svn)
|
#if defined(LLVM_3_1svn)
|
||||||
@@ -168,12 +210,18 @@ Target::GetTarget(const char *arch, const char *cpu, const char *isa,
|
|||||||
t->nativeVectorWidth = 8;
|
t->nativeVectorWidth = 8;
|
||||||
t->vectorWidth = 8;
|
t->vectorWidth = 8;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov";
|
t->attributes = "+avx2,+popcnt,+cmov";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
else if (!strcasecmp(isa, "avx2-x2")) {
|
else if (!strcasecmp(isa, "avx2-x2")) {
|
||||||
t->isa = Target::AVX2;
|
t->isa = Target::AVX2;
|
||||||
t->nativeVectorWidth = 16;
|
t->nativeVectorWidth = 16;
|
||||||
t->vectorWidth = 16;
|
t->vectorWidth = 16;
|
||||||
t->attributes = "+avx2,+popcnt,+cmov";
|
t->attributes = "+avx2,+popcnt,+cmov";
|
||||||
|
t->maskingIsFree = false;
|
||||||
|
t->allOffMaskIsSafe = false;
|
||||||
|
t->maskBitCount = 32;
|
||||||
}
|
}
|
||||||
#endif // LLVM 3.1
|
#endif // LLVM 3.1
|
||||||
else {
|
else {
|
||||||
@@ -221,7 +269,7 @@ Target::SupportedTargetISAs() {
|
|||||||
#ifdef LLVM_3_1svn
|
#ifdef LLVM_3_1svn
|
||||||
", avx2, avx2-x2"
|
", avx2, avx2-x2"
|
||||||
#endif // LLVM_3_1svn
|
#endif // LLVM_3_1svn
|
||||||
;
|
", generic-4, generic-8, generic-16";
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
@@ -300,6 +348,8 @@ Target::GetISAString() const {
|
|||||||
return "avx";
|
return "avx";
|
||||||
case Target::AVX2:
|
case Target::AVX2:
|
||||||
return "avx2";
|
return "avx2";
|
||||||
|
case Target::GENERIC:
|
||||||
|
return "generic";
|
||||||
default:
|
default:
|
||||||
FATAL("Unhandled target in GetISAString()");
|
FATAL("Unhandled target in GetISAString()");
|
||||||
}
|
}
|
||||||
|
|||||||
19
ispc.h
19
ispc.h
@@ -193,7 +193,7 @@ struct Target {
|
|||||||
flexible/performant of them will appear last in the enumerant. Note
|
flexible/performant of them will appear last in the enumerant. Note
|
||||||
also that __best_available_isa() needs to be updated if ISAs are
|
also that __best_available_isa() needs to be updated if ISAs are
|
||||||
added or the enumerant values are reordered. */
|
added or the enumerant values are reordered. */
|
||||||
enum ISA { SSE2, SSE4, AVX, AVX2, NUM_ISAS };
|
enum ISA { SSE2, SSE4, AVX, AVX2, GENERIC, NUM_ISAS };
|
||||||
|
|
||||||
/** Instruction set being compiled to. */
|
/** Instruction set being compiled to. */
|
||||||
ISA isa;
|
ISA isa;
|
||||||
@@ -222,6 +222,23 @@ struct Target {
|
|||||||
|
|
||||||
/** Indicates whether position independent code should be generated. */
|
/** Indicates whether position independent code should be generated. */
|
||||||
bool generatePIC;
|
bool generatePIC;
|
||||||
|
|
||||||
|
/** Is masking free on the target architecture, i.e. is there no
|
||||||
|
overhead associated with it?  There is overhead on SSE, due to extra
|
||||||
|
blends and the like, but there isn't with an ISA that supports
|
||||||
|
masking natively. */
|
||||||
|
bool maskingIsFree;
|
||||||
|
|
||||||
|
/** Is it safe to run code with the mask all off: e.g. on SSE, the fast
|
||||||
|
gather trick assumes that at least one program instance is running
|
||||||
|
(so that it can safely assume that the array base pointer is
|
||||||
|
valid). */
|
||||||
|
bool allOffMaskIsSafe;
|
||||||
|
|
||||||
|
/** How many bits are used to store each element of the mask: e.g. this
|
||||||
|
is 32 on SSE/AVX, since that matches the HW better, but it's 1 for
|
||||||
|
the generic target. */
|
||||||
|
int maskBitCount;
|
||||||
};
|
};
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
165
ispc.vcxproj
165
ispc.vcxproj
@@ -22,11 +22,15 @@
|
|||||||
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
<ClCompile Include="gen-bitcode-c-32.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-c-64.cpp" />
|
<ClCompile Include="gen-bitcode-c-64.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-dispatch.cpp" />
|
<ClCompile Include="gen-bitcode-dispatch.cpp" />
|
||||||
|
<ClCompile Include="gen-bitcode-generic-4.cpp" />
|
||||||
|
<ClCompile Include="gen-bitcode-generic-8.cpp" />
|
||||||
|
<ClCompile Include="gen-bitcode-generic-16.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
<ClCompile Include="gen-bitcode-sse2.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
|
<ClCompile Include="gen-bitcode-sse2-x2.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
<ClCompile Include="gen-bitcode-sse4.cpp" />
|
||||||
<ClCompile Include="gen-bitcode-sse4-x2.cpp" />
|
<ClCompile Include="gen-bitcode-sse4-x2.cpp" />
|
||||||
<ClCompile Include="gen-stdlib.cpp" />
|
<ClCompile Include="gen-stdlib-generic.cpp" />
|
||||||
|
<ClCompile Include="gen-stdlib-x86.cpp" />
|
||||||
<ClCompile Include="ispc.cpp" />
|
<ClCompile Include="ispc.cpp" />
|
||||||
<ClCompile Include="lex.cc">
|
<ClCompile Include="lex.cc">
|
||||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4003;4018</DisableSpecificWarnings>
|
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4003;4018</DisableSpecificWarnings>
|
||||||
@@ -40,15 +44,15 @@
|
|||||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
||||||
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
<DisableSpecificWarnings Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">4146;4800;4996;4355;4624;4005;4065</DisableSpecificWarnings>
|
||||||
</ClCompile>
|
</ClCompile>
|
||||||
<CustomBuild Include="builtins-c.c">
|
<CustomBuild Include="builtins\builtins.c">
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp;
|
||||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp</Command>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">clang builtins-c.c</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building builtins.c</Message>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-32.c > gen-bitcode-c-32.cpp;
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -m32 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-32 > gen-bitcode-c-32.cpp;
|
||||||
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins-c.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py builtins-c-64.c > gen-bitcode-c-64.cpp</Command>
|
%LLVM_INSTALL_DIR%\bin\clang -m64 -emit-llvm builtins\builtins.c -c -o - | %LLVM_INSTALL_DIR%\bin\llvm-dis - | python bitcode2cpp.py c-64 > gen-bitcode-c-64.cpp</Command>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">clang builtins-c.c</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building builtins.c</Message>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcore-c-64.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-c-32.cpp;gen-bitcode-c-64.cpp</Outputs>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
<ClCompile Include="stmt.cpp" />
|
<ClCompile Include="stmt.cpp" />
|
||||||
<ClCompile Include="sym.cpp" />
|
<ClCompile Include="sym.cpp" />
|
||||||
@@ -75,105 +79,148 @@
|
|||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="stdlib.ispc">
|
<CustomBuild Include="stdlib.ispc">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp;
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib.cpp</Outputs>
|
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp;
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py > gen-stdlib.cpp</Command>
|
</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib.cpp</Message>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DPI=3.1415926535 | python stdlib2cpp.py x86 > gen-stdlib-x86.cpp;
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib.cpp</Message>
|
%LLVM_INSTALL_DIR%\bin\clang -E -x c %(Filename).ispc -DISPC=1 -DISPC_TARGET_GENERIC=1 -DPI=3.1415926535 | python stdlib2cpp.py generic > gen-stdlib-generic.cpp;
|
||||||
|
</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-stdlib-generic.cpp;gen-stdlib-x86.cpp</Outputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-stdlib-{generic,x86}.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins-sse4.ll">
|
<CustomBuild Include="builtins\dispatch.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4.ll | python bitcode2cpp.py builtins-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
|
||||||
</CustomBuild>
|
|
||||||
</ItemGroup>
|
|
||||||
<ItemGroup>
|
|
||||||
<CustomBuild Include="builtins-dispatch.ll">
|
|
||||||
<FileType>Document</FileType>
|
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4</AdditionalInputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-dispatch.ll | python bitcode2cpp.py builtins-dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\dispatch.ll | python bitcode2cpp.py dispatch.ll > gen-bitcode-dispatch.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-dispatch.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4</AdditionalInputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-dispatch.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins-sse4-x2.ll">
|
<CustomBuild Include="builtins\target-sse4.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4.ll | python bitcode2cpp.py builtins\target-sse4.ll > gen-bitcode-sse4.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-sse4-x2.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse4-x2.ll | python bitcode2cpp.py builtins-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse4-x2.ll | python bitcode2cpp.py builtins\target-sse4-x2.ll > gen-bitcode-sse4-x2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse4-x2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse4-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse4-common.ll</AdditionalInputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse4-x2.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins-sse2.ll">
|
<CustomBuild Include="builtins\target-sse2.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2.ll | python bitcode2cpp.py builtins-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2.ll | python bitcode2cpp.py builtins\target-sse2.ll > gen-bitcode-sse2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins-sse2-x2.ll">
|
<CustomBuild Include="builtins\target-sse2-x2.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-sse2-x2.ll | python bitcode2cpp.py builtins-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-sse2-x2.ll | python bitcode2cpp.py builtins\target-sse2-x2.ll > gen-bitcode-sse2-x2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-sse2-x2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse2-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-sse2-common.ll</AdditionalInputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-sse2-x2.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins-avx.ll">
|
<CustomBuild Include="builtins\target-avx.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll > gen-bitcode-avx.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py builtins\target-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx.ll | python bitcode2cpp.py builtins-avx.ll > gen-bitcode-avx.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py builtins\target-avx.ll > gen-bitcode-avx.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-avx-common.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="builtins-avx-x2.ll">
|
<CustomBuild Include="builtins\target-avx-x2.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
|
||||||
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 builtins.m4 builtins-avx-x2.ll | python bitcode2cpp.py builtins-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll > gen-bitcode-avx-x2.cpp</Command>
|
||||||
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-avx-x2.cpp</Outputs>
|
||||||
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins.m4;builtins-sse.ll</AdditionalInputs>
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-avx-common.ll</AdditionalInputs>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
||||||
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-avx-x2.cpp</Message>
|
||||||
</CustomBuild>
|
</CustomBuild>
|
||||||
</ItemGroup>
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-generic-4.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-4.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-4.ll | python bitcode2cpp.py builtins\target-generic-4.ll > gen-bitcode-generic-4.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-4.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-4.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-4.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-generic-8.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-8.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-8.ll | python bitcode2cpp.py builtins\target-generic-8.ll > gen-bitcode-generic-8.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-8.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-8.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-8.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
|
<ItemGroup>
|
||||||
|
<CustomBuild Include="builtins\target-generic-16.ll">
|
||||||
|
<FileType>Document</FileType>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">gen-bitcode-generic-16.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Command Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-generic-16.ll | python bitcode2cpp.py builtins\target-generic-16.ll > gen-bitcode-generic-16.cpp</Command>
|
||||||
|
<Outputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">gen-bitcode-generic-16.cpp</Outputs>
|
||||||
|
<AdditionalInputs Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">builtins\util.m4;builtins\target-generic-common.ll</AdditionalInputs>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||||
|
<Message Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">Building gen-bitcode-generic-16.cpp</Message>
|
||||||
|
</CustomBuild>
|
||||||
|
</ItemGroup>
|
||||||
<ItemGroup>
|
<ItemGroup>
|
||||||
<CustomBuild Include="lex.ll">
|
<CustomBuild Include="lex.ll">
|
||||||
<FileType>Document</FileType>
|
<FileType>Document</FileType>
|
||||||
|
|||||||
27
llvmutil.cpp
27
llvmutil.cpp
@@ -105,11 +105,14 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
|||||||
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
|
LLVMTypes::FloatPointerType = llvm::PointerType::get(LLVMTypes::FloatType, 0);
|
||||||
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
|
LLVMTypes::DoublePointerType = llvm::PointerType::get(LLVMTypes::DoubleType, 0);
|
||||||
|
|
||||||
// Note that both the mask and bool vectors are vector of int32s
|
if (target.maskBitCount == 1)
|
||||||
// (not i1s). LLVM ends up generating much better SSE code with
|
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
|
||||||
// this representation.
|
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
||||||
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
|
else {
|
||||||
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
|
assert(target.maskBitCount == 32);
|
||||||
|
LLVMTypes::MaskType = LLVMTypes::BoolVectorType =
|
||||||
|
llvm::VectorType::get(llvm::Type::getInt32Ty(*ctx), target.vectorWidth);
|
||||||
|
}
|
||||||
|
|
||||||
LLVMTypes::Int1VectorType =
|
LLVMTypes::Int1VectorType =
|
||||||
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
llvm::VectorType::get(llvm::Type::getInt1Ty(*ctx), target.vectorWidth);
|
||||||
@@ -141,7 +144,11 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
|||||||
|
|
||||||
std::vector<llvm::Constant *> maskOnes;
|
std::vector<llvm::Constant *> maskOnes;
|
||||||
llvm::Constant *onMask = NULL;
|
llvm::Constant *onMask = NULL;
|
||||||
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
|
if (target.maskBitCount == 1)
|
||||||
|
onMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 1,
|
||||||
|
false /*unsigned*/); // 0x1
|
||||||
|
else
|
||||||
|
onMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), -1,
|
||||||
true /*signed*/); // 0xffffffff
|
true /*signed*/); // 0xffffffff
|
||||||
|
|
||||||
for (int i = 0; i < target.vectorWidth; ++i)
|
for (int i = 0; i < target.vectorWidth; ++i)
|
||||||
@@ -150,8 +157,12 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target target) {
|
|||||||
|
|
||||||
std::vector<llvm::Constant *> maskZeros;
|
std::vector<llvm::Constant *> maskZeros;
|
||||||
llvm::Constant *offMask = NULL;
|
llvm::Constant *offMask = NULL;
|
||||||
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
|
if (target.maskBitCount == 1)
|
||||||
true /*signed*/);
|
offMask = llvm::ConstantInt::get(llvm::Type::getInt1Ty(*ctx), 0,
|
||||||
|
true /*signed*/);
|
||||||
|
else
|
||||||
|
offMask = llvm::ConstantInt::get(llvm::Type::getInt32Ty(*ctx), 0,
|
||||||
|
true /*signed*/);
|
||||||
|
|
||||||
for (int i = 0; i < target.vectorWidth; ++i)
|
for (int i = 0; i < target.vectorWidth; ++i)
|
||||||
maskZeros.push_back(offMask);
|
maskZeros.push_back(offMask);
|
||||||
|
|||||||
22
module.cpp
22
module.cpp
@@ -1158,22 +1158,14 @@ Module::execPreprocessor(const char* infilename, llvm::raw_string_ostream* ostre
|
|||||||
opts.addMacroDef("PI=3.1415926535");
|
opts.addMacroDef("PI=3.1415926535");
|
||||||
|
|
||||||
// Add #define for current compilation target
|
// Add #define for current compilation target
|
||||||
switch (g->target.isa) {
|
char targetMacro[128];
|
||||||
case Target::SSE2:
|
sprintf(targetMacro, "ISPC_TARGET_%s", g->target.GetISAString());
|
||||||
opts.addMacroDef("ISPC_TARGET_SSE2");
|
char *p = targetMacro;
|
||||||
break;
|
while (*p) {
|
||||||
case Target::SSE4:
|
*p = toupper(*p);
|
||||||
opts.addMacroDef("ISPC_TARGET_SSE4");
|
++p;
|
||||||
break;
|
|
||||||
case Target::AVX:
|
|
||||||
opts.addMacroDef("ISPC_TARGET_AVX");
|
|
||||||
break;
|
|
||||||
case Target::AVX2:
|
|
||||||
opts.addMacroDef("ISPC_TARGET_AVX2");
|
|
||||||
break;
|
|
||||||
default:
|
|
||||||
FATAL("Unhandled target ISA in preprocessor symbol definition");
|
|
||||||
}
|
}
|
||||||
|
opts.addMacroDef(targetMacro);
|
||||||
|
|
||||||
if (g->target.is32Bit)
|
if (g->target.is32Bit)
|
||||||
opts.addMacroDef("ISPC_POINTER_SIZE=32");
|
opts.addMacroDef("ISPC_POINTER_SIZE=32");
|
||||||
|
|||||||
2
opt.cpp
2
opt.cpp
@@ -2444,7 +2444,7 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
|
|||||||
int count = sizeof(names) / sizeof(names[0]);
|
int count = sizeof(names) / sizeof(names[0]);
|
||||||
for (int i = 0; i < count; ++i) {
|
for (int i = 0; i < count; ++i) {
|
||||||
llvm::Function *f = m->module->getFunction(names[i]);
|
llvm::Function *f = m->module->getFunction(names[i]);
|
||||||
if (f != NULL) {
|
if (f != NULL && f->empty() == false) {
|
||||||
f->setLinkage(llvm::GlobalValue::InternalLinkage);
|
f->setLinkage(llvm::GlobalValue::InternalLinkage);
|
||||||
modifiedAny = true;
|
modifiedAny = true;
|
||||||
}
|
}
|
||||||
|
|||||||
3
parse.yy
3
parse.yy
@@ -1605,7 +1605,8 @@ lAddFunctionParams(Declarator *decl) {
|
|||||||
|
|
||||||
/** Add a symbol for the built-in mask variable to the symbol table */
|
/** Add a symbol for the built-in mask variable to the symbol table */
|
||||||
static void lAddMaskToSymbolTable(SourcePos pos) {
|
static void lAddMaskToSymbolTable(SourcePos pos) {
|
||||||
const Type *t = AtomicType::VaryingConstUInt32;
|
const Type *t = g->target.isa == Target::GENERIC ?
|
||||||
|
AtomicType::VaryingConstBool : AtomicType::VaryingConstUInt32;
|
||||||
Symbol *maskSymbol = new Symbol("__mask", pos, t);
|
Symbol *maskSymbol = new Symbol("__mask", pos, t);
|
||||||
m->symbolTable->AddVariable(maskSymbol);
|
m->symbolTable->AddVariable(maskSymbol);
|
||||||
}
|
}
|
||||||
|
|||||||
144
stdlib.ispc
144
stdlib.ispc
@@ -38,6 +38,14 @@
|
|||||||
ispc code
|
ispc code
|
||||||
*/
|
*/
|
||||||
|
|
||||||
|
#ifdef ISPC_TARGET_GENERIC
|
||||||
|
#define IntMaskType bool
|
||||||
|
#define UIntMaskType bool
|
||||||
|
#else
|
||||||
|
#define IntMaskType int32
|
||||||
|
#define UIntMaskType unsigned int32
|
||||||
|
#endif
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
// Low level primitives
|
// Low level primitives
|
||||||
|
|
||||||
@@ -274,13 +282,21 @@ static inline int32 sign_extend(bool v) {
|
|||||||
static inline uniform bool any(bool v) {
|
static inline uniform bool any(bool v) {
|
||||||
// We only care about whether "any" is true for the active program instances,
|
// We only care about whether "any" is true for the active program instances,
|
||||||
// so we have to mask v with the current program mask.
|
// so we have to mask v with the current program mask.
|
||||||
|
#ifdef ISPC_TARGET_GENERIC
|
||||||
|
return __movmsk(v & __mask) != 0;
|
||||||
|
#else
|
||||||
return __movmsk(__sext_varying_bool(v) & __mask) != 0;
|
return __movmsk(__sext_varying_bool(v) & __mask) != 0;
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform bool all(bool v) {
|
static inline uniform bool all(bool v) {
|
||||||
// As with any(), we need to explicitly mask v with the current program mask
|
// As with any(), we need to explicitly mask v with the current program mask
|
||||||
// so we're only looking at the current lanes
|
// so we're only looking at the current lanes
|
||||||
|
#ifdef ISPC_TARGET_GENERIC
|
||||||
|
bool match = ((v & __mask) == __mask);
|
||||||
|
#else
|
||||||
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
|
int32 match = __sext_varying_bool((__sext_varying_bool(v) & __mask) == __mask);
|
||||||
|
#endif
|
||||||
return __movmsk(match) == (1 << programCount) - 1;
|
return __movmsk(match) == (1 << programCount) - 1;
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -308,7 +324,11 @@ static inline int popcnt(int64 v) {
|
|||||||
|
|
||||||
static inline uniform int popcnt(bool v) {
|
static inline uniform int popcnt(bool v) {
|
||||||
// As with any() and all(), only count across the active lanes
|
// As with any() and all(), only count across the active lanes
|
||||||
|
#ifdef ISPC_TARGET_GENERIC
|
||||||
|
return __popcnt_int32(__movmsk(v & __mask));
|
||||||
|
#else
|
||||||
return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask));
|
return __popcnt_int32(__movmsk(__sext_varying_bool(v) & __mask));
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform int lanemask() {
|
static inline uniform int lanemask() {
|
||||||
@@ -672,19 +692,19 @@ static inline uniform bool reduce_equal(TYPE v, uniform TYPE * uniform value) {
|
|||||||
return __reduce_equal_##FUNCTYPE(v, value, (MASKTYPE)__mask); \
|
return __reduce_equal_##FUNCTYPE(v, value, (MASKTYPE)__mask); \
|
||||||
}
|
}
|
||||||
|
|
||||||
REDUCE_EQUAL(int32, int32, int32)
|
REDUCE_EQUAL(int32, int32, IntMaskType)
|
||||||
REDUCE_EQUAL(unsigned int32, int32, unsigned int32)
|
REDUCE_EQUAL(unsigned int32, int32, UIntMaskType)
|
||||||
REDUCE_EQUAL(float, float, int32)
|
REDUCE_EQUAL(float, float, IntMaskType)
|
||||||
REDUCE_EQUAL(int64, int64, int32)
|
REDUCE_EQUAL(int64, int64, IntMaskType)
|
||||||
REDUCE_EQUAL(unsigned int64, int64, unsigned int32)
|
REDUCE_EQUAL(unsigned int64, int64, UIntMaskType)
|
||||||
REDUCE_EQUAL(double, double, int32)
|
REDUCE_EQUAL(double, double, IntMaskType)
|
||||||
|
|
||||||
static int32 exclusive_scan_add(int32 v) {
|
static int32 exclusive_scan_add(int32 v) {
|
||||||
return __exclusive_scan_add_i32(v, (int32)__mask);
|
return __exclusive_scan_add_i32(v, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int32 exclusive_scan_add(unsigned int32 v) {
|
static unsigned int32 exclusive_scan_add(unsigned int32 v) {
|
||||||
return __exclusive_scan_add_i32(v, __mask);
|
return __exclusive_scan_add_i32((int32)v, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static float exclusive_scan_add(float v) {
|
static float exclusive_scan_add(float v) {
|
||||||
@@ -692,11 +712,11 @@ static float exclusive_scan_add(float v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int64 exclusive_scan_add(int64 v) {
|
static int64 exclusive_scan_add(int64 v) {
|
||||||
return __exclusive_scan_add_i64(v, (int32)__mask);
|
return __exclusive_scan_add_i64(v, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int64 exclusive_scan_add(unsigned int64 v) {
|
static unsigned int64 exclusive_scan_add(unsigned int64 v) {
|
||||||
return __exclusive_scan_add_i64(v, __mask);
|
return __exclusive_scan_add_i64(v, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static double exclusive_scan_add(double v) {
|
static double exclusive_scan_add(double v) {
|
||||||
@@ -704,35 +724,35 @@ static double exclusive_scan_add(double v) {
|
|||||||
}
|
}
|
||||||
|
|
||||||
static int32 exclusive_scan_and(int32 v) {
|
static int32 exclusive_scan_and(int32 v) {
|
||||||
return __exclusive_scan_and_i32(v, (int32)__mask);
|
return __exclusive_scan_and_i32(v, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int32 exclusive_scan_and(unsigned int32 v) {
|
static unsigned int32 exclusive_scan_and(unsigned int32 v) {
|
||||||
return __exclusive_scan_and_i32(v, __mask);
|
return __exclusive_scan_and_i32(v, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int64 exclusive_scan_and(int64 v) {
|
static int64 exclusive_scan_and(int64 v) {
|
||||||
return __exclusive_scan_and_i64(v, (int32)__mask);
|
return __exclusive_scan_and_i64(v, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int64 exclusive_scan_and(unsigned int64 v) {
|
static unsigned int64 exclusive_scan_and(unsigned int64 v) {
|
||||||
return __exclusive_scan_and_i64(v, __mask);
|
return __exclusive_scan_and_i64(v, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int32 exclusive_scan_or(int32 v) {
|
static int32 exclusive_scan_or(int32 v) {
|
||||||
return __exclusive_scan_or_i32(v, (int32)__mask);
|
return __exclusive_scan_or_i32(v, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int32 exclusive_scan_or(unsigned int32 v) {
|
static unsigned int32 exclusive_scan_or(unsigned int32 v) {
|
||||||
return __exclusive_scan_or_i32(v, __mask);
|
return __exclusive_scan_or_i32(v, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static int64 exclusive_scan_or(int64 v) {
|
static int64 exclusive_scan_or(int64 v) {
|
||||||
return __exclusive_scan_or_i64(v, (int32)__mask);
|
return __exclusive_scan_or_i64(v, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static unsigned int64 exclusive_scan_or(unsigned int64 v) {
|
static unsigned int64 exclusive_scan_or(unsigned int64 v) {
|
||||||
return __exclusive_scan_or_i64(v, __mask);
|
return __exclusive_scan_or_i64(v, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -741,23 +761,23 @@ static unsigned int64 exclusive_scan_or(unsigned int64 v) {
|
|||||||
static inline uniform int
|
static inline uniform int
|
||||||
packed_load_active(uniform unsigned int * uniform a,
|
packed_load_active(uniform unsigned int * uniform a,
|
||||||
unsigned int * uniform vals) {
|
unsigned int * uniform vals) {
|
||||||
return __packed_load_active(a, vals, (unsigned int32)__mask);
|
return __packed_load_active(a, vals, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform int
|
static inline uniform int
|
||||||
packed_store_active(uniform unsigned int * uniform a,
|
packed_store_active(uniform unsigned int * uniform a,
|
||||||
unsigned int vals) {
|
unsigned int vals) {
|
||||||
return __packed_store_active(a, vals, (unsigned int32)__mask);
|
return __packed_store_active(a, vals, (UIntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform int
|
static inline uniform int
|
||||||
packed_load_active(uniform int * uniform a, int * uniform vals) {
|
packed_load_active(uniform int * uniform a, int * uniform vals) {
|
||||||
return __packed_load_active(a, vals, (int32)__mask);
|
return __packed_load_active(a, vals, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline uniform int
|
static inline uniform int
|
||||||
packed_store_active(uniform int * uniform a, int vals) {
|
packed_store_active(uniform int * uniform a, int vals) {
|
||||||
return __packed_store_active(a, vals, (int32)__mask);
|
return __packed_store_active(a, vals, (IntMaskType)__mask);
|
||||||
}
|
}
|
||||||
|
|
||||||
///////////////////////////////////////////////////////////////////////////
|
///////////////////////////////////////////////////////////////////////////
|
||||||
@@ -848,49 +868,49 @@ static inline TA atomic_##OPA##_global(uniform TA * varying ptr, \
|
|||||||
return ret; \
|
return ret; \
|
||||||
}
|
}
|
||||||
|
|
||||||
DEFINE_ATOMIC_OP(int32,int32,add,add,int32)
|
DEFINE_ATOMIC_OP(int32,int32,add,add,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,int32)
|
DEFINE_ATOMIC_OP(int32,int32,subtract,sub,IntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min,int32)
|
DEFINE_ATOMIC_MINMAX_OP(int32,int32,min,min,IntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max,int32)
|
DEFINE_ATOMIC_MINMAX_OP(int32,int32,max,max,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int32,int32,and,and,int32)
|
DEFINE_ATOMIC_OP(int32,int32,and,and,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int32,int32,or,or,int32)
|
DEFINE_ATOMIC_OP(int32,int32,or,or,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int32,int32,xor,xor,int32)
|
DEFINE_ATOMIC_OP(int32,int32,xor,xor,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int32,int32,swap,swap,int32)
|
DEFINE_ATOMIC_OP(int32,int32,swap,swap,IntMaskType)
|
||||||
|
|
||||||
// For everything but atomic min and max, we can use the same
|
// For everything but atomic min and max, we can use the same
|
||||||
// implementations for unsigned as for signed.
|
// implementations for unsigned as for signed.
|
||||||
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int32,int32,add,add,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int32,int32,subtract,sub,UIntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin,unsigned int32)
|
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,min,umin,UIntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax,unsigned int32)
|
DEFINE_ATOMIC_MINMAX_OP(unsigned int32,uint32,max,umax,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int32,int32,and,and,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int32,int32,or,or,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int32,int32,xor,xor,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int32,int32,swap,swap,UIntMaskType)
|
||||||
|
|
||||||
DEFINE_ATOMIC_OP(float,float,swap,swap,int32)
|
DEFINE_ATOMIC_OP(float,float,swap,swap,IntMaskType)
|
||||||
|
|
||||||
DEFINE_ATOMIC_OP(int64,int64,add,add,int32)
|
DEFINE_ATOMIC_OP(int64,int64,add,add,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,int32)
|
DEFINE_ATOMIC_OP(int64,int64,subtract,sub,IntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min,int32)
|
DEFINE_ATOMIC_MINMAX_OP(int64,int64,min,min,IntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max,int32)
|
DEFINE_ATOMIC_MINMAX_OP(int64,int64,max,max,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int64,int64,and,and,int32)
|
DEFINE_ATOMIC_OP(int64,int64,and,and,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int64,int64,or,or,int32)
|
DEFINE_ATOMIC_OP(int64,int64,or,or,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int64,int64,xor,xor,int32)
|
DEFINE_ATOMIC_OP(int64,int64,xor,xor,IntMaskType)
|
||||||
DEFINE_ATOMIC_OP(int64,int64,swap,swap,int32)
|
DEFINE_ATOMIC_OP(int64,int64,swap,swap,IntMaskType)
|
||||||
|
|
||||||
// For everything but atomic min and max, we can use the same
|
// For everything but atomic min and max, we can use the same
|
||||||
// implementations for unsigned as for signed.
|
// implementations for unsigned as for signed.
|
||||||
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int64,int64,add,add,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int64,int64,subtract,sub,UIntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin,unsigned int32)
|
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,min,umin,UIntMaskType)
|
||||||
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax,unsigned int32)
|
DEFINE_ATOMIC_MINMAX_OP(unsigned int64,uint64,max,umax,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int64,int64,and,and,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int64,int64,or,or,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int64,int64,xor,xor,UIntMaskType)
|
||||||
DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap,unsigned int32)
|
DEFINE_ATOMIC_OP(unsigned int64,int64,swap,swap,UIntMaskType)
|
||||||
|
|
||||||
DEFINE_ATOMIC_OP(double,double,swap,swap,int32)
|
DEFINE_ATOMIC_OP(double,double,swap,swap,IntMaskType)
|
||||||
|
|
||||||
#undef DEFINE_ATOMIC_OP
|
#undef DEFINE_ATOMIC_OP
|
||||||
|
|
||||||
@@ -913,12 +933,12 @@ static inline uniform TA atomic_compare_exchange_global( \
|
|||||||
return ret; \
|
return ret; \
|
||||||
}
|
}
|
||||||
|
|
||||||
ATOMIC_DECL_CMPXCHG(int32, int32, int32)
|
ATOMIC_DECL_CMPXCHG(int32, int32, IntMaskType)
|
||||||
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, unsigned int32)
|
ATOMIC_DECL_CMPXCHG(unsigned int32, int32, UIntMaskType)
|
||||||
ATOMIC_DECL_CMPXCHG(float, float, int32)
|
ATOMIC_DECL_CMPXCHG(float, float, IntMaskType)
|
||||||
ATOMIC_DECL_CMPXCHG(int64, int64, int32)
|
ATOMIC_DECL_CMPXCHG(int64, int64, IntMaskType)
|
||||||
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, unsigned int32)
|
ATOMIC_DECL_CMPXCHG(unsigned int64, int64, UIntMaskType)
|
||||||
ATOMIC_DECL_CMPXCHG(double, double, int32)
|
ATOMIC_DECL_CMPXCHG(double, double, IntMaskType)
|
||||||
|
|
||||||
#undef ATOMIC_DECL_CMPXCHG
|
#undef ATOMIC_DECL_CMPXCHG
|
||||||
|
|
||||||
|
|||||||
@@ -2,7 +2,9 @@
|
|||||||
|
|
||||||
import sys
|
import sys
|
||||||
|
|
||||||
print "char stdlib_code[] = { "
|
t=str(sys.argv[1])
|
||||||
|
|
||||||
|
print "char stdlib_" + t + "_code[] = { "
|
||||||
|
|
||||||
for line in sys.stdin:
|
for line in sys.stdin:
|
||||||
for c in line:
|
for c in line:
|
||||||
|
|||||||
8
stmt.cpp
8
stmt.cpp
@@ -622,9 +622,6 @@ IfStmt::emitMaskedTrueAndFalse(FunctionEmitContext *ctx, llvm::Value *oldMask,
|
|||||||
|
|
||||||
/** Given an AST node, check to see if it's safe if we happen to run the
|
/** Given an AST node, check to see if it's safe if we happen to run the
|
||||||
code for that node with the execution mask all off.
|
code for that node with the execution mask all off.
|
||||||
|
|
||||||
FIXME: this is actually a target-specific thing; for non SSE/AVX
|
|
||||||
targets with more complete masking support, some of this won't apply...
|
|
||||||
*/
|
*/
|
||||||
static bool
|
static bool
|
||||||
lCheckAllOffSafety(ASTNode *node, void *data) {
|
lCheckAllOffSafety(ASTNode *node, void *data) {
|
||||||
@@ -648,6 +645,11 @@ lCheckAllOffSafety(ASTNode *node, void *data) {
|
|||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if (g->target.allOffMaskIsSafe == true)
|
||||||
|
// Don't worry about memory accesses if we have a target that can
|
||||||
|
// safely run them with the mask all off
|
||||||
|
return true;
|
||||||
|
|
||||||
IndexExpr *ie;
|
IndexExpr *ie;
|
||||||
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
|
if ((ie = dynamic_cast<IndexExpr *>(node)) != NULL && ie->baseExpr != NULL) {
|
||||||
const Type *type = ie->baseExpr->GetType();
|
const Type *type = ie->baseExpr->GetType();
|
||||||
|
|||||||
Reference in New Issue
Block a user