diff --git a/Makefile b/Makefile
index f1e3ad78..b83714c9 100644
--- a/Makefile
+++ b/Makefile
@@ -71,7 +71,8 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \
type.cpp util.cpp
HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \
opt.h stmt.h sym.h type.h util.h
-TARGETS=avx avx-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 generic-16
+TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \
+ generic-16
BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \
builtins/dispatch.ll
BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \
diff --git a/builtins.cpp b/builtins.cpp
index dce7c9fa..93d5c20c 100644
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -717,11 +717,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
extern int builtins_bitcode_sse4_x2_length;
switch (g->target.vectorWidth) {
case 4:
- AddBitcodeToModule(builtins_bitcode_sse4, builtins_bitcode_sse4_length,
+ AddBitcodeToModule(builtins_bitcode_sse4,
+ builtins_bitcode_sse4_length,
module, symbolTable);
break;
case 8:
- AddBitcodeToModule(builtins_bitcode_sse4_x2, builtins_bitcode_sse4_x2_length,
+ AddBitcodeToModule(builtins_bitcode_sse4_x2,
+ builtins_bitcode_sse4_x2_length,
module, symbolTable);
break;
default:
@@ -729,18 +731,39 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
}
break;
case Target::AVX:
- case Target::AVX2:
switch (g->target.vectorWidth) {
case 8:
- extern unsigned char builtins_bitcode_avx[];
- extern int builtins_bitcode_avx_length;
- AddBitcodeToModule(builtins_bitcode_avx, builtins_bitcode_avx_length,
+ extern unsigned char builtins_bitcode_avx1[];
+ extern int builtins_bitcode_avx1_length;
+ AddBitcodeToModule(builtins_bitcode_avx1,
+ builtins_bitcode_avx1_length,
module, symbolTable);
break;
case 16:
- extern unsigned char builtins_bitcode_avx_x2[];
- extern int builtins_bitcode_avx_x2_length;
- AddBitcodeToModule(builtins_bitcode_avx_x2, builtins_bitcode_avx_x2_length,
+ extern unsigned char builtins_bitcode_avx1_x2[];
+ extern int builtins_bitcode_avx1_x2_length;
+ AddBitcodeToModule(builtins_bitcode_avx1_x2,
+ builtins_bitcode_avx1_x2_length,
+ module, symbolTable);
+ break;
+ default:
+ FATAL("logic error in DefineStdlib");
+ }
+ break;
+ case Target::AVX2:
+ switch (g->target.vectorWidth) {
+ case 8:
+ extern unsigned char builtins_bitcode_avx2[];
+ extern int builtins_bitcode_avx2_length;
+ AddBitcodeToModule(builtins_bitcode_avx2,
+ builtins_bitcode_avx2_length,
+ module, symbolTable);
+ break;
+ case 16:
+ extern unsigned char builtins_bitcode_avx2_x2[];
+ extern int builtins_bitcode_avx2_x2_length;
+ AddBitcodeToModule(builtins_bitcode_avx2_x2,
+ builtins_bitcode_avx2_x2_length,
module, symbolTable);
break;
default:
diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll
index 53f57c88..c1979e30 100644
--- a/builtins/target-avx-x2.ll
+++ b/builtins/target-avx-x2.ll
@@ -170,33 +170,6 @@ define <16 x float> @__min_varying_float(<16 x float>,
}
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; int min/max
-
-define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
- binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
- binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
- ret <16 x i32> %ret
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; unsigned int min/max
-
-define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
- binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
- ret <16 x i32> %ret
-}
-
-define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
- binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
- ret <16 x i32> %ret
-}
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; horizontal ops
@@ -622,12 +595,7 @@ define void @__masked_store_blend_64(<16 x i64>* nocapture %ptr, <16 x i64> %new
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; gather/scatter
-
-gen_gather(16, i8)
-gen_gather(16, i16)
-gen_gather(16, i32)
-gen_gather(16, i64)
+;; scatter
gen_scatter(16, i8)
gen_scatter(16, i16)
diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll
index b86ca712..53659b7c 100644
--- a/builtins/target-avx.ll
+++ b/builtins/target-avx.ll
@@ -170,33 +170,6 @@ define <8 x float> @__min_varying_float(<8 x float>,
}
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; int min/max
-
-define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
- binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
- ret <8 x i32> %ret
-}
-
-define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
- binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
- ret <8 x i32> %ret
-}
-
-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; unsigned int min/max
-
-define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
- binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
- ret <8 x i32> %ret
-}
-
-define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
- binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
- ret <8 x i32> %ret
-}
-
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
; horizontal ops
@@ -238,7 +211,7 @@ reduce_equal(8)
;; horizontal int32 ops
define <8 x i32> @__add_varying_int32(<8 x i32>,
- <8 x i32>) nounwind readnone alwaysinline {
+ <8 x i32>) nounwind readnone alwaysinline {
%s = add <8 x i32> %0, %1
ret <8 x i32> %s
}
@@ -314,7 +287,7 @@ define double @__reduce_max_double(<8 x double>) nounwind readnone alwaysinline
;; horizontal int64 ops
define <8 x i64> @__add_varying_int64(<8 x i64>,
- <8 x i64>) nounwind readnone alwaysinline {
+ <8 x i64>) nounwind readnone alwaysinline {
%s = add <8 x i64> %0, %1
ret <8 x i64> %s
}
@@ -403,9 +376,6 @@ define <8 x i64> @__masked_load_64(i8 *, <8 x i32> %mask) nounwind alwaysinline
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; masked store
-; FIXME: there is no AVX instruction for these, but we could be clever
-; by packing the bits down and setting the last 3/4 or half, respectively,
-; of the mask to zero... Not sure if this would be a win in the end
gen_masked_store(8, i8, 8)
gen_masked_store(8, i16, 16)
@@ -520,12 +490,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new,
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; gather/scatter
-
-gen_gather(8, i8)
-gen_gather(8, i16)
-gen_gather(8, i32)
-gen_gather(8, i64)
+;; scatter
gen_scatter(8, i8)
gen_scatter(8, i16)
diff --git a/builtins/target-avx1-x2.ll b/builtins/target-avx1-x2.ll
new file mode 100644
index 00000000..d05da95f
--- /dev/null
+++ b/builtins/target-avx1-x2.ll
@@ -0,0 +1,69 @@
+;; Copyright (c) 2010-2011, Intel Corporation
+;; All rights reserved.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are
+;; met:
+;;
+;; * Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;;
+;; * Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;;
+;; * Neither the name of Intel Corporation nor the names of its
+;; contributors may be used to endorse or promote products derived from
+;; this software without specific prior written permission.
+;;
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include(`target-avx-x2.ll')
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int min/max
+
+define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
+ ret <16 x i32> %ret
+}
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; unsigned int min/max
+
+define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
+ ret <16 x i32> %ret
+}
+
+define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
+ ret <16 x i32> %ret
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; gather
+
+gen_gather(16, i8)
+gen_gather(16, i16)
+gen_gather(16, i32)
+gen_gather(16, i64)
+
+
diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll
new file mode 100644
index 00000000..137ddf00
--- /dev/null
+++ b/builtins/target-avx1.ll
@@ -0,0 +1,70 @@
+;; Copyright (c) 2010-2011, Intel Corporation
+;; All rights reserved.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are
+;; met:
+;;
+;; * Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;;
+;; * Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;;
+;; * Neither the name of Intel Corporation nor the names of its
+;; contributors may be used to endorse or promote products derived from
+;; this software without specific prior written permission.
+;;
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include(`target-avx.ll')
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int min/max
+
+define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1)
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1)
+ ret <8 x i32> %ret
+}
+
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; unsigned int min/max
+
+define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1)
+ ret <8 x i32> %ret
+}
+
+define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1)
+ ret <8 x i32> %ret
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; gather
+
+gen_gather(8, i8)
+gen_gather(8, i16)
+gen_gather(8, i32)
+gen_gather(8, i64)
+
+
+
diff --git a/builtins/target-avx2-x2.ll b/builtins/target-avx2-x2.ll
new file mode 100644
index 00000000..fa4f345f
--- /dev/null
+++ b/builtins/target-avx2-x2.ll
@@ -0,0 +1,74 @@
+;; Copyright (c) 2010-2011, Intel Corporation
+;; All rights reserved.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are
+;; met:
+;;
+;; * Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;;
+;; * Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;;
+;; * Neither the name of Intel Corporation nor the names of its
+;; contributors may be used to endorse or promote products derived from
+;; this software without specific prior written permission.
+;;
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include(`target-avx-x2.ll')
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int min/max
+
+declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly
+declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly
+
+define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary8to16(m, i32, @llvm.x86.avx2.pmins.d, %0, %1)
+ ret <16 x i32> %m
+}
+
+define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary8to16(m, i32, @llvm.x86.avx2.pmaxs.d, %0, %1)
+ ret <16 x i32> %m
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; unsigned int min/max
+
+declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly
+declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly
+
+define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary8to16(m, i32, @llvm.x86.avx2.pminu.d, %0, %1)
+ ret <16 x i32> %m
+}
+
+define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline {
+ binary8to16(m, i32, @llvm.x86.avx2.pmaxu.d, %0, %1)
+ ret <16 x i32> %m
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; gather
+
+gen_gather(16, i8)
+gen_gather(16, i16)
+gen_gather(16, i32)
+gen_gather(16, i64)
+
+
diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll
new file mode 100644
index 00000000..c812ede1
--- /dev/null
+++ b/builtins/target-avx2.ll
@@ -0,0 +1,75 @@
+;; Copyright (c) 2010-2011, Intel Corporation
+;; All rights reserved.
+;;
+;; Redistribution and use in source and binary forms, with or without
+;; modification, are permitted provided that the following conditions are
+;; met:
+;;
+;; * Redistributions of source code must retain the above copyright
+;; notice, this list of conditions and the following disclaimer.
+;;
+;; * Redistributions in binary form must reproduce the above copyright
+;; notice, this list of conditions and the following disclaimer in the
+;; documentation and/or other materials provided with the distribution.
+;;
+;; * Neither the name of Intel Corporation nor the names of its
+;; contributors may be used to endorse or promote products derived from
+;; this software without specific prior written permission.
+;;
+;;
+;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
+;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
+;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
+;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
+;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
+;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
+;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
+;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+include(`target-avx.ll')
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; int min/max
+
+declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly
+declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly
+
+define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ %m = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %0, <8 x i32> %1)
+ ret <8 x i32> %m
+}
+
+define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ %m = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %0, <8 x i32> %1)
+ ret <8 x i32> %m
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; unsigned int min/max
+
+declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly
+declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly
+
+define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ %m = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %0, <8 x i32> %1)
+ ret <8 x i32> %m
+}
+
+define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline {
+ %m = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %0, <8 x i32> %1)
+ ret <8 x i32> %m
+}
+
+;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
+;; gather
+
+gen_gather(8, i8)
+gen_gather(8, i16)
+gen_gather(8, i32)
+gen_gather(8, i64)
+
+
+
diff --git a/ispc.vcxproj b/ispc.vcxproj
index fc65004b..38457518 100755
--- a/ispc.vcxproj
+++ b/ispc.vcxproj
@@ -18,8 +18,10 @@
-
-
+
+
+
+
@@ -158,29 +160,55 @@
-
+
Document
- m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py builtins\target-avx.ll > gen-bitcode-avx.cpp
- gen-bitcode-avx.cpp
- builtins\util.m4;builtins\target-avx-common.ll
- m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py builtins\target-avx.ll > gen-bitcode-avx.cpp
- gen-bitcode-avx.cpp
- builtins\util.m4;builtins\target-avx-common.ll
- Building gen-bitcode-avx.cpp
- Building gen-bitcode-avx.cpp
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp
+ gen-bitcode-avx1.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp
+ gen-bitcode-avx1.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll
+ Building gen-bitcode-avx1.cpp
+ Building gen-bitcode-avx1.cpp
-
+
Document
- m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll > gen-bitcode-avx-x2.cpp
- gen-bitcode-avx-x2.cpp
- builtins\util.m4;builtins\target-avx-common.ll
- m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll > gen-bitcode-avx-x2.cpp
- gen-bitcode-avx-x2.cpp
- builtins\util.m4;builtins\target-avx-common.ll
- Building gen-bitcode-avx-x2.cpp
- Building gen-bitcode-avx-x2.cpp
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp
+ gen-bitcode-avx1-x2.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp
+ gen-bitcode-avx1-x2.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll
+ Building gen-bitcode-avx1-x2.cpp
+ Building gen-bitcode-avx1-x2.cpp
+
+
+
+
+ Document
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp
+ gen-bitcode-avx2.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp
+ gen-bitcode-avx2.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll
+ Building gen-bitcode-avx2.cpp
+ Building gen-bitcode-avx2.cpp
+
+
+
+
+ Document
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp
+ gen-bitcode-avx2-x2.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll
+ m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp
+ gen-bitcode-avx2-x2.cpp
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll
+ Building gen-bitcode-avx2-x2.cpp
+ Building gen-bitcode-avx2-x2.cpp