From 58a0b4a20d1895d5dcfc97aaed4d63f045d7dc5f Mon Sep 17 00:00:00 2001 From: Matt Pharr Date: Fri, 13 Jan 2012 14:39:33 -0800 Subject: [PATCH] Add separate set of builtins for AVX2. (i.e., stop just reusing the ones for AVX1). For now the only difference is that the int/uint min/max functions call the new intrinsic for that. Once gather is available from LLVM, that will go here as well. --- Makefile | 3 +- builtins.cpp | 41 ++++++++++++++++----- builtins/target-avx-x2.ll | 34 +---------------- builtins/target-avx.ll | 41 ++------------------- builtins/target-avx1-x2.ll | 69 +++++++++++++++++++++++++++++++++++ builtins/target-avx1.ll | 70 +++++++++++++++++++++++++++++++++++ builtins/target-avx2-x2.ll | 74 +++++++++++++++++++++++++++++++++++++ builtins/target-avx2.ll | 75 ++++++++++++++++++++++++++++++++++++++ ispc.vcxproj | 68 ++++++++++++++++++++++++---------- 9 files changed, 374 insertions(+), 101 deletions(-) create mode 100644 builtins/target-avx1-x2.ll create mode 100644 builtins/target-avx1.ll create mode 100644 builtins/target-avx2-x2.ll create mode 100644 builtins/target-avx2.ll diff --git a/Makefile b/Makefile index f1e3ad78..b83714c9 100644 --- a/Makefile +++ b/Makefile @@ -71,7 +71,8 @@ CXX_SRC=ast.cpp builtins.cpp cbackend.cpp ctx.cpp decl.cpp expr.cpp func.cpp \ type.cpp util.cpp HEADERS=ast.h builtins.h ctx.h decl.h expr.h func.h ispc.h llvmutil.h module.h \ opt.h stmt.h sym.h type.h util.h -TARGETS=avx avx-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 generic-16 +TARGETS=avx1 avx1-x2 avx2 avx2-x2 sse2 sse2-x2 sse4 sse4-x2 generic-4 generic-8 \ + generic-16 BUILTINS_SRC=$(addprefix builtins/target-, $(addsuffix .ll, $(TARGETS))) \ builtins/dispatch.ll BUILTINS_OBJS=$(addprefix builtins-, $(notdir $(BUILTINS_SRC:.ll=.o))) \ diff --git a/builtins.cpp b/builtins.cpp index dce7c9fa..93d5c20c 100644 --- a/builtins.cpp +++ b/builtins.cpp @@ -717,11 +717,13 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod extern int 
builtins_bitcode_sse4_x2_length; switch (g->target.vectorWidth) { case 4: - AddBitcodeToModule(builtins_bitcode_sse4, builtins_bitcode_sse4_length, + AddBitcodeToModule(builtins_bitcode_sse4, + builtins_bitcode_sse4_length, module, symbolTable); break; case 8: - AddBitcodeToModule(builtins_bitcode_sse4_x2, builtins_bitcode_sse4_x2_length, + AddBitcodeToModule(builtins_bitcode_sse4_x2, + builtins_bitcode_sse4_x2_length, module, symbolTable); break; default: @@ -729,18 +731,39 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod } break; case Target::AVX: - case Target::AVX2: switch (g->target.vectorWidth) { case 8: - extern unsigned char builtins_bitcode_avx[]; - extern int builtins_bitcode_avx_length; - AddBitcodeToModule(builtins_bitcode_avx, builtins_bitcode_avx_length, + extern unsigned char builtins_bitcode_avx1[]; + extern int builtins_bitcode_avx1_length; + AddBitcodeToModule(builtins_bitcode_avx1, + builtins_bitcode_avx1_length, module, symbolTable); break; case 16: - extern unsigned char builtins_bitcode_avx_x2[]; - extern int builtins_bitcode_avx_x2_length; - AddBitcodeToModule(builtins_bitcode_avx_x2, builtins_bitcode_avx_x2_length, + extern unsigned char builtins_bitcode_avx1_x2[]; + extern int builtins_bitcode_avx1_x2_length; + AddBitcodeToModule(builtins_bitcode_avx1_x2, + builtins_bitcode_avx1_x2_length, + module, symbolTable); + break; + default: + FATAL("logic error in DefineStdlib"); + } + break; + case Target::AVX2: + switch (g->target.vectorWidth) { + case 8: + extern unsigned char builtins_bitcode_avx2[]; + extern int builtins_bitcode_avx2_length; + AddBitcodeToModule(builtins_bitcode_avx2, + builtins_bitcode_avx2_length, + module, symbolTable); + break; + case 16: + extern unsigned char builtins_bitcode_avx2_x2[]; + extern int builtins_bitcode_avx2_x2_length; + AddBitcodeToModule(builtins_bitcode_avx2_x2, + builtins_bitcode_avx2_x2_length, module, symbolTable); break; default: diff --git 
a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index 53f57c88..c1979e30 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -170,33 +170,6 @@ define <16 x float> @__min_varying_float(<16 x float>, } -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; int min/max - -define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { - binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1) - ret <16 x i32> %ret -} - -define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { - binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1) - ret <16 x i32> %ret -} - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; unsigned int min/max - -define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { - binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1) - ret <16 x i32> %ret -} - -define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { - binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1) - ret <16 x i32> %ret -} - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops @@ -622,12 +595,7 @@ define void @__masked_store_blend_64(<16 x i64>* nocapture %ptr, <16 x i64> %new } ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; gather/scatter - -gen_gather(16, i8) -gen_gather(16, i16) -gen_gather(16, i32) -gen_gather(16, i64) +;; scatter gen_scatter(16, i8) gen_scatter(16, i16) diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index b86ca712..53659b7c 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -170,33 +170,6 @@ define <8 x float> @__min_varying_float(<8 x float>, } -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; int min/max - -define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { 
- binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1) - ret <8 x i32> %ret -} - -define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { - binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1) - ret <8 x i32> %ret -} - - -;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; unsigned int min/max - -define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { - binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1) - ret <8 x i32> %ret -} - -define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { - binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1) - ret <8 x i32> %ret -} - ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops @@ -238,7 +211,7 @@ reduce_equal(8) ;; horizontal int32 ops define <8 x i32> @__add_varying_int32(<8 x i32>, - <8 x i32>) nounwind readnone alwaysinline { + <8 x i32>) nounwind readnone alwaysinline { %s = add <8 x i32> %0, %1 ret <8 x i32> %s } @@ -314,7 +287,7 @@ define double @__reduce_max_double(<8 x double>) nounwind readnone alwaysinline ;; horizontal int64 ops define <8 x i64> @__add_varying_int64(<8 x i64>, - <8 x i64>) nounwind readnone alwaysinline { + <8 x i64>) nounwind readnone alwaysinline { %s = add <8 x i64> %0, %1 ret <8 x i64> %s } @@ -403,9 +376,6 @@ define <8 x i64> @__masked_load_64(i8 *, <8 x i32> %mask) nounwind alwaysinline ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; masked store -; FIXME: there is no AVX instruction for these, but we could be clever -; by packing the bits down and setting the last 3/4 or half, respectively, -; of the mask to zero... 
Not sure if this would be a win in the end gen_masked_store(8, i8, 8) gen_masked_store(8, i16, 16) @@ -520,12 +490,7 @@ define void @__masked_store_blend_64(<8 x i64>* nocapture %ptr, <8 x i64> %new, ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; -;; gather/scatter - -gen_gather(8, i8) -gen_gather(8, i16) -gen_gather(8, i32) -gen_gather(8, i64) +;; scatter gen_scatter(8, i8) gen_scatter(8, i16) diff --git a/builtins/target-avx1-x2.ll b/builtins/target-avx1-x2.ll new file mode 100644 index 00000000..d05da95f --- /dev/null +++ b/builtins/target-avx1-x2.ll @@ -0,0 +1,69 @@ +;; Copyright (c) 2010-2011, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +include(`target-avx-x2.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pminsd, %0, %1) + ret <16 x i32> %ret +} + +define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1) + ret <16 x i32> %ret +} + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pminud, %0, %1) + ret <16 x i32> %ret +} + +define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary4to16(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1) + ret <16 x i32> %ret +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +gen_gather(16, i8) +gen_gather(16, i16) +gen_gather(16, i32) +gen_gather(16, i64) + + diff --git a/builtins/target-avx1.ll b/builtins/target-avx1.ll new file mode 100644 index 00000000..137ddf00 --- /dev/null +++ b/builtins/target-avx1.ll @@ -0,0 +1,70 @@ +;; Copyright (c) 2010-2011, Intel Corporation +;; All rights reserved. 
+;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include(`target-avx.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pminsd, %0, %1) + ret <8 x i32> %ret +} + +define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pmaxsd, %0, %1) + ret <8 x i32> %ret +} + + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pminud, %0, %1) + ret <8 x i32> %ret +} + +define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + binary4to8(ret, i32, @llvm.x86.sse41.pmaxud, %0, %1) + ret <8 x i32> %ret +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +gen_gather(8, i8) +gen_gather(8, i16) +gen_gather(8, i32) +gen_gather(8, i64) + + + diff --git a/builtins/target-avx2-x2.ll b/builtins/target-avx2-x2.ll new file mode 100644 index 00000000..fa4f345f --- /dev/null +++ b/builtins/target-avx2-x2.ll @@ -0,0 +1,74 @@ +;; Copyright (c) 2010-2011, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. 
+;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include(`target-avx-x2.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly +declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly + +define <16 x i32> @__min_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary8to16(m, i32, @llvm.x86.avx2.pmins.d, %0, %1) + ret <16 x i32> %m +} + +define <16 x i32> @__max_varying_int32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary8to16(m, i32, @llvm.x86.avx2.pmaxs.d, %0, %1) + ret <16 x i32> %m +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly +declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly + +define <16 x i32> @__min_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary8to16(m, i32, @llvm.x86.avx2.pminu.d, %0, %1) + ret <16 x i32> %m +} + +define <16 x i32> @__max_varying_uint32(<16 x i32>, <16 x i32>) nounwind readonly alwaysinline { + binary8to16(m, i32, @llvm.x86.avx2.pmaxu.d, %0, %1) + ret <16 x i32> %m +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +gen_gather(16, i8) +gen_gather(16, i16) +gen_gather(16, i32) +gen_gather(16, i64) + + diff --git a/builtins/target-avx2.ll b/builtins/target-avx2.ll new file mode 100644 index 00000000..c812ede1 --- /dev/null +++ b/builtins/target-avx2.ll @@ -0,0 +1,75 @@ +;; Copyright (c) 2010-2011, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. 
+;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +include(`target-avx.ll') + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; int min/max + +declare <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32>, <8 x i32>) nounwind readonly +declare <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32>, <8 x i32>) nounwind readonly + +define <8 x i32> @__min_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + %m = call <8 x i32> @llvm.x86.avx2.pmins.d(<8 x i32> %0, <8 x i32> %1) + ret <8 x i32> %m +} + +define <8 x i32> @__max_varying_int32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + %m = call <8 x i32> @llvm.x86.avx2.pmaxs.d(<8 x i32> %0, <8 x i32> %1) + ret <8 x i32> %m +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; unsigned int min/max + +declare <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32>, <8 x i32>) nounwind readonly +declare <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32>, <8 x i32>) nounwind readonly + +define <8 x i32> @__min_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + %m = call <8 x i32> @llvm.x86.avx2.pminu.d(<8 x i32> %0, <8 x i32> %1) + ret <8 x i32> %m +} + +define <8 x i32> @__max_varying_uint32(<8 x i32>, <8 x i32>) nounwind readonly alwaysinline { + %m = call <8 x i32> @llvm.x86.avx2.pmaxu.d(<8 x i32> %0, <8 x i32> %1) + ret <8 x i32> %m +} + +;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; +;; gather + +gen_gather(8, i8) +gen_gather(8, i16) +gen_gather(8, i32) +gen_gather(8, i64) + + + diff --git a/ispc.vcxproj b/ispc.vcxproj index fc65004b..38457518 100755 --- a/ispc.vcxproj +++ b/ispc.vcxproj @@ -18,8 +18,10 @@ - - + + + + @@ -158,29 +160,55 @@ - + Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py builtins\target-avx.ll > gen-bitcode-avx.cpp - gen-bitcode-avx.cpp - builtins\util.m4;builtins\target-avx-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx.ll | python bitcode2cpp.py 
builtins\target-avx.ll > gen-bitcode-avx.cpp - gen-bitcode-avx.cpp - builtins\util.m4;builtins\target-avx-common.ll - Building gen-bitcode-avx.cpp - Building gen-bitcode-avx.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp + gen-bitcode-avx1.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1.ll | python bitcode2cpp.py builtins\target-avx1.ll > gen-bitcode-avx1.cpp + gen-bitcode-avx1.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx1.cpp + Building gen-bitcode-avx1.cpp - + Document - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll > gen-bitcode-avx-x2.cpp - gen-bitcode-avx-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll - m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx-x2.ll | python bitcode2cpp.py builtins\target-avx-x2.ll > gen-bitcode-avx-x2.cpp - gen-bitcode-avx-x2.cpp - builtins\util.m4;builtins\target-avx-common.ll - Building gen-bitcode-avx-x2.cpp - Building gen-bitcode-avx-x2.cpp + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp + gen-bitcode-avx1-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx1-x2.ll | python bitcode2cpp.py builtins\target-avx1-x2.ll > gen-bitcode-avx1-x2.cpp + gen-bitcode-avx1-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx1-x2.cpp + Building gen-bitcode-avx1-x2.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp + gen-bitcode-avx2.cpp 
+ builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2.ll | python bitcode2cpp.py builtins\target-avx2.ll > gen-bitcode-avx2.cpp + gen-bitcode-avx2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx.ll + Building gen-bitcode-avx2.cpp + Building gen-bitcode-avx2.cpp + + + + + Document + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp + gen-bitcode-avx2-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + m4 -Ibuiltins/ -DLLVM_VERSION=%LLVM_VERSION% builtins\target-avx2-x2.ll | python bitcode2cpp.py builtins\target-avx2-x2.ll > gen-bitcode-avx2-x2.cpp + gen-bitcode-avx2-x2.cpp + builtins\util.m4;builtins\target-avx-common.ll;builtins\target-avx-x2.ll + Building gen-bitcode-avx2-x2.cpp + Building gen-bitcode-avx2-x2.cpp