;; Copyright (c) 2010-2012, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without ;; modification, are permitted provided that the following conditions are ;; met: ;; ;; * Redistributions of source code must retain the above copyright ;; notice, this list of conditions and the following disclaimer. ;; ;; * Redistributions in binary form must reproduce the above copyright ;; notice, this list of conditions and the following disclaimer in the ;; documentation and/or other materials provided with the distribution. ;; ;; * Neither the name of Intel Corporation nor the names of its ;; contributors may be used to endorse or promote products derived from ;; this software without specific prior written permission. ;; ;; ;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS ;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED ;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A ;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER ;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, ;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, ;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR ;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF ;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING ;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS ;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
target datalayout = "e-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-v64:64:64-v128:128:128-a0:0:64-s0:64:64-f80:128:128-n8:16:32:64-S128-v16:16:16-v32:32:32-v4:128:128";

;; This section is run through m4 before it reaches the LLVM assembler.
;; Varying values are written as vectors whose element count is a width
;; macro that is not set anywhere in this section and must be supplied
;; from outside.
;;
;; Keep backticks, apostrophes and macro names out of comments in this
;; file: m4 still quotes and expands text that LLVM treats as a comment.

;; Configuration macros, set before the shared macro library is pulled in.
;; NOTE(review): presumably the library keys its mask element type and its
;; gather and scatter code paths off these three settings - confirm.
define(`MASK',`i1')
define(`HAVE_GATHER',`1')
define(`HAVE_SCATTER',`1')

include(`util.m4')

;; Macro invocations; what they expand to is not visible in this section.
stdlib_core()
scans()
reduce_equal(WIDTH)
rdrand_decls()

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; broadcast/rotate/shuffle
;;
;; Everything from here to the masked store blend routines is a declaration
;; only; no bodies are given in this file.

declare <WIDTH x float> @__smear_float(float) nounwind readnone
declare <WIDTH x double> @__smear_double(double) nounwind readnone
declare <WIDTH x i8> @__smear_i8(i8) nounwind readnone
declare <WIDTH x i16> @__smear_i16(i16) nounwind readnone
declare <WIDTH x i32> @__smear_i32(i32) nounwind readnone
declare <WIDTH x i64> @__smear_i64(i64) nounwind readnone

declare <WIDTH x float> @__setzero_float() nounwind readnone
declare <WIDTH x double> @__setzero_double() nounwind readnone
declare <WIDTH x i8> @__setzero_i8() nounwind readnone
declare <WIDTH x i16> @__setzero_i16() nounwind readnone
declare <WIDTH x i32> @__setzero_i32() nounwind readnone
declare <WIDTH x i64> @__setzero_i64() nounwind readnone

declare <WIDTH x float> @__undef_float() nounwind readnone
declare <WIDTH x double> @__undef_double() nounwind readnone
declare <WIDTH x i8> @__undef_i8() nounwind readnone
declare <WIDTH x i16> @__undef_i16() nounwind readnone
declare <WIDTH x i32> @__undef_i32() nounwind readnone
declare <WIDTH x i64> @__undef_i64() nounwind readnone

declare <WIDTH x float> @__broadcast_float(<WIDTH x float>, i32) nounwind readnone
declare <WIDTH x double> @__broadcast_double(<WIDTH x double>, i32) nounwind readnone
declare <WIDTH x i8> @__broadcast_i8(<WIDTH x i8>, i32) nounwind readnone
declare <WIDTH x i16> @__broadcast_i16(<WIDTH x i16>, i32) nounwind readnone
declare <WIDTH x i32> @__broadcast_i32(<WIDTH x i32>, i32) nounwind readnone
declare <WIDTH x i64> @__broadcast_i64(<WIDTH x i64>, i32) nounwind readnone

declare <WIDTH x i8> @__rotate_i8(<WIDTH x i8>, i32) nounwind readnone
declare <WIDTH x i16> @__rotate_i16(<WIDTH x i16>, i32) nounwind readnone
declare <WIDTH x float> @__rotate_float(<WIDTH x float>, i32) nounwind readnone
declare <WIDTH x i32> @__rotate_i32(<WIDTH x i32>, i32) nounwind readnone
declare <WIDTH x double> @__rotate_double(<WIDTH x double>, i32) nounwind readnone
declare <WIDTH x i64> @__rotate_i64(<WIDTH x i64>, i32) nounwind readnone

;; NOTE(review): the trailing operand of each routine below is typed as a
;; vector of i32 lane indices - confirm against the implementation that is
;; linked in for these symbols.
declare <WIDTH x i8> @__shuffle_i8(<WIDTH x i8>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i8> @__shuffle2_i8(<WIDTH x i8>, <WIDTH x i8>,
                                    <WIDTH x i32>) nounwind readnone
declare <WIDTH x i16> @__shuffle_i16(<WIDTH x i16>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i16> @__shuffle2_i16(<WIDTH x i16>, <WIDTH x i16>,
                                      <WIDTH x i32>) nounwind readnone
declare <WIDTH x float> @__shuffle_float(<WIDTH x float>,
                                         <WIDTH x i32>) nounwind readnone
declare <WIDTH x float> @__shuffle2_float(<WIDTH x float>, <WIDTH x float>,
                                          <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__shuffle_i32(<WIDTH x i32>,
                                     <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__shuffle2_i32(<WIDTH x i32>, <WIDTH x i32>,
                                      <WIDTH x i32>) nounwind readnone
declare <WIDTH x double> @__shuffle_double(<WIDTH x double>,
                                           <WIDTH x i32>) nounwind readnone
declare <WIDTH x double> @__shuffle2_double(<WIDTH x double>,
                                            <WIDTH x double>,
                                            <WIDTH x i32>) nounwind readnone
declare <WIDTH x i64> @__shuffle_i64(<WIDTH x i64>,
                                     <WIDTH x i32>) nounwind readnone
declare <WIDTH x i64> @__shuffle2_i64(<WIDTH x i64>, <WIDTH x i64>,
                                      <WIDTH x i32>) nounwind readnone

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; aos/soa
;;
;; Note that the three-way aos to soa routine does not mark its output
;; pointers noalias while the four-way one does; that difference is kept
;; exactly as it was.

declare void
@__soa_to_aos3_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
                     <WIDTH x float> %v2, float * noalias %p) nounwind
declare void
@__aos_to_soa3_float(float * noalias %p, <WIDTH x float> * %out0,
                     <WIDTH x float> * %out1, <WIDTH x float> * %out2) nounwind
declare void
@__soa_to_aos4_float(<WIDTH x float> %v0, <WIDTH x float> %v1,
                     <WIDTH x float> %v2, <WIDTH x float> %v3,
                     float * noalias %p) nounwind
declare void
@__aos_to_soa4_float(float * noalias %p, <WIDTH x float> * noalias %out0,
                     <WIDTH x float> * noalias %out1,
                     <WIDTH x float> * noalias %out2,
                     <WIDTH x float> * noalias %out3) nounwind

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; half conversion routines
;;
;; The uniform forms carry a half value as i16, so the varying forms are
;; typed with i16 elements on the half side.

declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; math

declare void @__fastmath() nounwind

;; round/floor/ceil

declare float @__round_uniform_float(float) nounwind readnone
declare float @__floor_uniform_float(float) nounwind readnone
declare float @__ceil_uniform_float(float) nounwind readnone

declare double @__round_uniform_double(double) nounwind readnone
declare double @__floor_uniform_double(double) nounwind readnone
declare double @__ceil_uniform_double(double) nounwind readnone

declare <WIDTH x float> @__round_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__floor_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__ceil_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x double> @__round_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__floor_varying_double(<WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__ceil_varying_double(<WIDTH x double>) nounwind readnone

;; min/max
;;
;; Signed and unsigned integer variants share the same LLVM integer types;
;; only the symbol name tells them apart.

declare float @__max_uniform_float(float, float) nounwind readnone
declare float @__min_uniform_float(float, float) nounwind readnone
declare i32 @__min_uniform_int32(i32, i32) nounwind readnone
declare i32 @__max_uniform_int32(i32, i32) nounwind readnone
declare i32 @__min_uniform_uint32(i32, i32) nounwind readnone
declare i32 @__max_uniform_uint32(i32, i32) nounwind readnone
declare i64 @__min_uniform_int64(i64, i64) nounwind readnone
declare i64 @__max_uniform_int64(i64, i64) nounwind readnone
declare i64 @__min_uniform_uint64(i64, i64) nounwind readnone
declare i64 @__max_uniform_uint64(i64, i64) nounwind readnone
declare double @__min_uniform_double(double, double) nounwind readnone
declare double @__max_uniform_double(double, double) nounwind readnone

declare <WIDTH x float> @__max_varying_float(<WIDTH x float>,
                                             <WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__min_varying_float(<WIDTH x float>,
                                             <WIDTH x float>) nounwind readnone
declare <WIDTH x i32> @__min_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__max_varying_int32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__min_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i32> @__max_varying_uint32(<WIDTH x i32>, <WIDTH x i32>) nounwind readnone
declare <WIDTH x i64> @__min_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x i64> @__max_varying_int64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x i64> @__min_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x i64> @__max_varying_uint64(<WIDTH x i64>, <WIDTH x i64>) nounwind readnone
declare <WIDTH x double> @__min_varying_double(<WIDTH x double>,
                                               <WIDTH x double>) nounwind readnone
declare <WIDTH x double> @__max_varying_double(<WIDTH x double>,
                                               <WIDTH x double>) nounwind readnone

;; sqrt/rsqrt/rcp

declare float @__rsqrt_uniform_float(float) nounwind readnone
declare float @__rcp_uniform_float(float) nounwind readnone
declare float @__sqrt_uniform_float(float) nounwind readnone
declare <WIDTH x float> @__rcp_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__rsqrt_varying_float(<WIDTH x float>) nounwind readnone
declare <WIDTH x float> @__sqrt_varying_float(<WIDTH x float>) nounwind readnone

declare double @__sqrt_uniform_double(double) nounwind readnone
declare <WIDTH x double> @__sqrt_varying_double(<WIDTH x double>) nounwind readnone

;; bit ops

declare i32 @__popcnt_int32(i32) nounwind readnone
declare i64 @__popcnt_int64(i64) nounwind readnone

declare i32 @__count_trailing_zeros_i32(i32) nounwind readnone
declare i64 @__count_trailing_zeros_i64(i64) nounwind readnone
declare i32 @__count_leading_zeros_i32(i32) nounwind readnone
declare i64 @__count_leading_zeros_i64(i64) nounwind readnone

; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
; or, use the macro to call the 4-wide ones twice with our 8-wide
; vectors...

;; svml

include(`svml.m4')
svml_stubs(float,f,WIDTH)
svml_stubs(double,d,WIDTH)

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; reductions
;;
;; The 8-bit, 16-bit and 32-bit integer add reductions return a wider scalar
;; than their element type (i16, i32 and i64 respectively); every other
;; reduction returns a scalar of the element type.

declare i64 @__movmsk(<WIDTH x i1>) nounwind readnone
declare i1 @__any(<WIDTH x i1>) nounwind readnone
declare i1 @__all(<WIDTH x i1>) nounwind readnone
declare i1 @__none(<WIDTH x i1>) nounwind readnone

declare i16 @__reduce_add_int8(<WIDTH x i8>) nounwind readnone
declare i32 @__reduce_add_int16(<WIDTH x i16>) nounwind readnone

declare float @__reduce_add_float(<WIDTH x float>) nounwind readnone
declare float @__reduce_min_float(<WIDTH x float>) nounwind readnone
declare float @__reduce_max_float(<WIDTH x float>) nounwind readnone

declare i64 @__reduce_add_int32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_min_int32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_max_int32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_min_uint32(<WIDTH x i32>) nounwind readnone
declare i32 @__reduce_max_uint32(<WIDTH x i32>) nounwind readnone

declare double @__reduce_add_double(<WIDTH x double>) nounwind readnone
declare double @__reduce_min_double(<WIDTH x double>) nounwind readnone
declare double @__reduce_max_double(<WIDTH x double>) nounwind readnone

declare i64 @__reduce_add_int64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_min_int64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_max_int64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone
declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
;;
;; Masked loads take an untyped i8 pointer; masked stores take a pointer to
;; the full vector type.

declare <WIDTH x i8> @__masked_load_i8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i16> @__masked_load_i16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i32> @__masked_load_i32(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x float> @__masked_load_float(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i64> @__masked_load_i64(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x double> @__masked_load_double(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly

declare void @__masked_store_i8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
                                <WIDTH x i1>) nounwind
declare void @__masked_store_i16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
                                 <WIDTH x i1>) nounwind
declare void @__masked_store_i32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
                                 <WIDTH x i1>) nounwind
declare void @__masked_store_float(<WIDTH x float>* nocapture, <WIDTH x float>,
                                   <WIDTH x i1>) nounwind
declare void @__masked_store_i64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
                                 <WIDTH x i1> %mask) nounwind
declare void @__masked_store_double(<WIDTH x double>* nocapture, <WIDTH x double>,
                                    <WIDTH x i1> %mask) nounwind

;; Masked store blend routines.
;;
;; For the LLVM 3.0 configuration only declarations are emitted. For every
;; other configuration the routines are defined inline right here: each one
;; loads the whole vector at the pointer operand, picks per element either
;; the new value (mask element set) or the value just loaded (mask element
;; clear), and stores the whole vector back. Every element of the
;; destination is therefore read and rewritten, including the ones whose
;; mask element is clear.
;;
;; The select instruction needs a vector of i1 as its condition, which is
;; why the mask operand is spelled with literal i1 elements.

ifelse(LLVM_VERSION, `LLVM_3_0', `
declare void @__masked_store_blend_i8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
                                      <WIDTH x i1>) nounwind
declare void @__masked_store_blend_i16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
                                       <WIDTH x i1>) nounwind
declare void @__masked_store_blend_i32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
                                       <WIDTH x i1>) nounwind
declare void @__masked_store_blend_float(<WIDTH x float>* nocapture, <WIDTH x float>,
                                         <WIDTH x i1>) nounwind
declare void @__masked_store_blend_i64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
                                       <WIDTH x i1> %mask) nounwind
declare void @__masked_store_blend_double(<WIDTH x double>* nocapture, <WIDTH x double>,
                                          <WIDTH x i1> %mask) nounwind
', `
define void @__masked_store_blend_i8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
                                     <WIDTH x i1>) nounwind alwaysinline {
  %v = load <WIDTH x i8> * %0
  %v1 = select <WIDTH x i1> %2, <WIDTH x i8> %1, <WIDTH x i8> %v
  store <WIDTH x i8> %v1, <WIDTH x i8> * %0
  ret void
}

define void @__masked_store_blend_i16(<WIDTH x i16>* nocapture, <WIDTH x i16>,
                                      <WIDTH x i1>) nounwind alwaysinline {
  %v = load <WIDTH x i16> * %0
  %v1 = select <WIDTH x i1> %2, <WIDTH x i16> %1, <WIDTH x i16> %v
  store <WIDTH x i16> %v1, <WIDTH x i16> * %0
  ret void
}

define void @__masked_store_blend_i32(<WIDTH x i32>* nocapture, <WIDTH x i32>,
                                      <WIDTH x i1>) nounwind alwaysinline {
  %v = load <WIDTH x i32> * %0
  %v1 = select <WIDTH x i1> %2, <WIDTH x i32> %1, <WIDTH x i32> %v
  store <WIDTH x i32> %v1, <WIDTH x i32> * %0
  ret void
}

define void @__masked_store_blend_float(<WIDTH x float>* nocapture, <WIDTH x float>,
                                        <WIDTH x i1>) nounwind alwaysinline {
  %v = load <WIDTH x float> * %0
  %v1 = select <WIDTH x i1> %2, <WIDTH x float> %1, <WIDTH x float> %v
  store <WIDTH x float> %v1, <WIDTH x float> * %0
  ret void
}

define void @__masked_store_blend_i64(<WIDTH x i64>* nocapture, <WIDTH x i64>,
                                      <WIDTH x i1>) nounwind alwaysinline {
  %v = load <WIDTH x i64> * %0
  %v1 = select <WIDTH x i1> %2, <WIDTH x i64> %1, <WIDTH x i64> %v
  store <WIDTH x i64> %v1, <WIDTH x i64> * %0
  ret void
}

define void @__masked_store_blend_double(<WIDTH x double>* nocapture, <WIDTH x double>,
                                         <WIDTH x i1>) nounwind alwaysinline {
  %v = load <WIDTH x double> * %0
  %v1 = select <WIDTH x i1> %2, <WIDTH x double> %1, <WIDTH x double> %v
  store <WIDTH x double> %v1, <WIDTH x double> * %0
  ret void
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather/scatter
;;
;; The macro defined next takes one argument, an element type name, and
;; emits eight declarations for it: four gathers (read only) and four
;; scatters. It is then instantiated once per element type.
;;
;; NOTE(review): offset and pointer vectors are typed by the 32 or 64 suffix
;; in the symbol name (i32 or i64 elements), and the final operand of every
;; routine is typed as the i1 mask vector - confirm against the
;; implementation that is linked in for these symbols.
;;
;; Keep backticks, apostrophes and macro names out of comments in this
;; file: m4 still quotes and expands text that LLVM treats as a comment.

define(`gather_scatter', `
declare <WIDTH x $1> @__gather_base_offsets32_$1(i8 * nocapture, i32, <WIDTH x i32>,
                                                 <WIDTH x i1>) nounwind readonly
declare <WIDTH x $1> @__gather_base_offsets64_$1(i8 * nocapture, i32, <WIDTH x i64>,
                                                 <WIDTH x i1>) nounwind readonly
declare <WIDTH x $1> @__gather32_$1(<WIDTH x i32>,
                                    <WIDTH x i1>) nounwind readonly
declare <WIDTH x $1> @__gather64_$1(<WIDTH x i64>,
                                    <WIDTH x i1>) nounwind readonly

declare void @__scatter_base_offsets32_$1(i8* nocapture, i32, <WIDTH x i32>,
                                          <WIDTH x $1>, <WIDTH x i1>) nounwind
declare void @__scatter_base_offsets64_$1(i8* nocapture, i32, <WIDTH x i64>,
                                          <WIDTH x $1>, <WIDTH x i1>) nounwind
declare void @__scatter32_$1(<WIDTH x i32>, <WIDTH x $1>,
                             <WIDTH x i1>) nounwind
declare void @__scatter64_$1(<WIDTH x i64>, <WIDTH x $1>,
                             <WIDTH x i1>) nounwind
')

gather_scatter(i8)
gather_scatter(i16)
gather_scatter(i32)
gather_scatter(float)
gather_scatter(i64)
gather_scatter(double)

;; Packed load and store of active elements. Both return an i32.
;; NOTE(review): the vector operands are typed with i32 elements to match
;; the i32 base pointer - confirm.
declare i32 @__packed_load_active(i32 * nocapture, <WIDTH x i32> * nocapture,
                                  <WIDTH x i1>) nounwind
declare i32 @__packed_store_active(i32 * nocapture, <WIDTH x i32> %vals,
                                   <WIDTH x i1>) nounwind

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; prefetch

declare void @__prefetch_read_uniform_1(i8 * nocapture) nounwind
declare void @__prefetch_read_uniform_2(i8 * nocapture) nounwind
declare void @__prefetch_read_uniform_3(i8 * nocapture) nounwind
declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind

;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins

define_avgs()