diff --git a/.gitignore b/.gitignore index 3bec2ace..88fb0197 100644 --- a/.gitignore +++ b/.gitignore @@ -12,8 +12,5 @@ examples/*/*.png examples/*/*.ppm examples/*/objs/* *.swp -.* -!.gitignore - diff --git a/builtins/svml.m4 b/builtins/svml.m4 index 71a6a709..0a587577 100644 --- a/builtins/svml.m4 +++ b/builtins/svml.m4 @@ -1,20 +1,61 @@ -;; svml +;; copyright stub :) +;; Copyright (c) 2013, Intel Corporation +;; All rights reserved. +;; +;; Redistribution and use in source and binary forms, with or without +;; modification, are permitted provided that the following conditions are +;; met: +;; +;; * Redistributions of source code must retain the above copyright +;; notice, this list of conditions and the following disclaimer. +;; +;; * Redistributions in binary form must reproduce the above copyright +;; notice, this list of conditions and the following disclaimer in the +;; documentation and/or other materials provided with the distribution. +;; +;; * Neither the name of Intel Corporation nor the names of its +;; contributors may be used to endorse or promote products derived from +;; this software without specific prior written permission. +;; +;; +;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS +;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED +;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A +;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER +;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF +;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING +;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS +;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-;; stubs + +;; svml macro + +;; svml_stubs : stubs for svml calls +;; $1 - type ("float" or "double") +;; $2 - svml internal function suffix ("f" for float, "d" for double) +;; $3 - vector width define(`svml_stubs',` - declare <$2 x $1> @__svml_sin$3(<$2 x $1>) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_asin$3(<$2 x $1>) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_cos$3(<$2 x $1>) nounwind readnone alwaysinline - declare void @__svml_sincos$3(<$2 x $1>, <$2 x $1> *, <$2 x $1> *) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_tan$3(<$2 x $1>) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_atan$3(<$2 x $1>) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_atan2$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_exp$3(<$2 x $1>) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_log$3(<$2 x $1>) nounwind readnone alwaysinline - declare <$2 x $1> @__svml_pow$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_sin$2(<$3 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_asin$2(<$3 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_cos$2(<$3 x $1>) nounwind readnone alwaysinline + declare void @__svml_sincos$2(<$3 x $1>, <$3 x $1> *, <$3 x $1> *) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_tan$2(<$3 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_atan$2(<$3 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_atan2$2(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_exp$2(<$3 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_log$2(<$3 x $1>) nounwind readnone alwaysinline + declare <$3 x $1> @__svml_pow$2(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline ') -;; decalre __svml calls +;; svml_declare : declaration of __svml_* intrinsics +;; $1 - type ("float" or "double") +;; $2 - __svml_* intrinsic function suffix 
+;; float: "f4"(sse) "f8"(avx) "f16"(avx512) +;; double: "2"(sse) "4"(avx) "8"(avx512) +;; $3 - vector width define(`svml_declare',` declare <$3 x $1> @__svml_sin$2(<$3 x $1>) nounwind readnone declare <$3 x $1> @__svml_asin$2(<$3 x $1>) nounwind readnone @@ -28,7 +69,13 @@ define(`svml_declare',` declare <$3 x $1> @__svml_pow$2(<$3 x $1>, <$3 x $1>) nounwind readnone '); -;; define native __svml calls +;; definition of __svml_* internal functions +;; $1 - type ("float" or "double") +;; $2 - __svml_* intrinsic function suffix +;; float: "f4"(sse) "f8"(avx) "f16"(avx512) +;; double: "2"(sse) "4"(avx) "8"(avx512) +;; $3 - vector width +;; $4 - svml internal function suffix ("f" for float, "d" for double) define(`svml_define',` define <$3 x $1> @__svml_sin$4(<$3 x $1>) nounwind readnone alwaysinline { %ret = call <$3 x $1> @__svml_sin$2(<$3 x $1> %0) @@ -82,7 +129,45 @@ define(`svml_define',` ') -;; define x2 __svml calls +;; svml_define_x : definition of __svml_* internal functions operating on extended width +;; $1 - type ("float" or "double") +;; $2 - __svml_* intrinsic function suffix +;; float: "f4"(sse) "f8"(avx) "f16"(avx512) +;; double: "2"(sse) "4"(avx) "8"(avx512) +;; $3 - vector width +;; $4 - svml internal function suffix ("f" for float, "d" for double) +;; $5 - extended width, must be at least twice the native vector width +;; contingent on the existence of unary$3to$5 and binary$3to$5 macros + +;; *todo*: in sincos call use __svml_sincos[f][2,4,8,16] call, e.g.
+;;define void @__svml_sincosf(<8 x float>, <8 x float> *, +;; <8 x float> *) nounwind alwaysinline { +;; ; call svml_sincosf4 two times with the two 4-wide sub-vectors +;; %a = shufflevector <8 x float> %0, <8 x float> undef, +;; <4 x i32> <i32 0, i32 1, i32 2, i32 3> +;; %b = shufflevector <8 x float> %0, <8 x float> undef, +;; <4 x i32> <i32 4, i32 5, i32 6, i32 7> +;; +;; %cospa = alloca <4 x float> +;; %sa = call <4 x float> @__svml_sincosf4(<4 x float> * %cospa, <4 x float> %a) +;; +;; %cospb = alloca <4 x float> +;; %sb = call <4 x float> @__svml_sincosf4(<4 x float> * %cospb, <4 x float> %b) +;; +;; %sin = shufflevector <4 x float> %sa, <4 x float> %sb, +;; <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +;; store <8 x float> %sin, <8 x float> * %1 +;; +;; %cosa = load <4 x float> * %cospa +;; %cosb = load <4 x float> * %cospb +;; %cos = shufflevector <4 x float> %cosa, <4 x float> %cosb, +;; <8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7> +;; store <8 x float> %cos, <8 x float> * %2 +;; +;; ret void +;;} define(`svml_define_x',` define <$5 x $1> @__svml_sin$4(<$5 x $1>) nounwind readnone alwaysinline { unary$3to$5(ret, $1, @__svml_sin$2, %0) @@ -96,7 +181,15 @@ define(`svml_define_x',` unary$3to$5(ret, $1, @__svml_cos$2, %0) ret <$5 x $1> %ret } - declare void @__svml_sincos$4(<$5 x $1>,<$5 x $1>*,<$5 x $1>*) nounwind readnone alwaysinline + ;; not readnone: the results are stored through the %1 and %2 pointer arguments + define void @__svml_sincos$4(<$5 x $1>,<$5 x $1>*,<$5 x $1>*) nounwind alwaysinline + { + %s = call <$5 x $1> @__svml_sin$4(<$5 x $1> %0) + %c = call <$5 x $1> @__svml_cos$4(<$5 x $1> %0) + store <$5 x $1> %s, <$5 x $1> * %1 + store <$5 x $1> %c, <$5 x $1> * %2 + ret void + } define <$5 x $1> @__svml_tan$4(<$5 x $1>) nounwind readnone alwaysinline { unary$3to$5(ret, $1, @__svml_tan$2, %0) ret <$5 x $1> %ret diff --git a/builtins/target-avx-i64x4.ll b/builtins/target-avx-i64x4.ll index d7dbb6bd..65490ea5 100644 --- a/builtins/target-avx-i64x4.ll +++ b/builtins/target-avx-i64x4.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2011, Intel Corporation +;; Copyright (c) 2013, Intel Corporation ;; All rights reserved.
;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-avx-i64x4base.ll b/builtins/target-avx-i64x4base.ll index 05bf178d..e1832030 100644 --- a/builtins/target-avx-i64x4base.ll +++ b/builtins/target-avx-i64x4base.ll @@ -1,4 +1,4 @@ -;; Copyright (c) 2010-2012, Intel Corporation +;; Copyright (c) 2013, Intel Corporation ;; All rights reserved. ;; ;; Redistribution and use in source and binary forms, with or without diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 30a8b030..2a5d1b32 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -209,8 +209,8 @@ declare i64 @__count_leading_zeros_i64(i64) nounwind readnone ;; svml include(`svml.m4') -svml_stubs(float, WIDTH, f) -svml_stubs(double, WIDTH, d) +svml_stubs(float,f,WIDTH) +svml_stubs(double,d,WIDTH) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; reductions diff --git a/builtins/target-neon-common.ll b/builtins/target-neon-common.ll index 92fc5ce3..1c0b421f 100644 --- a/builtins/target-neon-common.ll +++ b/builtins/target-neon-common.ll @@ -318,8 +318,8 @@ define void @__masked_store_blend_i64(* nocapture %ptr, include(`svml.m4') -svmlf_stubs(WIDTH) -svmld_stubs(WIDTH) +svml_stubs(float,f,WIDTH) +svml_stubs(double,d,WIDTH) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ;; gather diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index 3f8cd339..72b81ff0 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -210,8 +210,8 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r ; FIXME include(`svml.m4') -svml_stubs(float,8,f) -svml_stubs(double,8,d) +svml_stubs(float,f,WIDTH) +svml_stubs(double,d,WIDTH) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops / reductions diff --git a/builtins/target-sse4-8.ll 
b/builtins/target-sse4-8.ll index f43cd940..69b355e3 100644 --- a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -223,8 +223,8 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin ; FIXME include(`svml.m4') -svml_stubs(float,16,f) -svml_stubs(double,16,d) +svml_stubs(float,f,WIDTH) +svml_stubs(double,d,WIDTH) ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;; ; horizontal ops / reductions diff --git a/llvmutil.cpp b/llvmutil.cpp index 64691498..275cf794 100644 --- a/llvmutil.cpp +++ b/llvmutil.cpp @@ -189,7 +189,7 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) { break; case 64: onMask = llvm::ConstantInt::get(llvm::Type::getInt64Ty(*ctx), -1, - true /*signed*/); // 0xffffffff + true /*signed*/); // 0xffffffffffffffffull break; default: FATAL("Unhandled mask width for onMask"); diff --git a/run_tests.py b/run_tests.py index 9729930f..180205a0 100755 --- a/run_tests.py +++ b/run_tests.py @@ -75,7 +75,7 @@ if not os.path.exists(ispc_exe): sys.stderr.write("Fatal error: missing ispc compiler: %s\n" % ispc_exe) sys.exit() -ispc_exe += " " + options.ispc_flags +ispc_exe += " -g " + options.ispc_flags if __name__ == '__main__': sys.stdout.write("ispc compiler: %s\n" % ispc_exe)