Included suggested changes; ./tests/launch-*.ispc still fail. Something is mask64-related, not sure what. Help...
This commit is contained in:
3
.gitignore
vendored
3
.gitignore
vendored
@@ -12,8 +12,5 @@ examples/*/*.png
|
|||||||
examples/*/*.ppm
|
examples/*/*.ppm
|
||||||
examples/*/objs/*
|
examples/*/objs/*
|
||||||
*.swp
|
*.swp
|
||||||
.*
|
|
||||||
!.gitignore
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
124
builtins/svml.m4
124
builtins/svml.m4
@@ -1,20 +1,61 @@
|
|||||||
;; svml
|
;; copyright stub :)
|
||||||
|
;; Copyright (c) 2013, Intel Corporation
|
||||||
|
;; All rights reserved.
|
||||||
|
;;
|
||||||
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
;; modification, are permitted provided that the following conditions are
|
||||||
|
;; met:
|
||||||
|
;;
|
||||||
|
;; * Redistributions of source code must retain the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer.
|
||||||
|
;;
|
||||||
|
;; * Redistributions in binary form must reproduce the above copyright
|
||||||
|
;; notice, this list of conditions and the following disclaimer in the
|
||||||
|
;; documentation and/or other materials provided with the distribution.
|
||||||
|
;;
|
||||||
|
;; * Neither the name of Intel Corporation nor the names of its
|
||||||
|
;; contributors may be used to endorse or promote products derived from
|
||||||
|
;; this software without specific prior written permission.
|
||||||
|
;;
|
||||||
|
;;
|
||||||
|
;; THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS
|
||||||
|
;; IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
|
||||||
|
;; TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A
|
||||||
|
;; PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER
|
||||||
|
;; OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
|
||||||
|
;; EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
|
||||||
|
;; PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
|
||||||
|
;; PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF
|
||||||
|
;; LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING
|
||||||
|
;; NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
|
||||||
|
;; SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
|
||||||
;; stubs
|
|
||||||
|
;; svml macro
|
||||||
|
|
||||||
|
;; svml_stubs : stubs for svml calls
|
||||||
|
;; $1 - type ("float" or "double")
|
||||||
|
;; $2 - svml internal function suffix ("f" for float, "d" for double)
|
||||||
|
;; $3 - vector width
|
||||||
define(`svml_stubs',`
|
define(`svml_stubs',`
|
||||||
declare <$2 x $1> @__svml_sin$3(<$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_sin$2(<$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_asin$3(<$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_asin$2(<$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_cos$3(<$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_cos$2(<$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare void @__svml_sincos$3(<$2 x $1>, <$2 x $1> *, <$2 x $1> *) nounwind readnone alwaysinline
|
declare void @__svml_sincos$2(<$3 x $1>, <$3 x $1> *, <$3 x $1> *) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_tan$3(<$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_tan$2(<$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_atan$3(<$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_atan$2(<$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_atan2$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_atan2$2(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_exp$3(<$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_exp$2(<$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_log$3(<$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_log$2(<$3 x $1>) nounwind readnone alwaysinline
|
||||||
declare <$2 x $1> @__svml_pow$3(<$2 x $1>, <$2 x $1>) nounwind readnone alwaysinline
|
declare <$3 x $1> @__svml_pow$2(<$3 x $1>, <$3 x $1>) nounwind readnone alwaysinline
|
||||||
')
|
')
|
||||||
|
|
||||||
;; decalre __svml calls
|
;; svml_declare : declaration of __svml_* intrinsics
|
||||||
|
;; $1 - type ("float" or "double")
|
||||||
|
;; $2 - __svml_* intrinsic function suffix
|
||||||
|
;; float: "f4"(sse) "f8"(avx) "f16"(avx512)
|
||||||
|
;; double: "2"(sse) "4"(avx) "8"(avx512)
|
||||||
|
;; $3 - vector width
|
||||||
define(`svml_declare',`
|
define(`svml_declare',`
|
||||||
declare <$3 x $1> @__svml_sin$2(<$3 x $1>) nounwind readnone
|
declare <$3 x $1> @__svml_sin$2(<$3 x $1>) nounwind readnone
|
||||||
declare <$3 x $1> @__svml_asin$2(<$3 x $1>) nounwind readnone
|
declare <$3 x $1> @__svml_asin$2(<$3 x $1>) nounwind readnone
|
||||||
@@ -28,7 +69,13 @@ define(`svml_declare',`
|
|||||||
declare <$3 x $1> @__svml_pow$2(<$3 x $1>, <$3 x $1>) nounwind readnone
|
declare <$3 x $1> @__svml_pow$2(<$3 x $1>, <$3 x $1>) nounwind readnone
|
||||||
');
|
');
|
||||||
|
|
||||||
;; define native __svml calls
|
;; svml_define : definition of __svml_* internal functions
|
||||||
|
;; $1 - type ("float" or "double")
|
||||||
|
;; $2 - __svml_* intrinsic function suffix
|
||||||
|
;; float: "f4"(sse) "f8"(avx) "f16"(avx512)
|
||||||
|
;; double: "2"(sse) "4"(avx) "8"(avx512)
|
||||||
|
;; $3 - vector width
|
||||||
|
;; $4 - svml internal function suffix ("f" for float, "d" for double)
|
||||||
define(`svml_define',`
|
define(`svml_define',`
|
||||||
define <$3 x $1> @__svml_sin$4(<$3 x $1>) nounwind readnone alwaysinline {
|
define <$3 x $1> @__svml_sin$4(<$3 x $1>) nounwind readnone alwaysinline {
|
||||||
%ret = call <$3 x $1> @__svml_sin$2(<$3 x $1> %0)
|
%ret = call <$3 x $1> @__svml_sin$2(<$3 x $1> %0)
|
||||||
@@ -82,7 +129,45 @@ define(`svml_define',`
|
|||||||
')
|
')
|
||||||
|
|
||||||
|
|
||||||
;; define x2 __svml calls
|
;; svml_define_x : definition of __svml_* internal functions operating on extended width
|
||||||
|
;; $1 - type ("float" or "double")
|
||||||
|
;; $2 - __svml_* intrinsic function suffix
|
||||||
|
;; float: "f4"(sse) "f8"(avx) "f16"(avx512)
|
||||||
|
;; double: "2"(sse) "4"(avx) "8"(avx512)
|
||||||
|
;; $3 - vector width
|
||||||
|
;; $4 - svml internal function suffix ("f" for float, "d" for double)
|
||||||
|
;; $5 - extended width, must be at least twice the native vector width
|
||||||
|
;; contingent on the existence of unary$3to$5 and binary$3to$5 macros
|
||||||
|
|
||||||
|
;; *todo*: in sincos call use __svml_sincos[f][2,4,8,16] call, e.g.
|
||||||
|
;;define void @__svml_sincosf(<8 x float>, <8 x float> *,
|
||||||
|
;; <8 x float> *) nounwind readnone alwaysinline {
|
||||||
|
;; ; call svml_sincosf4 two times with the two 4-wide sub-vectors
|
||||||
|
;; %a = shufflevector <8 x float> %0, <8 x float> undef,
|
||||||
|
;; <4 x i32> <i32 0, i32 1, i32 2, i32 3>
|
||||||
|
;; %b = shufflevector <8 x float> %0, <8 x float> undef,
|
||||||
|
;; <4 x i32> <i32 4, i32 5, i32 6, i32 7>
|
||||||
|
;;
|
||||||
|
;; %cospa = alloca <4 x float>
|
||||||
|
;; %sa = call <4 x float> @__svml_sincosf4(<4 x float> * %cospa, <4 x float> %a)
|
||||||
|
;;
|
||||||
|
;; %cospb = alloca <4 x float>
|
||||||
|
;; %sb = call <4 x float> @__svml_sincosf4(<4 x float> * %cospb, <4 x float> %b)
|
||||||
|
;;
|
||||||
|
;; %sin = shufflevector <4 x float> %sa, <4 x float> %sb,
|
||||||
|
;; <8 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||||
|
;; i32 4, i32 5, i32 6, i32 7>
|
||||||
|
;; store <8 x float> %sin, <8 x float> * %1
|
||||||
|
;;
|
||||||
|
;; %cosa = load <4 x float> * %cospa
|
||||||
|
;; %cosb = load <4 x float> * %cospb
|
||||||
|
;; %cos = shufflevector <4 x float> %cosa, <4 x float> %cosb,
|
||||||
|
;; <8 x i32> <i32 0, i32 1, i32 2, i32 3,
|
||||||
|
;; i32 4, i32 5, i32 6, i32 7>
|
||||||
|
;; store <8 x float> %cos, <8 x float> * %2
|
||||||
|
;;
|
||||||
|
;; ret void
|
||||||
|
;;}
|
||||||
define(`svml_define_x',`
|
define(`svml_define_x',`
|
||||||
define <$5 x $1> @__svml_sin$4(<$5 x $1>) nounwind readnone alwaysinline {
|
define <$5 x $1> @__svml_sin$4(<$5 x $1>) nounwind readnone alwaysinline {
|
||||||
unary$3to$5(ret, $1, @__svml_sin$2, %0)
|
unary$3to$5(ret, $1, @__svml_sin$2, %0)
|
||||||
@@ -96,7 +181,14 @@ define(`svml_define_x',`
|
|||||||
unary$3to$5(ret, $1, @__svml_cos$2, %0)
|
unary$3to$5(ret, $1, @__svml_cos$2, %0)
|
||||||
ret <$5 x $1> %ret
|
ret <$5 x $1> %ret
|
||||||
}
|
}
|
||||||
declare void @__svml_sincos$4(<$5 x $1>,<$5 x $1>*,<$5 x $1>*) nounwind readnone alwaysinline
|
define void @__svml_sincos$4(<$5 x $1>,<$5 x $1>*,<$5 x $1>*) nounwind readnone alwaysinline
|
||||||
|
{
|
||||||
|
%s = call <$5 x $1> @__svml_sin$4(<$5 x $1> %0)
|
||||||
|
%c = call <$5 x $1> @__svml_cos$4(<$5 x $1> %0)
|
||||||
|
store <$5 x $1> %s, <$5 x $1> * %1
|
||||||
|
store <$5 x $1> %c, <$5 x $1> * %2
|
||||||
|
ret void
|
||||||
|
}
|
||||||
define <$5 x $1> @__svml_tan$4(<$5 x $1>) nounwind readnone alwaysinline {
|
define <$5 x $1> @__svml_tan$4(<$5 x $1>) nounwind readnone alwaysinline {
|
||||||
unary$3to$5(ret, $1, @__svml_tan$2, %0)
|
unary$3to$5(ret, $1, @__svml_tan$2, %0)
|
||||||
ret <$5 x $1> %ret
|
ret <$5 x $1> %ret
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
;; Copyright (c) 2010-2011, Intel Corporation
|
;; Copyright (c) 2013, Intel Corporation
|
||||||
;; All rights reserved.
|
;; All rights reserved.
|
||||||
;;
|
;;
|
||||||
;; Redistribution and use in source and binary forms, with or without
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
|||||||
@@ -1,4 +1,4 @@
|
|||||||
;; Copyright (c) 2010-2012, Intel Corporation
|
;; Copyright (c) 2013, Intel Corporation
|
||||||
;; All rights reserved.
|
;; All rights reserved.
|
||||||
;;
|
;;
|
||||||
;; Redistribution and use in source and binary forms, with or without
|
;; Redistribution and use in source and binary forms, with or without
|
||||||
|
|||||||
@@ -209,8 +209,8 @@ declare i64 @__count_leading_zeros_i64(i64) nounwind readnone
|
|||||||
;; svml
|
;; svml
|
||||||
|
|
||||||
include(`svml.m4')
|
include(`svml.m4')
|
||||||
svml_stubs(float, WIDTH, f)
|
svml_stubs(float,f,WIDTH)
|
||||||
svml_stubs(double, WIDTH, d)
|
svml_stubs(double,d,WIDTH)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; reductions
|
;; reductions
|
||||||
|
|||||||
@@ -318,8 +318,8 @@ define void @__masked_store_blend_i64(<WIDTH x i64>* nocapture %ptr,
|
|||||||
|
|
||||||
|
|
||||||
include(`svml.m4')
|
include(`svml.m4')
|
||||||
svmlf_stubs(WIDTH)
|
svml_stubs(float,f,WIDTH)
|
||||||
svmld_stubs(WIDTH)
|
svml_stubs(double,d,WIDTH)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
;; gather
|
;; gather
|
||||||
|
|||||||
@@ -210,8 +210,8 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
|
|||||||
|
|
||||||
; FIXME
|
; FIXME
|
||||||
include(`svml.m4')
|
include(`svml.m4')
|
||||||
svml_stubs(float,8,f)
|
svml_stubs(float,f,WIDTH)
|
||||||
svml_stubs(double,8,d)
|
svml_stubs(double,d,WIDTH)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
; horizontal ops / reductions
|
; horizontal ops / reductions
|
||||||
|
|||||||
@@ -223,8 +223,8 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
|
|||||||
; FIXME
|
; FIXME
|
||||||
|
|
||||||
include(`svml.m4')
|
include(`svml.m4')
|
||||||
svml_stubs(float,16,f)
|
svml_stubs(float,f,WIDTH)
|
||||||
svml_stubs(double,16,d)
|
svml_stubs(double,d,WIDTH)
|
||||||
|
|
||||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||||
; horizontal ops / reductions
|
; horizontal ops / reductions
|
||||||
|
|||||||
@@ -189,7 +189,7 @@ InitLLVMUtil(llvm::LLVMContext *ctx, Target& target) {
|
|||||||
break;
|
break;
|
||||||
case 64:
|
case 64:
|
||||||
onMask = llvm::ConstantInt::get(llvm::Type::getInt64Ty(*ctx), -1,
|
onMask = llvm::ConstantInt::get(llvm::Type::getInt64Ty(*ctx), -1,
|
||||||
true /*signed*/); // 0xffffffff
|
true /*signed*/); // 0xffffffffffffffffull
|
||||||
break;
|
break;
|
||||||
default:
|
default:
|
||||||
FATAL("Unhandled mask width for onMask");
|
FATAL("Unhandled mask width for onMask");
|
||||||
|
|||||||
@@ -75,7 +75,7 @@ if not os.path.exists(ispc_exe):
|
|||||||
sys.stderr.write("Fatal error: missing ispc compiler: %s\n" % ispc_exe)
|
sys.stderr.write("Fatal error: missing ispc compiler: %s\n" % ispc_exe)
|
||||||
sys.exit()
|
sys.exit()
|
||||||
|
|
||||||
ispc_exe += " " + options.ispc_flags
|
ispc_exe += " -g " + options.ispc_flags
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
sys.stdout.write("ispc compiler: %s\n" % ispc_exe)
|
sys.stdout.write("ispc compiler: %s\n" % ispc_exe)
|
||||||
|
|||||||
Reference in New Issue
Block a user