Added SVML support (experimental). For some reason all symbols are visible.
This commit is contained in:
4
.gitignore
vendored
4
.gitignore
vendored
@@ -11,5 +11,9 @@ tests*/*run
|
||||
examples/*/*.png
|
||||
examples/*/*.ppm
|
||||
examples/*/objs/*
|
||||
*.swp
|
||||
.*
|
||||
!.gitignore
|
||||
|
||||
|
||||
|
||||
|
||||
6
Makefile
6
Makefile
@@ -246,15 +246,15 @@ objs/lex.o: objs/lex.cpp $(HEADERS) objs/parse.cc
|
||||
@echo Compiling $<
|
||||
@$(CXX) $(CXXFLAGS) -o $@ -c $<
|
||||
|
||||
objs/builtins-dispatch.cpp: builtins/dispatch.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
||||
objs/builtins-dispatch.cpp: builtins/dispatch.ll builtins/util.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
||||
@echo Creating C++ source from builtins definition file $<
|
||||
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX $< | python bitcode2cpp.py $< > $@
|
||||
|
||||
objs/builtins-%-32bit.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
||||
objs/builtins-%-32bit.cpp: builtins/%.ll builtins/util.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
||||
@echo Creating C++ source from builtins definition file $< \(32 bit version\)
|
||||
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=32 $< | python bitcode2cpp.py $< 32bit > $@
|
||||
|
||||
objs/builtins-%-64bit.cpp: builtins/%.ll builtins/util.m4 $(wildcard builtins/*common.ll)
|
||||
objs/builtins-%-64bit.cpp: builtins/%.ll builtins/util.m4 builtins/svml.m4 $(wildcard builtins/*common.ll)
|
||||
@echo Creating C++ source from builtins definition file $< \(64 bit version\)
|
||||
@m4 -Ibuiltins/ -DLLVM_VERSION=$(LLVM_VERSION) -DBUILD_OS=UNIX -DRUNTIME=64 $< | python bitcode2cpp.py $< 64bit > $@
|
||||
|
||||
|
||||
13
builtins.cpp
13
builtins.cpp
@@ -582,7 +582,9 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__stdlib_tan",
|
||||
"__stdlib_tanf",
|
||||
"__svml_sin",
|
||||
"__svml_asin",
|
||||
"__svml_cos",
|
||||
"__svml_acos",
|
||||
"__svml_sincos",
|
||||
"__svml_tan",
|
||||
"__svml_atan",
|
||||
@@ -590,6 +592,17 @@ lSetInternalFunctions(llvm::Module *module) {
|
||||
"__svml_exp",
|
||||
"__svml_log",
|
||||
"__svml_pow",
|
||||
"__svml_sinf",
|
||||
"__svml_asinf",
|
||||
"__svml_cosf",
|
||||
"__svml_acosf",
|
||||
"__svml_sincosf",
|
||||
"__svml_tanf",
|
||||
"__svml_atanf",
|
||||
"__svml_atan2f",
|
||||
"__svml_expf",
|
||||
"__svml_logf",
|
||||
"__svml_powf",
|
||||
"__undef_uniform",
|
||||
"__undef_varying",
|
||||
"__vec4_add_float",
|
||||
|
||||
@@ -154,28 +154,11 @@ define <4 x double> @__sqrt_varying_double(<4 x double>) nounwind alwaysinline {
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; svml
|
||||
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones twice with our 8-wide
|
||||
; vectors...
|
||||
|
||||
;;declare <4 x double> @__svml_sin4(<4 x double>)
|
||||
;;declare <4 x double> @__svml_cos4(<4 x double>)
|
||||
;;declare void @__svml_sincos4(<4 x double>, <4 x double> *, <4 x double> *)
|
||||
;;declare <4 x double> @__svml_tan4(<4 x double>)
|
||||
;;declare <4 x double> @__svml_atan4(<4 x double>)
|
||||
;;declare <4 x double> @__svml_atan24(<4 x double>, <4 x double>)
|
||||
;;declare <4 x double> @__svml_exp4(<4 x double>)
|
||||
;;declare <4 x double> @__svml_log4(<4 x double>)
|
||||
;;declare <4 x double> @__svml_pow4(<4 x double>, <4 x double>)
|
||||
declare <4 x float> @__svml_sin(<4 x float>)
|
||||
declare <4 x float> @__svml_cos(<4 x float>)
|
||||
declare void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *)
|
||||
declare <4 x float> @__svml_tan(<4 x float>)
|
||||
declare <4 x float> @__svml_atan(<4 x float>)
|
||||
declare <4 x float> @__svml_atan2(<4 x float>, <4 x float>)
|
||||
declare <4 x float> @__svml_exp(<4 x float>)
|
||||
declare <4 x float> @__svml_log(<4 x float>)
|
||||
declare <4 x float> @__svml_pow(<4 x float>, <4 x float>)
|
||||
include(`svml.m4')
|
||||
svmlf_declare(4)
|
||||
svmlf_define(4)
|
||||
svmld_declare(4)
|
||||
svmld_define(4)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
@@ -137,19 +137,9 @@ define <16 x float> @__sqrt_varying_float(<16 x float>) nounwind readonly always
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; svml
|
||||
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones 4x with our 16-wide
|
||||
; vectors...
|
||||
|
||||
declare <16 x float> @__svml_sin(<16 x float>)
|
||||
declare <16 x float> @__svml_cos(<16 x float>)
|
||||
declare void @__svml_sincos(<16 x float>, <16 x float> *, <16 x float> *)
|
||||
declare <16 x float> @__svml_tan(<16 x float>)
|
||||
declare <16 x float> @__svml_atan(<16 x float>)
|
||||
declare <16 x float> @__svml_atan2(<16 x float>, <16 x float>)
|
||||
declare <16 x float> @__svml_exp(<16 x float>)
|
||||
declare <16 x float> @__svml_log(<16 x float>)
|
||||
declare <16 x float> @__svml_pow(<16 x float>, <16 x float>)
|
||||
include(`svml.m4')
|
||||
svmlf_stubs(16)
|
||||
svmld_stubs(16)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
@@ -137,19 +137,11 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; svml
|
||||
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones twice with our 8-wide
|
||||
; vectors...
|
||||
|
||||
declare <8 x float> @__svml_sin(<8 x float>)
|
||||
declare <8 x float> @__svml_cos(<8 x float>)
|
||||
declare void @__svml_sincos(<8 x float>, <8 x float> *, <8 x float> *)
|
||||
declare <8 x float> @__svml_tan(<8 x float>)
|
||||
declare <8 x float> @__svml_atan(<8 x float>)
|
||||
declare <8 x float> @__svml_atan2(<8 x float>, <8 x float>)
|
||||
declare <8 x float> @__svml_exp(<8 x float>)
|
||||
declare <8 x float> @__svml_log(<8 x float>)
|
||||
declare <8 x float> @__svml_pow(<8 x float>, <8 x float>)
|
||||
include(`svml.m4')
|
||||
svmlf_declare(8)
|
||||
svmlf_define(8)
|
||||
svmld_declare(4)
|
||||
svmld_stubs(8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
@@ -310,6 +310,7 @@ declare double @round (double) nounwind readnone
|
||||
;declare float @llvm.sqrt.f32(float %Val)
|
||||
declare double @llvm.sqrt.f64(double %Val)
|
||||
declare float @llvm.sin.f32(float %Val)
|
||||
declare float @llvm.asin.f32(float %Val)
|
||||
declare float @llvm.cos.f32(float %Val)
|
||||
declare float @llvm.sqrt.f32(float %Val)
|
||||
declare float @llvm.exp.f32(float %Val)
|
||||
@@ -651,7 +652,18 @@ define <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alw
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
define <1 x float> @__svml_sin(<1 x float>) nounwind readnone alwaysinline {
|
||||
declare <1 x float> @__svml_sind(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_asind(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_cosd(<1 x float>) nounwind readnone alwaysinline
|
||||
declare void @__svml_sincosd(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_tand(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_atand(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_atan2d(<1 x float>, <1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_expd(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_logd(<1 x float>) nounwind readnone alwaysinline
|
||||
declare <1 x float> @__svml_powd(<1 x float>, <1 x float>) nounwind readnone alwaysinline
|
||||
|
||||
define <1 x float> @__svml_sinf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
@@ -662,7 +674,18 @@ define <1 x float> @__svml_sin(<1 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_cos(<1 x float>) nounwind readnone alwaysinline {
|
||||
define <1 x float> @__svml_asinf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_asinf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
;%s = call float @llvm.asin.f32(float %r)
|
||||
;%rv = insertelement <1 x float> undef, float %r, i32 0
|
||||
;ret <1 x float> %rv
|
||||
unary1to1(float,@llvm.asin.f32)
|
||||
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_cosf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
@@ -673,18 +696,18 @@ define <1 x float> @__svml_cos(<1 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
}
|
||||
|
||||
define void @__svml_sincos(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline {
|
||||
define void @__svml_sincosf(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline {
|
||||
; %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
|
||||
; store <1 x float> %s, <1 x float> * %1
|
||||
; ret void
|
||||
%sin = call <1 x float> @__svml_sin (<1 x float> %0)
|
||||
%cos = call <1 x float> @__svml_cos (<1 x float> %0)
|
||||
%sin = call <1 x float> @__svml_sinf(<1 x float> %0)
|
||||
%cos = call <1 x float> @__svml_cosf(<1 x float> %0)
|
||||
store <1 x float> %sin, <1 x float> * %1
|
||||
store <1 x float> %cos, <1 x float> * %2
|
||||
ret void
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_tan(<1 x float>) nounwind readnone alwaysinline {
|
||||
define <1 x float> @__svml_tanf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
@@ -696,7 +719,7 @@ define <1 x float> @__svml_tan(<1 x float>) nounwind readnone alwaysinline {
|
||||
ret <1 x float > %0
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_atan(<1 x float>) nounwind readnone alwaysinline {
|
||||
define <1 x float> @__svml_atanf(<1 x float>) nounwind readnone alwaysinline {
|
||||
; %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
|
||||
; ret <1 x float> %ret
|
||||
;%r = extractelement <1 x float> %0, i32 0
|
||||
@@ -709,7 +732,7 @@ define <1 x float> @__svml_atan(<1 x float>) nounwind readnone alwaysinline {
|
||||
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_atan2(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
define <1 x float> @__svml_atan2f(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
|
||||
;ret <1 x float> %ret
|
||||
;%y = extractelement <1 x float> %0, i32 0
|
||||
@@ -722,19 +745,19 @@ define <1 x float> @__svml_atan2(<1 x float>, <1 x float>) nounwind readnone al
|
||||
ret <1 x float > %0
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_exp(<1 x float>) nounwind readnone alwaysinline {
|
||||
define <1 x float> @__svml_expf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
unary1to1(float, @llvm.exp.f32)
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_log(<1 x float>) nounwind readnone alwaysinline {
|
||||
define <1 x float> @__svml_logf(<1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
|
||||
;ret <1 x float> %ret
|
||||
unary1to1(float, @llvm.log.f32)
|
||||
}
|
||||
|
||||
define <1 x float> @__svml_pow(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
define <1 x float> @__svml_powf(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
|
||||
;%ret = call <1 x float> @__svml_powf4(<1 x float> %0, <1 x float> %1)
|
||||
;ret <1 x float> %ret
|
||||
%r = extractelement <1 x float> %0, i32 0
|
||||
|
||||
@@ -202,21 +202,15 @@ declare i64 @__count_trailing_zeros_i64(i64) nounwind readnone
|
||||
declare i32 @__count_leading_zeros_i32(i32) nounwind readnone
|
||||
declare i64 @__count_leading_zeros_i64(i64) nounwind readnone
|
||||
|
||||
;; svml
|
||||
|
||||
; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
|
||||
; or, use the macro to call the 4-wide ones twice with our 8-wide
|
||||
; vectors...
|
||||
|
||||
declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
|
||||
declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
|
||||
declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_log(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
|
||||
;; svml
|
||||
|
||||
include(`svml.m4')
|
||||
svmlf_stubs(WIDTH)
|
||||
svmld_stubs(WIDTH)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; reductions
|
||||
|
||||
@@ -316,15 +316,10 @@ define void @__masked_store_blend_i64(<WIDTH x i64>* nocapture %ptr,
|
||||
;; yuck. We need declarations of these, even though we shouldnt ever
|
||||
;; actually generate calls to them for the NEON target...
|
||||
|
||||
declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
|
||||
declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
|
||||
declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_log(<WIDTH x float>)
|
||||
declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
|
||||
|
||||
include(`svml.m4')
|
||||
svmlf_stubs(WIDTH)
|
||||
svmld_stubs(WIDTH)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; gather
|
||||
|
||||
@@ -105,28 +105,28 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||
include(`svml.m4')
|
||||
svmlf_declare(4)
|
||||
svmld_declare(2)
|
||||
svmld_stubs(8)
|
||||
|
||||
|
||||
define <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_sinf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_asinf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_asinf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_cosf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_cosf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define void @__svml_sincos(<8 x float>, <8 x float> *,
|
||||
define void @__svml_sincosf(<8 x float>, <8 x float> *,
|
||||
<8 x float> *) nounwind readnone alwaysinline {
|
||||
; call svml_sincosf4 two times with the two 4-wide sub-vectors
|
||||
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
||||
@@ -155,33 +155,33 @@ define void @__svml_sincos(<8 x float>, <8 x float> *,
|
||||
ret void
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_tanf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_tanf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_atanf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_atanf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_atan2(<8 x float>,
|
||||
define <8 x float> @__svml_atan2f(<8 x float>,
|
||||
<8 x float>) nounwind readnone alwaysinline {
|
||||
binary4to8(ret, float, @__svml_atan2f4, %0, %1)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_expf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_expf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_logf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_logf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_pow(<8 x float>,
|
||||
define <8 x float> @__svml_powf(<8 x float>,
|
||||
<8 x float>) nounwind readnone alwaysinline {
|
||||
binary4to8(ret, float, @__svml_powf4, %0, %1)
|
||||
ret <8 x float> %ret
|
||||
|
||||
@@ -496,62 +496,11 @@ define <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysin
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
|
||||
%s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
|
||||
store <4 x float> %s, <4 x float> * %1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_expf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_logf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
include(`svml.m4')
|
||||
svmlf_declare(4)
|
||||
svmld_declare(2)
|
||||
svmlf_define(4)
|
||||
svmld_stubs(4)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
;; float min/max
|
||||
|
||||
@@ -209,16 +209,9 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
|
||||
;; svml
|
||||
|
||||
; FIXME
|
||||
|
||||
declare <8 x float> @__svml_sin(<8 x float>)
|
||||
declare <8 x float> @__svml_cos(<8 x float>)
|
||||
declare void @__svml_sincos(<8 x float>, <8 x float> *, <8 x float> *)
|
||||
declare <8 x float> @__svml_tan(<8 x float>)
|
||||
declare <8 x float> @__svml_atan(<8 x float>)
|
||||
declare <8 x float> @__svml_atan2(<8 x float>, <8 x float>)
|
||||
declare <8 x float> @__svml_exp(<8 x float>)
|
||||
declare <8 x float> @__svml_log(<8 x float>)
|
||||
declare <8 x float> @__svml_pow(<8 x float>, <8 x float>)
|
||||
include(`svml.m4')
|
||||
svmlf_stubs(8)
|
||||
svmld_stubs(8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
@@ -222,15 +222,9 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
|
||||
|
||||
; FIXME
|
||||
|
||||
declare <16 x float> @__svml_sin(<16 x float>)
|
||||
declare <16 x float> @__svml_cos(<16 x float>)
|
||||
declare void @__svml_sincos(<16 x float>, <16 x float> *, <16 x float> *)
|
||||
declare <16 x float> @__svml_tan(<16 x float>)
|
||||
declare <16 x float> @__svml_atan(<16 x float>)
|
||||
declare <16 x float> @__svml_atan2(<16 x float>, <16 x float>)
|
||||
declare <16 x float> @__svml_exp(<16 x float>)
|
||||
declare <16 x float> @__svml_log(<16 x float>)
|
||||
declare <16 x float> @__svml_pow(<16 x float>, <16 x float>)
|
||||
include(`svml.m4')
|
||||
svmlf_stubs(16)
|
||||
svmld_stubs(16)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
@@ -105,28 +105,28 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||
include(`svml.m4')
|
||||
svmlf_declare(4)
|
||||
svmld_declare(2)
|
||||
svmld_stubs(8)
|
||||
|
||||
|
||||
define <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_sinf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_sinf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_asinf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_asinf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_cosf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_cosf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define void @__svml_sincos(<8 x float>, <8 x float> *,
|
||||
define void @__svml_sincosf(<8 x float>, <8 x float> *,
|
||||
<8 x float> *) nounwind readnone alwaysinline {
|
||||
; call svml_sincosf4 two times with the two 4-wide sub-vectors
|
||||
%a = shufflevector <8 x float> %0, <8 x float> undef,
|
||||
@@ -155,33 +155,33 @@ define void @__svml_sincos(<8 x float>, <8 x float> *,
|
||||
ret void
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_tanf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_tanf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_atanf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_atanf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_atan2(<8 x float>,
|
||||
define <8 x float> @__svml_atan2f(<8 x float>,
|
||||
<8 x float>) nounwind readnone alwaysinline {
|
||||
binary4to8(ret, float, @__svml_atan2f4, %0, %1)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_expf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_expf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
|
||||
define <8 x float> @__svml_logf(<8 x float>) nounwind readnone alwaysinline {
|
||||
unary4to8(ret, float, @__svml_logf4, %0)
|
||||
ret <8 x float> %ret
|
||||
}
|
||||
|
||||
define <8 x float> @__svml_pow(<8 x float>,
|
||||
define <8 x float> @__svml_powf(<8 x float>,
|
||||
<8 x float>) nounwind readnone alwaysinline {
|
||||
binary4to8(ret, float, @__svml_powf4, %0, %1)
|
||||
ret <8 x float> %ret
|
||||
|
||||
@@ -209,62 +209,11 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; svml stuff
|
||||
|
||||
declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
|
||||
declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
|
||||
|
||||
|
||||
define <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
|
||||
%s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
|
||||
store <4 x float> %s, <4 x float> * %1
|
||||
ret void
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_expf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_logf4(<4 x float> %0)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
|
||||
define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
|
||||
%ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
|
||||
ret <4 x float> %ret
|
||||
}
|
||||
include(`svml.m4')
|
||||
svmlf_declare(4)
|
||||
svmlf_define(4)
|
||||
svmld_declare(2)
|
||||
svmld_stubs(8)
|
||||
|
||||
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
|
||||
; horizontal ops / reductions
|
||||
|
||||
@@ -3160,6 +3160,7 @@ define float @__stdlib_powf(float, float) nounwind readnone alwaysinline {
|
||||
}
|
||||
|
||||
declare double @sin(double) nounwind readnone
|
||||
declare double @asin(double) nounwind readnone
|
||||
declare double @cos(double) nounwind readnone
|
||||
declare void @sincos(double, double *, double *) nounwind readnone
|
||||
declare double @tan(double) nounwind readnone
|
||||
@@ -3174,6 +3175,11 @@ define double @__stdlib_sin(double) nounwind readnone alwaysinline {
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define double @__stdlib_asin(double) nounwind readnone alwaysinline {
|
||||
%r = call double @asin(double %0)
|
||||
ret double %r
|
||||
}
|
||||
|
||||
define double @__stdlib_cos(double) nounwind readnone alwaysinline {
|
||||
%r = call double @cos(double %0)
|
||||
ret double %r
|
||||
|
||||
102
stdlib.ispc
102
stdlib.ispc
@@ -2180,7 +2180,7 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) {
|
||||
__declspec(safe)
|
||||
static inline float sin(float x_full) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
return __svml_sin(x_full);
|
||||
return __svml_sinf(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -2313,8 +2313,10 @@ static inline float asin(float x) {
|
||||
bool isnan = (x > 1);
|
||||
|
||||
float v;
|
||||
if (__math_lib == __math_lib_svml ||
|
||||
__math_lib == __math_lib_system) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
return __svml_asinf(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
foreach_active (i) {
|
||||
uniform float r = __stdlib_asinf(extract(x, i));
|
||||
@@ -2417,7 +2419,7 @@ static inline uniform float asin(uniform float x) {
|
||||
__declspec(safe)
|
||||
static inline float cos(float x_full) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
return __svml_cos(x_full);
|
||||
return __svml_cosf(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -2545,18 +2547,28 @@ static inline float acos(float v) {
|
||||
return 1.57079637050628662109375 - asin(v);
|
||||
}
|
||||
|
||||
// Varying double-precision arccosine, computed as pi/2 - asin(v).
__declspec(safe)
static inline double acos(const double v) {
    // pi/2 is written as a double-precision literal (d suffix). The value
    // used before, 1.57079637050628662109375, is pi/2 rounded to float and
    // is off by about 4.4e-8, which limited this double overload to roughly
    // single-precision accuracy.
    return 1.5707963267948966d - asin(v);
}
|
||||
|
||||
|
||||
// Uniform single-precision arccosine, computed as pi/2 - asin(v).
__declspec(safe)
static inline uniform float acos(uniform float v) {
    // pi/2 rounded to the nearest float.
    const uniform float half_pi = 1.57079637050628662109375;
    return half_pi - asin(v);
}
|
||||
|
||||
// Uniform double-precision arccosine, computed as pi/2 - asin(v).
__declspec(safe)
static inline uniform double acos(const uniform double v) {
    // pi/2 is written as a double-precision literal (d suffix). The value
    // used before, 1.57079637050628662109375, is pi/2 rounded to float and
    // is off by about 4.4e-8, which limited this double overload to roughly
    // single-precision accuracy.
    return 1.5707963267948966d - asin(v);
}
|
||||
|
||||
|
||||
__declspec(safe)
|
||||
static inline void sincos(float x_full, varying float * uniform sin_result,
|
||||
varying float * uniform cos_result) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
__svml_sincos(x_full, sin_result, cos_result);
|
||||
__svml_sincosf(x_full, sin_result, cos_result);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
foreach_active (i) {
|
||||
@@ -2688,7 +2700,7 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu
|
||||
__declspec(safe)
|
||||
static inline float tan(float x_full) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
return __svml_tan(x_full);
|
||||
return __svml_tanf(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -2839,7 +2851,7 @@ static inline uniform float tan(uniform float x_full) {
|
||||
__declspec(safe)
|
||||
static inline float atan(float x_full) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
return __svml_atan(x_full);
|
||||
return __svml_atanf(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -2934,7 +2946,7 @@ static inline uniform float atan(uniform float x_full) {
|
||||
__declspec(safe)
|
||||
static inline float atan2(float y, float x) {
|
||||
if (__math_lib == __math_lib_svml) {
|
||||
return __svml_atan2(y, x);
|
||||
return __svml_atan2f(y, x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -2997,7 +3009,7 @@ static inline float exp(float x_full) {
|
||||
return __exp_varying_float(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml) {
|
||||
return __svml_exp(x_full);
|
||||
return __svml_expf(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -3204,7 +3216,7 @@ static inline float log(float x_full) {
|
||||
return __log_varying_float(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml) {
|
||||
return __svml_log(x_full);
|
||||
return __svml_logf(x_full);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -3379,7 +3391,7 @@ static inline float pow(float a, float b) {
|
||||
return __pow_varying_float(a, b);
|
||||
}
|
||||
else if (__math_lib == __math_lib_svml) {
|
||||
return __svml_pow(a, b);
|
||||
return __svml_powf(a, b);
|
||||
}
|
||||
else if (__math_lib == __math_lib_system) {
|
||||
float ret;
|
||||
@@ -3469,7 +3481,11 @@ static inline uniform double frexp(uniform double x, uniform int * uniform pw2)
|
||||
|
||||
__declspec(safe)
|
||||
static inline double sin(double x) {
|
||||
if (__math_lib == __math_lib_ispc_fast)
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
return __svml_sind(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
return sin((float)x);
|
||||
else {
|
||||
double ret;
|
||||
@@ -3490,8 +3506,30 @@ static inline uniform double sin(uniform double x) {
|
||||
}
|
||||
|
||||
__declspec(safe)
|
||||
static inline double cos(double x) {
|
||||
if (__math_lib == __math_lib_ispc_fast)
|
||||
// Varying double-precision arcsine; the implementation is selected by the
// __math_lib setting.
static inline double asin(const double x) {
    // SVML: pass the varying argument directly to __svml_asind.
    if (__math_lib == __math_lib_svml) {
        return __svml_asind(x);
    }

    // Fast library: evaluate the float overload and return it as double.
    if (__math_lib == __math_lib_ispc_fast) {
        return asin((float)x);
    }

    // Any other setting: run the scalar __stdlib_asin once per active
    // program instance and reassemble the varying result.
    double result;
    foreach_active (lane) {
        uniform double lane_value = __stdlib_asin(extract(x, lane));
        result = insert(result, lane, lane_value);
    }
    return result;
}
|
||||
|
||||
__declspec(safe)
|
||||
static inline double cos(const double x) {
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
return __svml_cosd(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
return cos((float)x);
|
||||
else {
|
||||
double ret;
|
||||
@@ -3514,7 +3552,11 @@ static inline uniform double cos(uniform double x) {
|
||||
__declspec(safe)
|
||||
static inline void sincos(double x, varying double * uniform sin_result,
|
||||
varying double * uniform cos_result) {
|
||||
if (__math_lib == __math_lib_ispc_fast) {
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
__svml_sincosd(x, sin_result, cos_result);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast) {
|
||||
float sr, cr;
|
||||
sincos((float)x, &sr, &cr);
|
||||
*sin_result = sr;
|
||||
@@ -3545,7 +3587,11 @@ static inline void sincos(uniform double x, uniform double * uniform sin_result,
|
||||
|
||||
__declspec(safe)
|
||||
static inline double tan(double x) {
|
||||
if (__math_lib == __math_lib_ispc_fast)
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
return __svml_tand(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
return tan((float)x);
|
||||
else {
|
||||
double ret;
|
||||
@@ -3589,7 +3635,11 @@ static inline uniform double atan(uniform double x) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline double atan2(double y, double x) {
|
||||
if (__math_lib == __math_lib_ispc_fast)
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
return __svml_atan2d(y,x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
return atan2((float)y, (float)x);
|
||||
else {
|
||||
double ret;
|
||||
@@ -3611,7 +3661,11 @@ static inline uniform double atan2(uniform double y, uniform double x) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline double exp(double x) {
|
||||
if (__math_lib == __math_lib_ispc_fast)
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
return __svml_expd(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
return exp((float)x);
|
||||
else {
|
||||
double ret;
|
||||
@@ -3633,7 +3687,11 @@ static inline uniform double exp(uniform double x) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline double log(double x) {
|
||||
if (__math_lib == __math_lib_ispc_fast)
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
return __svml_logd(x);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
return log((float)x);
|
||||
else {
|
||||
double ret;
|
||||
@@ -3655,7 +3713,11 @@ static inline uniform double log(uniform double x) {
|
||||
|
||||
__declspec(safe)
|
||||
static inline double pow(double a, double b) {
|
||||
if (__math_lib == __math_lib_ispc_fast)
|
||||
if (__math_lib == __math_lib_svml)
|
||||
{
|
||||
return __svml_powd(a,b);
|
||||
}
|
||||
else if (__math_lib == __math_lib_ispc_fast)
|
||||
return pow((float)a, (float)b);
|
||||
else {
|
||||
double ret;
|
||||
|
||||
Reference in New Issue
Block a user