Remove support for using SVML for math lib routines.

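This path was poorly maintained and wasn't actually available on most targets: only the SSE2/SSE4 targets called into SVML, while the others merely declared (or stubbed out) the __svml_* entry points.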
2013-07-18 09:24:23 -07:00
parent d3c567503b
commit d9c38b5c1f
16 changed files with 18 additions and 556 deletions
--- a/builtins.cpp
+++ b/builtins.cpp
@@ -579,15 +579,6 @@ lSetInternalFunctions(llvm::Module *module) {
        "__stdlib_sinf",
        "__stdlib_tan",
        "__stdlib_tanf",
-        "__svml_sin",
-        "__svml_cos",
-        "__svml_sincos",
-        "__svml_tan",
-        "__svml_atan",
-        "__svml_atan2",
-        "__svml_exp",
-        "__svml_log",
-        "__svml_pow",
        "__undef_uniform",
        "__undef_varying",
        "__vec4_add_float",
@@ -1054,8 +1045,6 @@ DefineStdlib(SymbolTable *symbolTable, llvm::LLVMContext *ctx, llvm::Module *mod
                       symbolTable);
    lDefineConstantInt("__math_lib_ispc_fast", (int)Globals::Math_ISPCFast,
                       module, symbolTable);
-    lDefineConstantInt("__math_lib_svml", (int)Globals::Math_SVML, module,
-                       symbolTable);
    lDefineConstantInt("__math_lib_system", (int)Globals::Math_System, module,
                       symbolTable);
    lDefineConstantIntFunc("__fast_masked_vload", (int)g->opt.fastMaskedVload,
--- a/builtins/target-avx-x2.ll
+++ b/builtins/target-avx-x2.ll
@@ -134,23 +134,6 @@ define <16 x float> @__sqrt_varying_float(<16 x float>) nounwind readonly always
  ret <16 x float> %call
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; svml
-
-; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
-; or, use the macro to call the 4-wide ones 4x with our 16-wide
-; vectors...
-
-declare <16 x float> @__svml_sin(<16 x float>)
-declare <16 x float> @__svml_cos(<16 x float>)
-declare void @__svml_sincos(<16 x float>, <16 x float> *, <16 x float> *)
-declare <16 x float> @__svml_tan(<16 x float>)
-declare <16 x float> @__svml_atan(<16 x float>)
-declare <16 x float> @__svml_atan2(<16 x float>, <16 x float>)
-declare <16 x float> @__svml_exp(<16 x float>)
-declare <16 x float> @__svml_log(<16 x float>)
-declare <16 x float> @__svml_pow(<16 x float>, <16 x float>)
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

--- a/builtins/target-avx.ll
+++ b/builtins/target-avx.ll
@@ -134,23 +134,6 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
  ret <8 x float> %call
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; svml
-
-; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
-; or, use the macro to call the 4-wide ones twice with our 8-wide
-; vectors...
-
-declare <8 x float> @__svml_sin(<8 x float>)
-declare <8 x float> @__svml_cos(<8 x float>)
-declare void @__svml_sincos(<8 x float>, <8 x float> *, <8 x float> *)
-declare <8 x float> @__svml_tan(<8 x float>)
-declare <8 x float> @__svml_atan(<8 x float>)
-declare <8 x float> @__svml_atan2(<8 x float>, <8 x float>)
-declare <8 x float> @__svml_exp(<8 x float>)
-declare <8 x float> @__svml_log(<8 x float>)
-declare <8 x float> @__svml_pow(<8 x float>, <8 x float>)
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

--- a/builtins/target-generic-1.ll
+++ b/builtins/target-generic-1.ll
@@ -647,104 +647,6 @@ define  <1 x float> @__rsqrt_varying_float(<1 x float> %v) nounwind readonly alw
  
 }

-
-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; svml stuff
-
-define  <1 x float> @__svml_sin(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_sinf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm.sin.f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  unary1to1(float,@llvm.sin.f32)
-   
-}
-
-define  <1 x float> @__svml_cos(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_cosf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm.cos.f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  unary1to1(float, @llvm.cos.f32)
-
-}
-
-define  void @__svml_sincos(<1 x float>, <1 x float> *, <1 x float> *) nounwind readnone alwaysinline {
-;  %s = call <1 x float> @__svml_sincosf4(<1 x float> * %2, <1 x float> %0)
-;  store <1 x float> %s, <1 x float> * %1
-;  ret void
-   %sin = call <1 x float> @__svml_sin (<1 x float> %0)
-   %cos = call <1 x float> @__svml_cos (<1 x float> %0)
-   store <1 x float> %sin, <1 x float> * %1
-   store <1 x float> %cos, <1 x float> * %2
-   ret void
-}
-
-define  <1 x float> @__svml_tan(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_tanf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm_tan_f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  ;unasry1to1(float, @llvm.tan.f32)
-  ; UNSUPPORTED!
-  ret <1 x float > %0
-}
-
-define  <1 x float> @__svml_atan(<1 x float>) nounwind readnone alwaysinline {
-;  %ret = call <1 x float> @__svml_atanf4(<1 x float> %0)
-;  ret <1 x float> %ret
-  ;%r = extractelement <1 x float> %0, i32 0
-  ;%s = call float @llvm_atan_f32(float %r)
-  ;%rv = insertelement <1 x float> undef, float %r, i32 0
-  ;ret <1 x float> %rv
-  ;unsary1to1(float,@llvm.atan.f32)
-  ;UNSUPPORTED!
-  ret <1 x float > %0
-
-}
-
-define  <1 x float> @__svml_atan2(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_atan2f4(<1 x float> %0, <1 x float> %1)
-  ;ret <1 x float> %ret
-  ;%y = extractelement <1 x float> %0, i32 0
-  ;%x = extractelement <1 x float> %1, i32 0
-  ;%q = fdiv float %y, %x
-  ;%a = call float @llvm.atan.f32 (float %q)
-  ;%rv = insertelement <1 x float> undef, float %a, i32 0
-  ;ret <1 x float> %rv
-  ; UNSUPPORTED!
-  ret <1 x float > %0
-}
-
-define  <1 x float> @__svml_exp(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_expf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  unary1to1(float, @llvm.exp.f32)
-}
-
-define  <1 x float> @__svml_log(<1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_logf4(<1 x float> %0)
-  ;ret <1 x float> %ret
-  unary1to1(float, @llvm.log.f32)
-}
-
-define  <1 x float> @__svml_pow(<1 x float>, <1 x float>) nounwind readnone alwaysinline {
-  ;%ret = call <1 x float> @__svml_powf4(<1 x float> %0, <1 x float> %1)
-  ;ret <1 x float> %ret
-  %r = extractelement <1 x float> %0, i32 0
-  %e  = extractelement <1 x float> %1, i32 0
-  %s = call float @llvm.pow.f32(float %r,float %e)
-  %rv = insertelement <1 x float> undef, float %s, i32 0
-  ret <1 x float> %rv
-
-}
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

--- a/builtins/target-generic-common.ll
+++ b/builtins/target-generic-common.ll
@@ -202,22 +202,6 @@ declare i64 @__count_trailing_zeros_i64(i64) nounwind readnone
 declare i32 @__count_leading_zeros_i32(i32) nounwind readnone
 declare i64 @__count_leading_zeros_i64(i64) nounwind readnone

-;; svml
-
-; FIXME: need either to wire these up to the 8-wide SVML entrypoints,
-; or, use the macro to call the 4-wide ones twice with our 8-wide
-; vectors...
-
-declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
-declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
-declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
-declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
-declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
-declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
-declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
-declare <WIDTH x float> @__svml_log(<WIDTH x float>)
-declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; reductions

--- a/builtins/target-neon-common.ll
+++ b/builtins/target-neon-common.ll
@@ -313,19 +313,6 @@ define void @__masked_store_blend_i64(<WIDTH x i64>* nocapture %ptr,
  ret void
 }

-;; yuck.  We need declarations of these, even though we shouldnt ever
-;; actually generate calls to them for the NEON target...
-
-declare <WIDTH x float> @__svml_sin(<WIDTH x float>)
-declare <WIDTH x float> @__svml_cos(<WIDTH x float>)
-declare void @__svml_sincos(<WIDTH x float>, <WIDTH x float> *, <WIDTH x float> *)
-declare <WIDTH x float> @__svml_tan(<WIDTH x float>)
-declare <WIDTH x float> @__svml_atan(<WIDTH x float>)
-declare <WIDTH x float> @__svml_atan2(<WIDTH x float>, <WIDTH x float>)
-declare <WIDTH x float> @__svml_exp(<WIDTH x float>)
-declare <WIDTH x float> @__svml_log(<WIDTH x float>)
-declare <WIDTH x float> @__svml_pow(<WIDTH x float>, <WIDTH x float>)
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; gather

--- a/builtins/target-sse2-x2.ll
+++ b/builtins/target-sse2-x2.ll
@@ -102,92 +102,6 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
  ret <8 x float> %call
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; svml stuff
-
-declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
-
-
-define <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_sinf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_cosf4, %0)
-  ret <8 x float> %ret
-}
-
-define void @__svml_sincos(<8 x float>, <8 x float> *,
-                                    <8 x float> *) nounwind readnone alwaysinline {
-  ; call svml_sincosf4 two times with the two 4-wide sub-vectors
-  %a = shufflevector <8 x float> %0, <8 x float> undef,
-         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %b = shufflevector <8 x float> %0, <8 x float> undef,
-         <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-
-  %cospa = alloca <4 x float>
-  %sa = call <4 x float> @__svml_sincosf4(<4 x float> * %cospa, <4 x float> %a)
-
-  %cospb = alloca <4 x float>
-  %sb = call <4 x float> @__svml_sincosf4(<4 x float> * %cospb, <4 x float> %b)
-
-  %sin = shufflevector <4 x float> %sa, <4 x float> %sb,
-         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-                    i32 4, i32 5, i32 6, i32 7>
-  store <8 x float> %sin, <8 x float> * %1
-
-  %cosa = load <4 x float> * %cospa
-  %cosb = load <4 x float> * %cospb
-  %cos = shufflevector <4 x float> %cosa, <4 x float> %cosb,
-         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-                    i32 4, i32 5, i32 6, i32 7>
-  store <8 x float> %cos, <8 x float> * %2
-
-  ret void
-}
-
-define <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_tanf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_atanf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_atan2(<8 x float>,
-                                          <8 x float>) nounwind readnone alwaysinline {
-  binary4to8(ret, float, @__svml_atan2f4, %0, %1)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_expf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_logf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_pow(<8 x float>,
-                                        <8 x float>) nounwind readnone alwaysinline {
-  binary4to8(ret, float, @__svml_powf4, %0, %1)
-  ret <8 x float> %ret
-}
-
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

--- a/builtins/target-sse2.ll
+++ b/builtins/target-sse2.ll
@@ -493,66 +493,6 @@ define <4 x float> @__sqrt_varying_float(<4 x float>) nounwind readonly alwaysin
  ret <4 x float> %call
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; svml stuff
-
-declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
-
-
-define <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
-  %s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
-  store <4 x float> %s, <4 x float> * %1
-  ret void
-}
-
-define <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_expf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_logf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
-  ret <4 x float> %ret
-}
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

--- a/builtins/target-sse4-16.ll
+++ b/builtins/target-sse4-16.ll
@@ -205,21 +205,6 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
  ret <8 x double> %ret
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; svml
-
-; FIXME
-
-declare <8 x float> @__svml_sin(<8 x float>)
-declare <8 x float> @__svml_cos(<8 x float>)
-declare void @__svml_sincos(<8 x float>, <8 x float> *, <8 x float> *)
-declare <8 x float> @__svml_tan(<8 x float>)
-declare <8 x float> @__svml_atan(<8 x float>)
-declare <8 x float> @__svml_atan2(<8 x float>, <8 x float>)
-declare <8 x float> @__svml_exp(<8 x float>)
-declare <8 x float> @__svml_log(<8 x float>)
-declare <8 x float> @__svml_pow(<8 x float>, <8 x float>)
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; horizontal ops / reductions

--- a/builtins/target-sse4-8.ll
+++ b/builtins/target-sse4-8.ll
@@ -217,21 +217,6 @@ define <16 x double> @__max_varying_double(<16 x double>, <16 x double>) nounwin
  ret <16 x double> %ret
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-;; svml
-
-; FIXME
-
-declare <16 x float> @__svml_sin(<16 x float>)
-declare <16 x float> @__svml_cos(<16 x float>)
-declare void @__svml_sincos(<16 x float>, <16 x float> *, <16 x float> *)
-declare <16 x float> @__svml_tan(<16 x float>)
-declare <16 x float> @__svml_atan(<16 x float>)
-declare <16 x float> @__svml_atan2(<16 x float>, <16 x float>)
-declare <16 x float> @__svml_exp(<16 x float>)
-declare <16 x float> @__svml_log(<16 x float>)
-declare <16 x float> @__svml_pow(<16 x float>, <16 x float>)
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; horizontal ops / reductions

--- a/builtins/target-sse4-x2.ll
+++ b/builtins/target-sse4-x2.ll
@@ -102,92 +102,6 @@ define <8 x float> @__sqrt_varying_float(<8 x float>) nounwind readonly alwaysin
  ret <8 x float> %call
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; svml stuff
-
-declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
-
-
-define <8 x float> @__svml_sin(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_sinf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_cos(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_cosf4, %0)
-  ret <8 x float> %ret
-}
-
-define void @__svml_sincos(<8 x float>, <8 x float> *,
-                                    <8 x float> *) nounwind readnone alwaysinline {
-  ; call svml_sincosf4 two times with the two 4-wide sub-vectors
-  %a = shufflevector <8 x float> %0, <8 x float> undef,
-         <4 x i32> <i32 0, i32 1, i32 2, i32 3>
-  %b = shufflevector <8 x float> %0, <8 x float> undef,
-         <4 x i32> <i32 4, i32 5, i32 6, i32 7>
-
-  %cospa = alloca <4 x float>
-  %sa = call <4 x float> @__svml_sincosf4(<4 x float> * %cospa, <4 x float> %a)
-
-  %cospb = alloca <4 x float>
-  %sb = call <4 x float> @__svml_sincosf4(<4 x float> * %cospb, <4 x float> %b)
-
-  %sin = shufflevector <4 x float> %sa, <4 x float> %sb,
-         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-                    i32 4, i32 5, i32 6, i32 7>
-  store <8 x float> %sin, <8 x float> * %1
-
-  %cosa = load <4 x float> * %cospa
-  %cosb = load <4 x float> * %cospb
-  %cos = shufflevector <4 x float> %cosa, <4 x float> %cosb,
-         <8 x i32> <i32 0, i32 1, i32 2, i32 3,
-                    i32 4, i32 5, i32 6, i32 7>
-  store <8 x float> %cos, <8 x float> * %2
-
-  ret void
-}
-
-define <8 x float> @__svml_tan(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_tanf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_atan(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_atanf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_atan2(<8 x float>,
-                                          <8 x float>) nounwind readnone alwaysinline {
-  binary4to8(ret, float, @__svml_atan2f4, %0, %1)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_exp(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_expf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_log(<8 x float>) nounwind readnone alwaysinline {
-  unary4to8(ret, float, @__svml_logf4, %0)
-  ret <8 x float> %ret
-}
-
-define <8 x float> @__svml_pow(<8 x float>,
-                                        <8 x float>) nounwind readnone alwaysinline {
-  binary4to8(ret, float, @__svml_powf4, %0, %1)
-  ret <8 x float> %ret
-}
-
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ;; float min/max

--- a/builtins/target-sse4.ll
+++ b/builtins/target-sse4.ll
@@ -206,66 +206,6 @@ define <4 x double> @__max_varying_double(<4 x double>, <4 x double>) nounwind r
  ret <4 x double> %ret
 }

-;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
-; svml stuff
-
-declare <4 x float> @__svml_sinf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_cosf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_sincosf4(<4 x float> *, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_tanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atanf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_atan2f4(<4 x float>, <4 x float>) nounwind readnone
-declare <4 x float> @__svml_expf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_logf4(<4 x float>) nounwind readnone
-declare <4 x float> @__svml_powf4(<4 x float>, <4 x float>) nounwind readnone
-
-
-define <4 x float> @__svml_sin(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_sinf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_cos(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_cosf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define void @__svml_sincos(<4 x float>, <4 x float> *, <4 x float> *) nounwind readnone alwaysinline {
-  %s = call <4 x float> @__svml_sincosf4(<4 x float> * %2, <4 x float> %0)
-  store <4 x float> %s, <4 x float> * %1
-  ret void
-}
-
-define <4 x float> @__svml_tan(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_tanf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_atan(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_atanf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_atan2(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_atan2f4(<4 x float> %0, <4 x float> %1)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_exp(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_expf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_log(<4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_logf4(<4 x float> %0)
-  ret <4 x float> %ret
-}
-
-define <4 x float> @__svml_pow(<4 x float>, <4 x float>) nounwind readnone alwaysinline {
-  %ret = call <4 x float> @__svml_powf4(<4 x float> %0, <4 x float> %1)
-  ret <4 x float> %ret
-}
-
 ;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
 ; horizontal ops / reductions

--- a/docs/ispc.rst
+++ b/docs/ispc.rst
@@ -3278,9 +3278,6 @@ for this argument.
  approximately 1.45e-6 over the range -10pi to 10pi.)
 * ``fast``: more efficient but lower accuracy versions of the default ``ispc``
  implementations.
-* ``svml``: use Intel "Short Vector Math Library".  Use
-  ``icc`` to link your final executable so that the appropriate libraries
-  are linked.
 * ``system``: use the system's math library.  On many systems, these
  functions are more accurate than both of ``ispc``'s implementations.
  Using these functions may be quite
--- a/ispc.h
+++ b/ispc.h
@@ -468,7 +468,7 @@ struct Globals {

    /** There are a number of math libraries that can be used for
        transcendentals and the like during program compilation. */
-    enum MathLib { Math_ISPC, Math_ISPCFast, Math_SVML, Math_System };
+    enum MathLib { Math_ISPC, Math_ISPCFast, Math_System };
    MathLib mathLib;

    /** Records whether the ispc standard library should be made available
--- a/main.cpp
+++ b/main.cpp
@@ -107,7 +107,6 @@ usage(int ret) {
    printf("    [--math-lib=<option>]\t\tSelect math library\n");
    printf("        default\t\t\t\tUse ispc's built-in math functions\n");
    printf("        fast\t\t\t\tUse high-performance but lower-accuracy math functions\n");
-    printf("        svml\t\t\t\tUse the Intel(r) SVML math libraries\n");
    printf("        system\t\t\t\tUse the system's math library (*may be quite slow*)\n");
    printf("    [-MMM <filename>\t\t\t\tWrite #include dependencies to given file.\n");
    printf("    [--nostdlib]\t\t\tDon't make the ispc standard library available\n");
@@ -353,8 +352,6 @@ int main(int Argc, char *Argv[]) {
                g->mathLib = Globals::Math_ISPC;
            else if (!strcmp(lib, "fast"))
                g->mathLib = Globals::Math_ISPCFast;
-            else if (!strcmp(lib, "svml"))
-                g->mathLib = Globals::Math_SVML;
            else if (!strcmp(lib, "system"))
                g->mathLib = Globals::Math_System;
            else {
--- a/stdlib.ispc
+++ b/stdlib.ispc
@@ -2159,10 +2159,7 @@ static inline uniform float frexp(uniform float x, uniform int * uniform pw2) {

 __declspec(safe)
 static inline float sin(float x_full) {
-    if (__math_lib == __math_lib_svml) {
-        return __svml_sin(x_full);
-    }
-    else if (__math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
            uniform float r = __stdlib_sinf(extract(x_full, i));
@@ -2221,8 +2218,7 @@ static inline float sin(float x_full) {

 __declspec(safe)
 static inline uniform float sin(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    if (__math_lib == __math_lib_system) {
        return __stdlib_sinf(x_full);
    }
    else if (__math_lib == __math_lib_ispc || 
@@ -2293,8 +2289,7 @@ static inline float asin(float x) {
    bool isnan = (x > 1);

    float v;
-    if (__math_lib == __math_lib_svml ||
-        __math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
            uniform float r = __stdlib_asinf(extract(x, i));
@@ -2350,8 +2345,7 @@ static inline uniform float asin(uniform float x) {
    uniform bool isnan = (x > 1);

    uniform float v;
-    if (__math_lib == __math_lib_svml ||
-        __math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        return __stdlib_asinf(x);
    }
    else if (__math_lib == __math_lib_ispc)
@@ -2396,10 +2390,7 @@ static inline uniform float asin(uniform float x) {

 __declspec(safe)
 static inline float cos(float x_full) {
-    if (__math_lib == __math_lib_svml) {
-        return __svml_cos(x_full);
-    }
-    else if (__math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
            uniform float r = __stdlib_cosf(extract(x_full, i));
@@ -2457,8 +2448,7 @@ static inline float cos(float x_full) {

 __declspec(safe)
 static inline uniform float cos(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    if (__math_lib == __math_lib_system) {
        return __stdlib_cosf(x_full);
    }
    else if (__math_lib == __math_lib_ispc || 
@@ -2535,10 +2525,7 @@ static inline uniform float acos(uniform float v) {
 __declspec(safe)
 static inline void sincos(float x_full, varying float * uniform sin_result, 
                          varying float * uniform cos_result) {
-    if (__math_lib == __math_lib_svml) {
-        __svml_sincos(x_full, sin_result, cos_result);
-    }
-    else if (__math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        foreach_active (i) {
            uniform float s, c;
            __stdlib_sincosf(extract(x_full, i), &s, &c);
@@ -2605,8 +2592,7 @@ static inline void sincos(float x_full, varying float * uniform sin_result,
 __declspec(safe)
 static inline void sincos(uniform float x_full, uniform float * uniform sin_result,
                          uniform float * uniform cos_result) {
-    if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    if (__math_lib == __math_lib_system) {
        __stdlib_sincosf(x_full, sin_result, cos_result);
    }
    else if (__math_lib == __math_lib_ispc || 
@@ -2667,10 +2653,7 @@ static inline void sincos(uniform float x_full, uniform float * uniform sin_resu

 __declspec(safe)
 static inline float tan(float x_full) {
-    if (__math_lib == __math_lib_svml) {
-        return __svml_tan(x_full);
-    }
-    else if (__math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
            uniform float r = __stdlib_tanf(extract(x_full, i));
@@ -2746,8 +2729,7 @@ static inline float tan(float x_full) {

 __declspec(safe)
 static inline uniform float tan(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    if (__math_lib == __math_lib_system) {
        return __stdlib_tanf(x_full);
    }
    else if (__math_lib == __math_lib_ispc || 
@@ -2818,10 +2800,7 @@ static inline uniform float tan(uniform float x_full) {

 __declspec(safe)
 static inline float atan(float x_full) {
-    if (__math_lib == __math_lib_svml) {
-        return __svml_atan(x_full);
-    }
-    else if (__math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
            uniform float r = __stdlib_atanf(extract(x_full, i));
@@ -2869,8 +2848,7 @@ static inline float atan(float x_full) {

 __declspec(safe)
 static inline uniform float atan(uniform float x_full) {
-    if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    if (__math_lib == __math_lib_system) {
        return __stdlib_atanf(x_full);
    }
    else if (__math_lib == __math_lib_ispc || 
@@ -2913,10 +2891,7 @@ static inline uniform float atan(uniform float x_full) {

 __declspec(safe)
 static inline float atan2(float y, float x) {
-    if (__math_lib == __math_lib_svml) {
-        return __svml_atan2(y, x);
-    }
-    else if (__math_lib == __math_lib_system) {
+    if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
            uniform float r = __stdlib_atan2f(extract(y, i), extract(x, i));
@@ -2952,8 +2927,7 @@ static inline float atan2(float y, float x) {

 __declspec(safe)
 static inline uniform float atan2(uniform float y, uniform float x) {
-    if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    if (__math_lib == __math_lib_system) {
        return __stdlib_atan2f(y, x);
    }
    else if (__math_lib == __math_lib_ispc || 
@@ -2976,9 +2950,6 @@ static inline float exp(float x_full) {
    if (__have_native_transcendentals) {
        return __exp_varying_float(x_full);
    }
-    else if (__math_lib == __math_lib_svml) {
-        return __svml_exp(x_full);
-    }
    else if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
@@ -3058,8 +3029,7 @@ static inline uniform float exp(uniform float x_full) {
    if (__have_native_transcendentals) {
        return __exp_uniform_float(x_full);
    }
-    else if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    else if (__math_lib == __math_lib_system) {
        return __stdlib_expf(x_full);
    }
    else if (__math_lib == __math_lib_ispc_fast) {
@@ -3183,9 +3153,6 @@ static inline float log(float x_full) {
    if (__have_native_transcendentals) {
        return __log_varying_float(x_full);
    }
-    else if (__math_lib == __math_lib_svml) {
-        return __svml_log(x_full);
-    }
    else if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
@@ -3274,8 +3241,7 @@ static inline uniform float log(uniform float x_full) {
    if (__have_native_transcendentals) {
        return __log_uniform_float(x_full);
    }
-    else if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    else if (__math_lib == __math_lib_system) {
        return __stdlib_logf(x_full);
    }
    else if (__math_lib == __math_lib_ispc_fast) {
@@ -3358,9 +3324,6 @@ static inline float pow(float a, float b) {
    if (__have_native_transcendentals) {
        return __pow_varying_float(a, b);
    }
-    else if (__math_lib == __math_lib_svml) {
-        return __svml_pow(a, b);
-    }
    else if (__math_lib == __math_lib_system) {
        float ret;
        foreach_active (i) {
@@ -3380,8 +3343,7 @@ static inline uniform float pow(uniform float a, uniform float b) {
    if (__have_native_transcendentals) {
        return __pow_uniform_float(a, b);
    }
-    if (__math_lib == __math_lib_system ||
-        __math_lib == __math_lib_svml) {
+    if (__math_lib == __math_lib_system) {
        return __stdlib_powf(a, b);
    }
    else if (__math_lib == __math_lib_ispc ||