From 3b16cd8c5671a55b80e7244288ed28bb45501689 Mon Sep 17 00:00:00 2001
From: Anton Mitrokhin <anton.mitrokhin@phystech.edu>
Date: Thu, 25 Sep 2014 14:48:17 +0400
Subject: [PATCH] fixed 'knc-i1x16.h' to compile with icc v.15 beta

---
 examples/intrinsics/knc-i1x16.h | 68 ++++++++++++++++-----------------
 1 file changed, 34 insertions(+), 34 deletions(-)
diff --git a/examples/intrinsics/knc-i1x16.h b/examples/intrinsics/knc-i1x16.h
index be9cbd1c..b8783a39 100644
--- a/examples/intrinsics/knc-i1x16.h
+++ b/examples/intrinsics/knc-i1x16.h
@@ -526,11 +526,11 @@ template <int ALIGN> static FORCEINLINE void __store(__vec16_i1 *p, __vec16_i1 v
   *p = v;
 }
 
-template <class RetVecType> RetVecType __smear_i1(int i);
-template <> static FORCEINLINE __vec16_i1 __smear_i1<__vec16_i1>(int i) { return i?0xFFFF:0x0; }
+template <class RetVecType> static RetVecType __smear_i1(int i);
+template <> FORCEINLINE __vec16_i1 __smear_i1<__vec16_i1>(int i) { return i?0xFFFF:0x0; }
 
-template <class RetVecType> RetVecType __setzero_i1();
-template <> static FORCEINLINE __vec16_i1 __setzero_i1<__vec16_i1>() { return 0; }
+template <class RetVecType> static RetVecType __setzero_i1();
+template <> FORCEINLINE __vec16_i1 __setzero_i1<__vec16_i1>() { return 0; }
 
 template <class RetVecType> __vec16_i1 __undef_i1();
 template <> FORCEINLINE __vec16_i1 __undef_i1<__vec16_i1>() { return __vec16_i1(); }
@@ -678,8 +678,8 @@ static FORCEINLINE __vec16_i32 __select(      bool cond, __vec16_i32 a, __vec16_
 static FORCEINLINE int32_t __extract_element(__vec16_i32  v,  int32_t index)              { return v[index];    }
 static FORCEINLINE void    __insert_element (__vec16_i32 *v, uint32_t index, int32_t val) { (*v)[index] = val;  }
 
-template <class RetVecType> RetVecType __smear_i32(int32_t i);
-template <> static FORCEINLINE __vec16_i32 __smear_i32<__vec16_i32>(int32_t i) { return _mm512_set1_epi32(i); }
+template <class RetVecType> RetVecType static __smear_i32(int32_t i);
+template <> FORCEINLINE __vec16_i32 __smear_i32<__vec16_i32>(int32_t i) { return _mm512_set1_epi32(i); }
 
 static const __vec16_i32 __ispc_one = __smear_i32<__vec16_i32>(1);
 static const __vec16_i32 __ispc_zero = __smear_i32<__vec16_i32>(0);
@@ -687,11 +687,11 @@ static const __vec16_i32 __ispc_thirty_two = __smear_i32<__vec16_i32>(32);
 static const __vec16_i32 __ispc_ffffffff = __smear_i32<__vec16_i32>(-1);
 static const __vec16_i32 __ispc_stride1(0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15);
 
-template <class RetVecType> RetVecType __setzero_i32();
-template <> static FORCEINLINE __vec16_i32 __setzero_i32<__vec16_i32>() { return _mm512_setzero_epi32(); }
+template <class RetVecType> static RetVecType __setzero_i32();
+template <> FORCEINLINE __vec16_i32 __setzero_i32<__vec16_i32>() { return _mm512_setzero_epi32(); }
 
-template <class RetVecType> RetVecType __undef_i32();
-template <> static FORCEINLINE __vec16_i32 __undef_i32<__vec16_i32>() { return __vec16_i32(); }
+template <class RetVecType> static RetVecType __undef_i32();
+template <> FORCEINLINE __vec16_i32 __undef_i32<__vec16_i32>() { return __vec16_i32(); }
 
 static FORCEINLINE __vec16_i32 __broadcast_i32(__vec16_i32 v, int index) { return _mm512_mask_permutevar_epi32(v, 0xFFFF, _mm512_set1_epi32(index), v); }
 
@@ -743,11 +743,11 @@ template <int ALIGN> static FORCEINLINE void __store(__vec16_i32 *p, __vec16_i32
 }
 
 #if 0 /* knc::fails  ./tests/foreach-25.ispc ./tests/forach-26.ispc ./tests/foreach-27.ispc */
-template <> static FORCEINLINE __vec16_i32 __load<64>(const __vec16_i32 *p) 
+template <> FORCEINLINE __vec16_i32 __load<64>(const __vec16_i32 *p) 
 {
   return _mm512_load_epi32(p);
 }
-template <> static FORCEINLINE void __store<64>(__vec16_i32 *p, __vec16_i32 v) 
+template <> FORCEINLINE void __store<64>(__vec16_i32 *p, __vec16_i32 v) 
 {
   _mm512_store_epi32(p, v);
 }
@@ -1018,21 +1018,21 @@ template <int ALIGN> static FORCEINLINE void __store(__vec16_i64 *p, __vec16_i64
 }
 
 #if 0 /* knc::fails  as with _i32 this may generate fails ... so commetining it out */
-template <> static FORCEINLINE __vec16_i64 __load<64>(const __vec16_i64 *p) 
+template <> FORCEINLINE __vec16_i64 __load<64>(const __vec16_i64 *p) 
 {
   __m512i v2 = _mm512_load_epi32(p);
   __m512i v1 = _mm512_load_epi32(((uint8_t*)p)+64);
   return __vec16_i64(v2,v1);
 }
-template <> static FORCEINLINE __vec16_i64 __load<128>(const __vec16_i64 *p) {    return __load<64>(p); }
-template <> static FORCEINLINE void __store<64>(__vec16_i64 *p, __vec16_i64 v) 
+template <> FORCEINLINE __vec16_i64 __load<128>(const __vec16_i64 *p) {    return __load<64>(p); }
+template <> FORCEINLINE void __store<64>(__vec16_i64 *p, __vec16_i64 v) 
 {
   __m512i v1 = v.v2;
   __m512i v2 = v.v1;
   _mm512_store_epi64(p, v2);
   _mm512_store_epi64(((uint8_t*)p)+64, v1);
 }
-template <> static FORCEINLINE void __store<128>(__vec16_i64 *p, __vec16_i64 v) {    __store<64>(p, v); }
+template <> FORCEINLINE void __store<128>(__vec16_i64 *p, __vec16_i64 v) {    __store<64>(p, v); }
 #endif
 
 
@@ -1068,14 +1068,14 @@ static FORCEINLINE __vec16_f __select(      bool cond, __vec16_f a, __vec16_f b)
 static FORCEINLINE float __extract_element(__vec16_f  v, uint32_t index)            { return v[index];   }
 static FORCEINLINE void   __insert_element(__vec16_f *v, uint32_t index, float val) { (*v)[index] = val; }
 
-template <class RetVecType> RetVecType __smear_float(float f);
-template <> static FORCEINLINE __vec16_f __smear_float<__vec16_f>(float f) { return _mm512_set_1to16_ps(f); }
+template <class RetVecType> static RetVecType __smear_float(float f);
+template <> FORCEINLINE __vec16_f __smear_float<__vec16_f>(float f) { return _mm512_set_1to16_ps(f); }
 
-template <class RetVecType> RetVecType __setzero_float();
-template <> static FORCEINLINE __vec16_f __setzero_float<__vec16_f>() { return _mm512_setzero_ps(); }
+template <class RetVecType> static RetVecType __setzero_float();
+template <> FORCEINLINE __vec16_f __setzero_float<__vec16_f>() { return _mm512_setzero_ps(); }
 
-template <class RetVecType> RetVecType __undef_float();
-template <> static FORCEINLINE __vec16_f __undef_float<__vec16_f>() { return __vec16_f(); }
+template <class RetVecType> static RetVecType __undef_float();
+template <> FORCEINLINE __vec16_f __undef_float<__vec16_f>() { return __vec16_f(); }
 
 static FORCEINLINE __vec16_f __broadcast_float(__vec16_f _v, int index) 
 {
@@ -1132,12 +1132,12 @@ template <int ALIGN> static FORCEINLINE void __store(__vec16_f *p, __vec16_f v)
 }
 
 #if 0 /* knc::fails  ./tests/gs-improve-progindex.ispc with segfault */
-template <> static FORCEINLINE __vec16_f __load<64>(const __vec16_f *p) 
+template <> FORCEINLINE __vec16_f __load<64>(const __vec16_f *p) 
 {
     return _mm512_load_ps(p);
 }
 /* this one doesn't fail but it is  commented out for completeness, no aligned load/stores */
-template <> static FORCEINLINE void __store<64>(__vec16_f *p, __vec16_f v) 
+template <> FORCEINLINE void __store<64>(__vec16_f *p, __vec16_f v) 
 {
   _mm512_store_ps(p, v);
 }
@@ -1310,14 +1310,14 @@ static FORCEINLINE __vec16_d __select(bool cond, __vec16_d a, __vec16_d b)
 static FORCEINLINE double __extract_element(__vec16_d  v, uint32_t index)             { return v[index];   }
 static FORCEINLINE void    __insert_element(__vec16_d *v, uint32_t index, double val) { (*v)[index] = val; }
 
-template <class RetVecType> RetVecType __smear_double(double d);
-template <> static FORCEINLINE __vec16_d __smear_double<__vec16_d>(double d) { return __vec16_d(_mm512_set1_pd(d), _mm512_set1_pd(d)); }
+template <class RetVecType> static RetVecType __smear_double(double d);
+template <> FORCEINLINE __vec16_d __smear_double<__vec16_d>(double d) { return __vec16_d(_mm512_set1_pd(d), _mm512_set1_pd(d)); }
 
-template <class RetVecType> RetVecType __setzero_double();
-template <> static FORCEINLINE __vec16_d __setzero_double<__vec16_d>() { return __vec16_d(_mm512_setzero_pd(), _mm512_setzero_pd()); }
+template <class RetVecType> static RetVecType __setzero_double();
+template <> FORCEINLINE __vec16_d __setzero_double<__vec16_d>() { return __vec16_d(_mm512_setzero_pd(), _mm512_setzero_pd()); }
 
-template <class RetVecType> RetVecType __undef_double();
-template <> static FORCEINLINE __vec16_d __undef_double<__vec16_d>() { return __vec16_d(); }
+template <class RetVecType> static RetVecType __undef_double();
+template <> FORCEINLINE __vec16_d __undef_double<__vec16_d>() { return __vec16_d(); }
 
 #define CASTD2F(_v_, _v_hi_, _v_lo_) \
   __vec16_f _v_hi_, _v_lo_;  \
@@ -1391,17 +1391,17 @@ template <int ALIGN> static FORCEINLINE void __store(__vec16_d *p, __vec16_d v)
 
 
 #if 0 /* knc::fails  as with _f this may generate fails ... so commetining it out */
-template <> static FORCEINLINE __vec16_d __load<64>(const __vec16_d *p) 
+template <> FORCEINLINE __vec16_d __load<64>(const __vec16_d *p) 
 {
   return __vec16_d(_mm512_load_pd(p), _mm512_load_pd(((uint8_t*)p)+64));
 }
-template <> static FORCEINLINE void __store<64>(__vec16_d *p, __vec16_d v) 
+template <> FORCEINLINE void __store<64>(__vec16_d *p, __vec16_d v) 
 {
   _mm512_store_pd(p, v.v1);
   _mm512_store_pd(((uint8_t*)p)+64, v.v2);
 }
-template <> static FORCEINLINE __vec16_d __load <128>(const __vec16_d *p)        { return __load<64>(p); }
-template <> static FORCEINLINE      void __store<128>(__vec16_d *p, __vec16_d v) { __store<64>(p, v);    }
+template <> FORCEINLINE __vec16_d __load <128>(const __vec16_d *p)        { return __load<64>(p); }
+template <> FORCEINLINE      void __store<128>(__vec16_d *p, __vec16_d v) { __store<64>(p, v);    }
 #endif
 
 ///////////////////////////////////////////////////////////////////////////