Represent MOVMSK'ed masks with int64s rather than int32s.

Each program instance (lane) contributes one bit to the mask, so a 64-bit result allows us to scale up to 64-wide execution.
2012-05-25 11:48:08 -07:00
parent 38cea6dc71
commit 90db01d038
20 changed files with 137 additions and 107 deletions
--- a/examples/intrinsics/generic-16.h
+++ b/examples/intrinsics/generic-16.h
@@ -311,8 +311,8 @@ INSERT_EXTRACT(__vec1_d, double)
 ///////////////////////////////////////////////////////////////////////////
 // mask ops

-static FORCEINLINE uint32_t __movmsk(__vec16_i1 mask) {
-    return mask.v;
+static FORCEINLINE uint64_t __movmsk(__vec16_i1 mask) {
+    return (uint64_t)mask.v;
 }

 static FORCEINLINE __vec16_i1 __equal(__vec16_i1 a, __vec16_i1 b) {
--- a/examples/intrinsics/sse4.h
+++ b/examples/intrinsics/sse4.h
@@ -1,5 +1,5 @@
 /*
-  Copyright (c) 2010-2011, Intel Corporation
+  Copyright (c) 2010-2012, Intel Corporation
  All rights reserved.

  Redistribution and use in source and binary forms, with or without
@@ -224,8 +224,8 @@ CAST_BITS_SCALAR(double, int64_t)
 ///////////////////////////////////////////////////////////////////////////
 // mask ops

-static FORCEINLINE uint32_t __movmsk(__vec4_i1 mask) {
-    return _mm_movemask_ps(mask.v);
+static FORCEINLINE uint64_t __movmsk(__vec4_i1 mask) {
+    return (uint64_t)_mm_movemask_ps(mask.v);
 }

 static FORCEINLINE __vec4_i1 __equal(__vec4_i1 a, __vec4_i1 b) {