Represent MOVMSK'ed masks with int64s rather than int32s.

This allows us to scale up to 64-wide execution: the mask holds one bit per lane, so a 32-bit mask caps the width at 32 lanes.
This commit is contained in:
Matt Pharr
2012-05-25 11:48:08 -07:00
parent 38cea6dc71
commit 90db01d038
20 changed files with 137 additions and 107 deletions

View File

@@ -311,8 +311,8 @@ INSERT_EXTRACT(__vec1_d, double)
///////////////////////////////////////////////////////////////////////////
// mask ops
static FORCEINLINE uint32_t __movmsk(__vec16_i1 mask) {
return mask.v;
static FORCEINLINE uint64_t __movmsk(__vec16_i1 mask) {
return (uint64_t)mask.v;
}
static FORCEINLINE __vec16_i1 __equal(__vec16_i1 a, __vec16_i1 b) {

View File

@@ -1,5 +1,5 @@
/*
Copyright (c) 2010-2011, Intel Corporation
Copyright (c) 2010-2012, Intel Corporation
All rights reserved.
Redistribution and use in source and binary forms, with or without
@@ -224,8 +224,8 @@ CAST_BITS_SCALAR(double, int64_t)
///////////////////////////////////////////////////////////////////////////
// mask ops
static FORCEINLINE uint32_t __movmsk(__vec4_i1 mask) {
return _mm_movemask_ps(mask.v);
static FORCEINLINE uint64_t __movmsk(__vec4_i1 mask) {
return (uint64_t)_mm_movemask_ps(mask.v);
}
static FORCEINLINE __vec4_i1 __equal(__vec4_i1 a, __vec4_i1 b) {