Represent MOVMSK'ed masks with int64s rather than int32s.

This allows us to scale up to 64-wide execution.
2012-05-25 11:48:08 -07:00
parent 38cea6dc71
commit 90db01d038
20 changed files with 137 additions and 107 deletions
--- a/builtins/target-sse2-x2.ll
+++ b/builtins/target-sse2-x2.ll
@@ -1,4 +1,4 @@
-;;  Copyright (c) 2010-2011, Intel Corporation
+;;  Copyright (c) 2010-2012, Intel Corporation
 ;;  All rights reserved.
 ;;
 ;;  Redistribution and use in source and binary forms, with or without
@@ -295,7 +295,7 @@ define i32 @__max_uniform_uint32(i32, i32) nounwind readonly alwaysinline {

 declare i32 @llvm.x86.sse.movmsk.ps(<4 x float>) nounwind readnone

-define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
+define i64 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
  ; first do two 4-wide movmsk calls
  %floatmask = bitcast <8 x i32> %0 to <8 x float>
  %m0 = shufflevector <8 x float> %floatmask, <8 x float> undef,
@@ -309,7 +309,8 @@ define i32 @__movmsk(<8 x i32>) nounwind readnone alwaysinline {
  ; of the second one
  %v1s = shl i32 %v1, 4
  %v = or i32 %v0, %v1s
-  ret i32 %v
+  %v64 = zext i32 %v to i64
+  ret i64 %v64
 }

 define <4 x float> @__vec4_add_float(<4 x float> %v0,