Fix AVX bug introduced in 4ab982bc16
This commit is contained in:
@@ -524,10 +524,24 @@ define void @__masked_store_64(<16 x i64>* nocapture, <16 x i64>,
 
 
 ;; FIXME: various code elsewhere in the builtins implementations makes
-;; calls to these, basically assuming that doing so is faster than doing
+;; calls to the 32/64 bit versions of these, basically assuming that doing
-;; a full call to an actual masked store, which isn't likely to be the
+;; so is faster than doing a full call to an actual masked store, which
-;; case on AVX. So here we provide those functions but then don't actually
+;; isn't likely to be the case on AVX. So here we provide those functions
-;; do what the caller asked for...
+;; but then don't actually do what the caller asked for...
 
+declare void @llvm.trap()
+
+define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>,
+                                    <8 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
+
+define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
+                                     <8 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
+
 define void @__masked_store_blend_32(<16 x i32>* nocapture, <16 x i32>,
                                      <16 x i32>) nounwind alwaysinline {
@@ -450,10 +450,26 @@ define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
 
 
 ;; FIXME: various code elsewhere in the builtins implementations makes
-;; calls to these, basically assuming that doing so is faster than doing
+;; calls to the 32/64 bit versions of these, basically assuming that doing
-;; a full call to an actual masked store, which isn't likely to be the
+;; so is faster than doing a full call to an actual masked store, which
-;; case on AVX. So here we provide those functions but then don't actually
+;; isn't likely to be the case on AVX. So here we provide those functions
-;; do what the caller asked for...
+;; but then don't actually do what the caller asked for...
 
+declare void @llvm.trap()
+
+define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>,
+                                    <8 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
+
+
+define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
+                                     <8 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
+
+
 define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
                                      <8 x i32>) nounwind alwaysinline {
Reference in New Issue
Block a user