From 6de494cfdb3ee7f7d62c8b08bcaf59685867f0bf Mon Sep 17 00:00:00 2001
From: Matt Pharr
Date: Mon, 29 Aug 2011 16:50:59 -0700
Subject: [PATCH] Fix AVX bug introduced in 4ab982bc16d509fd6ec5905c2d04f8a9e8ef41bc

---
Review notes (free text in this position, before the first "diff --git"
line, is not part of the commit and is skipped when the patch is applied):

 * Both files gain @__masked_store_blend_8 and @__masked_store_blend_16.
   Their bodies only call @llvm.trap() and return; they store nothing.
 * In builtins-avx-x2.ll the two new stubs are declared with 16-wide
   vectors (<16 x i8>, <16 x i16>, <16 x i32> mask) so that they agree
   with the <16 x i32> / <16 x i64> signatures visible in the same file.
   The patch as first posted used the 8-wide types of builtins-avx.ll
   there, which looks like a copy/paste slip - TODO confirm against the
   callers of these functions.
 * Because of that change, the post-image blob id on the "index" line of
   builtins-avx-x2.ll no longer describes the resulting file exactly.
 * Line breaks and intra-line whitespace were reconstructed from a
   whitespace-collapsed copy of this patch.  If the context lines do not
   match the tree, apply with "git apply --ignore-whitespace".

 builtins-avx-x2.ll | 22 ++++++++++++++++++----
 builtins-avx.ll    | 24 ++++++++++++++++++++----
 2 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/builtins-avx-x2.ll b/builtins-avx-x2.ll
index 3c380e24..d5696097 100644
--- a/builtins-avx-x2.ll
+++ b/builtins-avx-x2.ll
@@ -524,10 +524,24 @@ define void @__masked_store_64(<16 x i64>* nocapture, <16 x i64>,
 
 
 ;; FIXME: various code elsewhere in the builtins implementations makes
-;; calls to these, basically assuming that doing so is faster than doing
-;; a full call to an actual masked store, which isn't likely to be the
-;; case on AVX. So here we provide those functions but then don't actually
-;; do what the caller asked for...
+;; calls to the 32/64 bit versions of these, basically assuming that doing
+;; so is faster than doing a full call to an actual masked store, which
+;; isn't likely to be the case on AVX. So here we provide those functions
+;; but then don't actually do what the caller asked for...
+
+declare void @llvm.trap()
+
+define void @__masked_store_blend_8(<16 x i8>* nocapture, <16 x i8>,
+                                    <16 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
+
+define void @__masked_store_blend_16(<16 x i16>* nocapture, <16 x i16>,
+                                     <16 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
 
 define void @__masked_store_blend_32(<16 x i32>* nocapture, <16 x i32>,
                                      <16 x i32>) nounwind alwaysinline {
diff --git a/builtins-avx.ll b/builtins-avx.ll
index e06bd87b..6b8faf39 100644
--- a/builtins-avx.ll
+++ b/builtins-avx.ll
@@ -450,10 +450,26 @@ define void @__masked_store_64(<8 x i64>* nocapture, <8 x i64>,
 
 
 ;; FIXME: various code elsewhere in the builtins implementations makes
-;; calls to these, basically assuming that doing so is faster than doing
-;; a full call to an actual masked store, which isn't likely to be the
-;; case on AVX. So here we provide those functions but then don't actually
-;; do what the caller asked for...
+;; calls to the 32/64 bit versions of these, basically assuming that doing
+;; so is faster than doing a full call to an actual masked store, which
+;; isn't likely to be the case on AVX. So here we provide those functions
+;; but then don't actually do what the caller asked for...
+
+declare void @llvm.trap()
+
+define void @__masked_store_blend_8(<8 x i8>* nocapture, <8 x i8>,
+                                    <8 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
+
+
+define void @__masked_store_blend_16(<8 x i16>* nocapture, <8 x i16>,
+                                     <8 x i32>) nounwind alwaysinline {
+  call void @llvm.trap()
+  ret void
+}
+
 
 define void @__masked_store_blend_32(<8 x i32>* nocapture, <8 x i32>,
                                      <8 x i32>) nounwind alwaysinline {