Update load_and_broadcast built-in

Change function suffix to "_i32", etc, from "_32"

Improve load_and_broadcast macro in util.m4 to grab vector width from 
WIDTH variable rather than taking it as a parameter.
This commit is contained in:
Matt Pharr
2012-06-07 13:30:49 -07:00
parent 1d29991268
commit 91d22d150f
10 changed files with 56 additions and 57 deletions

View File

@@ -353,10 +353,10 @@ define i64 @__reduce_max_uint64(<16 x i64>) nounwind readnone alwaysinline {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(16, i8, 8)
-load_and_broadcast(16, i16, 16)
-load_and_broadcast(16, i32, 32)
-load_and_broadcast(16, i64, 64)
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
; no masked load instruction for i8 and i16 types??
masked_load(16, i8, 8, 1)

View File

@@ -334,10 +334,10 @@ define i64 @__reduce_max_uint64(<8 x i64>) nounwind readnone alwaysinline {
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(8, i8, 8)
-load_and_broadcast(8, i16, 16)
-load_and_broadcast(8, i32, 32)
-load_and_broadcast(8, i64, 64)
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
; no masked load instruction for i8 and i16 types??
masked_load(8, i8, 8, 1)

View File

@@ -21,10 +21,10 @@ gen_masked_store(1, i64, 64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(1, i8, 8)
-load_and_broadcast(1, i16, 16)
-load_and_broadcast(1, i32, 32)
-load_and_broadcast(1, i64, 64)
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
masked_load(1, i8, 8, 1)
masked_load(1, i16, 16, 2)

View File

@@ -230,16 +230,17 @@ declare i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(WIDTH, i8, 8)
-load_and_broadcast(WIDTH, i16, 16)
-load_and_broadcast(WIDTH, i32, 32)
-load_and_broadcast(WIDTH, i64, 64)
declare <WIDTH x i8> @__masked_load_8(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i16> @__masked_load_16(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i32> @__masked_load_32(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
declare <WIDTH x i64> @__masked_load_64(i8 * nocapture, <WIDTH x i1> %mask) nounwind readonly
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
declare void @__masked_store_8(<WIDTH x i8>* nocapture, <WIDTH x i8>,
<WIDTH x i1>) nounwind
declare void @__masked_store_16(<WIDTH x i16>* nocapture, <WIDTH x i16>,

View File

@@ -433,15 +433,15 @@ reduce_equal(8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(8, i8, 8)
-load_and_broadcast(8, i16, 16)
-load_and_broadcast(8, i32, 32)
-load_and_broadcast(8, i64, 64)
masked_load(8, i8, 8, 1)
masked_load(8, i16, 16, 2)
masked_load(8, i32, 32, 4)
masked_load(8, i64, 64, 8)
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather/scatter

View File

@@ -560,10 +560,10 @@ gen_masked_store(4, i64, 64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(4, i8, 8)
-load_and_broadcast(4, i16, 16)
-load_and_broadcast(4, i32, 32)
-load_and_broadcast(4, i64, 64)
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
masked_load(4, i8, 8, 1)
masked_load(4, i16, 16, 2)

View File

@@ -360,15 +360,15 @@ reduce_equal(8)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(8, i8, 8)
-load_and_broadcast(8, i16, 16)
-load_and_broadcast(8, i32, 32)
-load_and_broadcast(8, i64, 64)
masked_load(8, i8, 8, 1)
masked_load(8, i16, 16, 2)
masked_load(8, i32, 32, 4)
masked_load(8, i64, 64, 8)
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; gather/scatter

View File

@@ -459,10 +459,10 @@ gen_masked_store(4, i64, 64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; unaligned loads/loads+broadcasts
-load_and_broadcast(4, i8, 8)
-load_and_broadcast(4, i16, 16)
-load_and_broadcast(4, i32, 32)
-load_and_broadcast(4, i64, 64)
+load_and_broadcast(i8)
+load_and_broadcast(i16)
+load_and_broadcast(i32)
+load_and_broadcast(i64)
masked_load(4, i8, 8, 1)
masked_load(4, i16, 16, 2)

View File

@@ -1701,13 +1701,13 @@ define void @__keep_funcs_live(i8 * %ptr, <WIDTH x i8> %v8, <WIDTH x i16> %v16,
%ml64 = call <WIDTH x i64> @__masked_load_64(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %ml64)
-%lb8 = call <WIDTH x i8> @__load_and_broadcast_8(i8 * %ptr, <WIDTH x MASK> %mask)
+%lb8 = call <WIDTH x i8> @__load_and_broadcast_i8(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use8(<WIDTH x i8> %lb8)
-%lb16 = call <WIDTH x i16> @__load_and_broadcast_16(i8 * %ptr, <WIDTH x MASK> %mask)
+%lb16 = call <WIDTH x i16> @__load_and_broadcast_i16(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use16(<WIDTH x i16> %lb16)
-%lb32 = call <WIDTH x i32> @__load_and_broadcast_32(i8 * %ptr, <WIDTH x MASK> %mask)
+%lb32 = call <WIDTH x i32> @__load_and_broadcast_i32(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use32(<WIDTH x i32> %lb32)
-%lb64 = call <WIDTH x i64> @__load_and_broadcast_64(i8 * %ptr, <WIDTH x MASK> %mask)
+%lb64 = call <WIDTH x i64> @__load_and_broadcast_i64(i8 * %ptr, <WIDTH x MASK> %mask)
call void @__use64(<WIDTH x i64> %lb64)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
@@ -2489,20 +2489,18 @@ i64minmax(WIDTH,max,uint64,ugt)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Emit code to safely load a scalar value and broadcast it across the
-;; elements of a vector. Parameters:
-;; $1: target vector width
-;; $2: element type for which to emit the function (i32, i64, ...)
-;; $3: suffix for function name (32, 64, ...)
+;; elements of a vector. Parameter:
+;; $1: element type for which to emit the function (i32, i64, ...)
define(`load_and_broadcast', `
-define <$1 x $2> @__load_and_broadcast_$3(i8 *, <$1 x MASK> %mask) nounwind alwaysinline {
-%ptr = bitcast i8 * %0 to $2 *
-%val = load $2 * %ptr
+define <WIDTH x $1> @__load_and_broadcast_$1(i8 *, <WIDTH x MASK> %mask) nounwind alwaysinline {
+%ptr = bitcast i8 * %0 to $1 *
+%val = load $1 * %ptr
-%ret0 = insertelement <$1 x $2> undef, $2 %val, i32 0
-forloop(i, 1, eval($1-1), `
-%ret`'i = insertelement <$1 x $2> %ret`'eval(i-1), $2 %val, i32 i')
-ret <$1 x $2> %ret`'eval($1-1)
+%ret0 = insertelement <WIDTH x $1> undef, $1 %val, i32 0
+forloop(i, 1, eval(WIDTH-1), `
+%ret`'i = insertelement <WIDTH x $1> %ret`'eval(i-1), $1 %val, i32 i')
+ret <WIDTH x $1> %ret`'eval(WIDTH-1)
}
')

20
opt.cpp
View File

@@ -2281,21 +2281,21 @@ GSToLoadStorePass::runOnBasicBlock(llvm::BasicBlock &bb) {
DEBUG_START_PASS("GSToLoadStorePass");
GatherImpInfo gInfo[] = {
-GatherImpInfo("__pseudo_gather_base_offsets32_8", "__load_and_broadcast_8",
+GatherImpInfo("__pseudo_gather_base_offsets32_8", "__load_and_broadcast_i8",
"__masked_load_8", 1),
-GatherImpInfo("__pseudo_gather_base_offsets32_16", "__load_and_broadcast_16",
+GatherImpInfo("__pseudo_gather_base_offsets32_16", "__load_and_broadcast_i16",
"__masked_load_16", 2),
-GatherImpInfo("__pseudo_gather_base_offsets32_32", "__load_and_broadcast_32",
+GatherImpInfo("__pseudo_gather_base_offsets32_32", "__load_and_broadcast_i32",
"__masked_load_32", 4),
-GatherImpInfo("__pseudo_gather_base_offsets32_64", "__load_and_broadcast_64",
+GatherImpInfo("__pseudo_gather_base_offsets32_64", "__load_and_broadcast_i64",
"__masked_load_64", 8),
-GatherImpInfo("__pseudo_gather_base_offsets64_8", "__load_and_broadcast_8",
+GatherImpInfo("__pseudo_gather_base_offsets64_8", "__load_and_broadcast_i8",
"__masked_load_8", 1),
-GatherImpInfo("__pseudo_gather_base_offsets64_16", "__load_and_broadcast_16",
+GatherImpInfo("__pseudo_gather_base_offsets64_16", "__load_and_broadcast_i16",
"__masked_load_16", 2),
-GatherImpInfo("__pseudo_gather_base_offsets64_32", "__load_and_broadcast_32",
+GatherImpInfo("__pseudo_gather_base_offsets64_32", "__load_and_broadcast_i32",
"__masked_load_32", 4),
-GatherImpInfo("__pseudo_gather_base_offsets64_64", "__load_and_broadcast_64",
+GatherImpInfo("__pseudo_gather_base_offsets64_64", "__load_and_broadcast_i64",
"__masked_load_64", 8)
};
ScatterImpInfo sInfo[] = {
@@ -3815,14 +3815,14 @@ MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
"__gather_elt32_i32", "__gather_elt32_i64",
"__gather_elt64_i8", "__gather_elt64_i16",
"__gather_elt64_i32", "__gather_elt64_i64",
-"__load_and_broadcast_8", "__load_and_broadcast_16",
-"__load_and_broadcast_32", "__load_and_broadcast_64",
"__masked_load_8", "__masked_load_16",
"__masked_load_32", "__masked_load_64",
"__masked_store_8", "__masked_store_16",
"__masked_store_32", "__masked_store_64",
"__masked_store_blend_8", "__masked_store_blend_16",
"__masked_store_blend_32", "__masked_store_blend_64",
+"__load_and_broadcast_i8", "__load_and_broadcast_i16",
+"__load_and_broadcast_i32", "__load_and_broadcast_i64",
"__scatter_base_offsets32_i8", "__scatter_base_offsets32_i16",
"__scatter_base_offsets32_i32", "__scatter_base_offsets32_i64",
"__scatter_base_offsets64_i8", "__scatter_base_offsets64_i16",