Add avg_{up,down}_int{8,16} routines to stdlib

These compute the average of two given values, rounding up and down,
respectively, if the result isn't exact.  When possible, these are
mapped to target-specific intrinsics (PAVG[BW] on IA and VH[R]ADD[US]
on NEON.)

A subsequent commit will add pattern-matching to generate calls to
these intrinsics when the corresponding patterns are detected in the
IR.
This commit is contained in:
Matt Pharr
2013-08-03 20:44:25 -07:00
parent 4f48d3258a
commit 5b20b06bd9
23 changed files with 592 additions and 15 deletions

View File

@@ -277,3 +277,9 @@ define double @__max_uniform_double(double, double) nounwind readnone alwaysinli
sse_binary_scalar(ret, 2, double, @llvm.x86.sse2.max.sd, %0, %1)
ret double %ret
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
define_avgs()

View File

@@ -864,3 +864,9 @@ declare float @__half_to_float_uniform(i16 %v) nounwind readnone
declare <WIDTH x float> @__half_to_float_varying(<WIDTH x i16> %v) nounwind readnone
declare i16 @__float_to_half_uniform(float %v) nounwind readnone
declare <WIDTH x i16> @__float_to_half_varying(<WIDTH x float> %v) nounwind readnone
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
define_avgs()

View File

@@ -364,3 +364,8 @@ declare void @__prefetch_read_uniform_2(i8 * nocapture) nounwind
declare void @__prefetch_read_uniform_3(i8 * nocapture) nounwind
declare void @__prefetch_read_uniform_nt(i8 * nocapture) nounwind
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
define_avgs()

View File

@@ -456,3 +456,62 @@ define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
reduce8(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16
;; 8-wide int8/int16 averages.  The NEON halving-add family maps
;; directly: the rounding variants (vrhadd*) compute (a+b+1) >> 1 and the
;; plain variants (vhadd*) compute (a+b) >> 1, in unsigned (u) and
;; signed (s) flavors, so each builtin is a single intrinsic call.
declare <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
;; unsigned int8 average, rounding up
define <8 x i8> @__avg_up_uint8(<8 x i8>, <8 x i8>) nounwind readnone {
%r = call <8 x i8> @llvm.arm.neon.vrhaddu.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
;; signed int8 average, rounding up
define <8 x i8> @__avg_up_int8(<8 x i8>, <8 x i8>) nounwind readnone {
%r = call <8 x i8> @llvm.arm.neon.vrhadds.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
;; unsigned int8 average, rounding down
define <8 x i8> @__avg_down_uint8(<8 x i8>, <8 x i8>) nounwind readnone {
%r = call <8 x i8> @llvm.arm.neon.vhaddu.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8>, <8 x i8>) nounwind readnone
;; signed int8 average, rounding down
define <8 x i8> @__avg_down_int8(<8 x i8>, <8 x i8>) nounwind readnone {
%r = call <8 x i8> @llvm.arm.neon.vhadds.v8i8(<8 x i8> %0, <8 x i8> %1)
ret <8 x i8> %r
}
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; unsigned int16 average, rounding up
define <8 x i16> @__avg_up_uint16(<8 x i16>, <8 x i16>) nounwind readnone {
%r = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; signed int16 average, rounding up
define <8 x i16> @__avg_up_int16(<8 x i16>, <8 x i16>) nounwind readnone {
%r = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; unsigned int16 average, rounding down
define <8 x i16> @__avg_down_uint16(<8 x i16>, <8 x i16>) nounwind readnone {
%r = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; signed int16 average, rounding down
define <8 x i16> @__avg_down_int16(<8 x i16>, <8 x i16>) nounwind readnone {
%r = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}

View File

@@ -426,3 +426,62 @@ define i64 @__reduce_min_uint64(<4 x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<4 x i64>) nounwind readnone {
reduce4(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16
;; 4-wide int8/int16 averages via the NEON halving-add intrinsics:
;; vrhadd* computes (a+b+1) >> 1 (round up), vhadd* computes (a+b) >> 1
;; (round down), in unsigned (u) and signed (s) flavors.
;; NOTE(review): the .v4i8 intrinsic width is narrower than the native
;; NEON register width; presumably the backend legalizes these by
;; widening -- confirm against the supported LLVM versions.
declare <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
;; unsigned int8 average, rounding up
define <4 x i8> @__avg_up_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vrhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
;; signed int8 average, rounding up
define <4 x i8> @__avg_up_int8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vrhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
;; unsigned int8 average, rounding down
define <4 x i8> @__avg_down_uint8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vhaddu.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8>, <4 x i8>) nounwind readnone
;; signed int8 average, rounding down
define <4 x i8> @__avg_down_int8(<4 x i8>, <4 x i8>) nounwind readnone {
%r = call <4 x i8> @llvm.arm.neon.vhadds.v4i8(<4 x i8> %0, <4 x i8> %1)
ret <4 x i8> %r
}
declare <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
;; unsigned int16 average, rounding up
define <4 x i16> @__avg_up_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vrhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
;; signed int16 average, rounding up
define <4 x i16> @__avg_up_int16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vrhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
;; unsigned int16 average, rounding down
define <4 x i16> @__avg_down_uint16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vhaddu.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}
declare <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16>, <4 x i16>) nounwind readnone
;; signed int16 average, rounding down
define <4 x i16> @__avg_down_int16(<4 x i16>, <4 x i16>) nounwind readnone {
%r = call <4 x i16> @llvm.arm.neon.vhadds.v4i16(<4 x i16> %0, <4 x i16> %1)
ret <4 x i16> %r
}

View File

@@ -506,3 +506,78 @@ define i64 @__reduce_min_uint64(<WIDTH x i64>) nounwind readnone {
define i64 @__reduce_max_uint64(<WIDTH x i64>) nounwind readnone {
reduce16(i64, @__max_varying_uint64, @__max_uniform_uint64)
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
;; 16-wide int8/int16 averages.  vrhadd* computes (a+b+1) >> 1 (round
;; up) and vhadd* computes (a+b) >> 1 (round down), unsigned (u) and
;; signed (s).  The int8 variants use the full 16-wide intrinsics
;; directly; the int16 variants split each 16-wide vector into two
;; 8-wide halves, average the halves, and reassemble the result.
declare <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
;; unsigned int8 average, rounding up
define <16 x i8> @__avg_up_uint8(<16 x i8>, <16 x i8>) nounwind readnone {
%r = call <16 x i8> @llvm.arm.neon.vrhaddu.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
;; signed int8 average, rounding up
define <16 x i8> @__avg_up_int8(<16 x i8>, <16 x i8>) nounwind readnone {
%r = call <16 x i8> @llvm.arm.neon.vrhadds.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
;; unsigned int8 average, rounding down
define <16 x i8> @__avg_down_uint8(<16 x i8>, <16 x i8>) nounwind readnone {
%r = call <16 x i8> @llvm.arm.neon.vhaddu.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8>, <16 x i8>) nounwind readnone
;; signed int8 average, rounding down
define <16 x i8> @__avg_down_int8(<16 x i8>, <16 x i8>) nounwind readnone {
%r = call <16 x i8> @llvm.arm.neon.vhadds.v16i8(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; unsigned int16 average, rounding up; processed as two 8-wide halves
define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %a0, <8 x i16> %a1)
%r1 = call <8 x i16> @llvm.arm.neon.vrhaddu.v8i16(<8 x i16> %b0, <8 x i16> %b1)
v8tov16(i16, %r0, %r1, %r)
ret <16 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; signed int16 average, rounding up; processed as two 8-wide halves
define <16 x i16> @__avg_up_int16(<16 x i16>, <16 x i16>) nounwind readnone {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %a0, <8 x i16> %a1)
%r1 = call <8 x i16> @llvm.arm.neon.vrhadds.v8i16(<8 x i16> %b0, <8 x i16> %b1)
v8tov16(i16, %r0, %r1, %r)
ret <16 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; unsigned int16 average, rounding down; processed as two 8-wide halves
define <16 x i16> @__avg_down_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %a0, <8 x i16> %a1)
%r1 = call <8 x i16> @llvm.arm.neon.vhaddu.v8i16(<8 x i16> %b0, <8 x i16> %b1)
v8tov16(i16, %r0, %r1, %r)
ret <16 x i16> %r
}
declare <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16>, <8 x i16>) nounwind readnone
;; signed int16 average, rounding down; processed as two 8-wide halves
define <16 x i16> @__avg_down_int16(<16 x i16>, <16 x i16>) nounwind readnone {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %a0, <8 x i16> %a1)
%r1 = call <8 x i16> @llvm.arm.neon.vhadds.v8i16(<8 x i16> %b0, <8 x i16> %b1)
v8tov16(i16, %r0, %r1, %r)
ret <16 x i16> %r
}

View File

@@ -269,4 +269,8 @@ define i64 @__popcnt_int64(i64) nounwind readnone alwaysinline {
ret i64 %val
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
define_avgs()

View File

@@ -449,3 +449,34 @@ gen_scatter(i32)
gen_scatter(float)
gen_scatter(i64)
gen_scatter(double)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
;; unsigned int8 average, rounding up: PAVGB computes (a+b+1) >> 1 per
;; unsigned byte lane.  The 8-wide input is widened to the native
;; 16-wide SSE vector (upper lanes undef), averaged, and the low 8
;; lanes extracted.  Marked nounwind readnone for consistency with the
;; other avg builtins; the body only calls a readnone intrinsic.
define <8 x i8> @__avg_up_uint8(<8 x i8>, <8 x i8>) nounwind readnone {
%v0 = shufflevector <8 x i8> %0, <8 x i8> undef,
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 undef, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
%v1 = shufflevector <8 x i8> %1, <8 x i8> undef,
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 undef, i32 undef, i32 undef, i32 undef,
i32 undef, i32 undef, i32 undef, i32 undef>
%r16 = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %v0, <16 x i8> %v1)
%r = shufflevector <16 x i8> %r16, <16 x i8> undef,
<8 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7>
ret <8 x i8> %r
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
;; unsigned int16 average, rounding up: PAVGW is natively 8-wide.
define <8 x i16> @__avg_up_uint16(<8 x i16>, <8 x i16>) nounwind readnone {
%r = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %0, <8 x i16> %1)
ret <8 x i16> %r
}
;; There is no signed PAVG instruction, so the signed round-up variants
;; and all four round-down variants come from the generic macro
;; implementations.
define_avg_up_int8()
define_avg_up_int16()
define_down_avgs()

View File

@@ -456,3 +456,28 @@ gen_scatter(i32)
gen_scatter(float)
gen_scatter(i64)
gen_scatter(double)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
declare <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8>, <16 x i8>) nounwind readnone
;; unsigned int8 average, rounding up: PAVGB computes (a+b+1) >> 1 per
;; unsigned byte lane, natively 16-wide.
define <16 x i8> @__avg_up_uint8(<16 x i8>, <16 x i8>) nounwind readnone {
%r = call <16 x i8> @llvm.x86.sse2.pavg.b(<16 x i8> %0, <16 x i8> %1)
ret <16 x i8> %r
}
declare <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16>, <8 x i16>) nounwind readnone
;; unsigned int16 average, rounding up: PAVGW is 8-wide, so split the
;; 16-wide input into two halves, average each, and reassemble.
define <16 x i16> @__avg_up_uint16(<16 x i16>, <16 x i16>) nounwind readnone {
v16tov8(i16, %0, %a0, %b0)
v16tov8(i16, %1, %a1, %b1)
%r0 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %a0, <8 x i16> %a1)
%r1 = call <8 x i16> @llvm.x86.sse2.pavg.w(<8 x i16> %b0, <8 x i16> %b1)
v8tov16(i16, %r0, %r1, %r)
ret <16 x i16> %r
}
;; No signed PAVG instruction exists; the signed round-up variants and
;; all four round-down variants come from the generic macro
;; implementations.
define_avg_up_int8()
define_avg_up_int16()
define_down_avgs()

View File

@@ -573,3 +573,9 @@ define <8 x double> @__max_varying_double(<8 x double>, <8 x double>) nounwind r
binary2to8(ret, double, @llvm.x86.sse2.max.pd, %0, %1)
ret <8 x double> %ret
}
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
define_avgs()

View File

@@ -473,3 +473,9 @@ gen_scatter(i32)
gen_scatter(float)
gen_scatter(i64)
gen_scatter(double)
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
define_avgs()

View File

@@ -49,9 +49,9 @@ define(`MASK_HIGH_BIT_ON',
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector assembly and deconstruction utilities
;; vector deconstruction utilities
;; split 8-wide vector into 2 4-wide vectors
;;
;;
;; $1: vector element type
;; $2: 8-wide vector
;; $3: first 4-wide vector
@@ -71,10 +71,6 @@ define(`v16tov8', `
<8 x i32> <i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
')
;; 4-wide into 2 2-wide
;; args as above
;;
define(`v4tov2', `
$3 = shufflevector <4 x $1> $2, <4 x $1> undef, <2 x i32> <i32 0, i32 1>
$4 = shufflevector <4 x $1> $2, <4 x $1> undef, <2 x i32> <i32 2, i32 3>
@@ -96,6 +92,20 @@ define(`v16tov4', `
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; vector assembly: wider vector from two narrower vectors
;;
;; $1: vector element type
;; $2: first n-wide vector
;; $3: second n-wide vector
;; $4: result 2*n-wide vector
define(`v8tov16', `
$4 = shufflevector <8 x $1> $2, <8 x $1> $3,
<16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7,
i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15>
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; Helper macro for calling various SSE instructions for scalar values
;; but where the instruction takes a vector parameter.
;; $1 : name of variable to put the final value in
@@ -4276,3 +4286,109 @@ define i1 @__rdrand_i64(i64 * %ptr) {
ret i1 %good
}
')
;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;;
;; int8/int16 builtins
define(`define_avg_up_uint8', `
;; unsigned int8 average, rounding up: (a+b+1) >> 1, computed in i16 so
;; that the worst case sum 255+255+1 cannot overflow.
define <WIDTH x i8> @__avg_up_uint8(<WIDTH x i8>, <WIDTH x i8>) {
%a16 = zext <WIDTH x i8> %0 to <WIDTH x i16>
%b16 = zext <WIDTH x i8> %1 to <WIDTH x i16>
%sum1 = add <WIDTH x i16> %a16, %b16
%sum = add <WIDTH x i16> %sum1, < forloop(i, 1, eval(WIDTH-1), `i16 1, ') i16 1 >
%avg = lshr <WIDTH x i16> %sum, < forloop(i, 1, eval(WIDTH-1), `i16 1, ') i16 1 >
%r = trunc <WIDTH x i16> %avg to <WIDTH x i8>
ret <WIDTH x i8> %r
}')
define(`define_avg_up_int8', `
;; signed int8 average, rounding up: (a+b+1) >> 1, computed in i16 so
;; the sum cannot overflow.  The divide-by-two must be an arithmetic
;; shift right (floor); sdiv by 2 truncates toward zero, which gives
;; wrong results for negative inputs -- e.g. it would return -1 for
;; inputs -2 and -2 -- and would disagree with the vrhadds-based NEON
;; targets.
define <WIDTH x i8> @__avg_up_int8(<WIDTH x i8>, <WIDTH x i8>) {
%a16 = sext <WIDTH x i8> %0 to <WIDTH x i16>
%b16 = sext <WIDTH x i8> %1 to <WIDTH x i16>
%sum1 = add <WIDTH x i16> %a16, %b16
%sum = add <WIDTH x i16> %sum1, < forloop(i, 1, eval(WIDTH-1), `i16 1, ') i16 1 >
%avg = ashr <WIDTH x i16> %sum, < forloop(i, 1, eval(WIDTH-1), `i16 1, ') i16 1 >
%r = trunc <WIDTH x i16> %avg to <WIDTH x i8>
ret <WIDTH x i8> %r
}')
define(`define_avg_up_uint16', `
;; unsigned int16 average, rounding up: (a+b+1) >> 1, computed in i32 so
;; the sum cannot overflow.
define <WIDTH x i16> @__avg_up_uint16(<WIDTH x i16>, <WIDTH x i16>) {
%a32 = zext <WIDTH x i16> %0 to <WIDTH x i32>
%b32 = zext <WIDTH x i16> %1 to <WIDTH x i32>
%sum1 = add <WIDTH x i32> %a32, %b32
%sum = add <WIDTH x i32> %sum1, < forloop(i, 1, eval(WIDTH-1), `i32 1, ') i32 1 >
%avg = lshr <WIDTH x i32> %sum, < forloop(i, 1, eval(WIDTH-1), `i32 1, ') i32 1 >
%r = trunc <WIDTH x i32> %avg to <WIDTH x i16>
ret <WIDTH x i16> %r
}')
define(`define_avg_up_int16', `
;; signed int16 average, rounding up: (a+b+1) >> 1, computed in i32 so
;; the sum cannot overflow.  The divide-by-two must be an arithmetic
;; shift right (floor); sdiv by 2 truncates toward zero, which gives
;; wrong results for negative inputs -- e.g. it would return -1 for
;; inputs -2 and -2 -- and would disagree with the vrhadds-based NEON
;; targets.
define <WIDTH x i16> @__avg_up_int16(<WIDTH x i16>, <WIDTH x i16>) {
%a32 = sext <WIDTH x i16> %0 to <WIDTH x i32>
%b32 = sext <WIDTH x i16> %1 to <WIDTH x i32>
%sum1 = add <WIDTH x i32> %a32, %b32
%sum = add <WIDTH x i32> %sum1, < forloop(i, 1, eval(WIDTH-1), `i32 1, ') i32 1 >
%avg = ashr <WIDTH x i32> %sum, < forloop(i, 1, eval(WIDTH-1), `i32 1, ') i32 1 >
%r = trunc <WIDTH x i32> %avg to <WIDTH x i16>
ret <WIDTH x i16> %r
}')
define(`define_avg_down_uint8', `
;; unsigned int8 average, rounding down: (a+b) >> 1, computed in i16 so
;; the sum cannot overflow.
define <WIDTH x i8> @__avg_down_uint8(<WIDTH x i8>, <WIDTH x i8>) {
%a16 = zext <WIDTH x i8> %0 to <WIDTH x i16>
%b16 = zext <WIDTH x i8> %1 to <WIDTH x i16>
%sum = add <WIDTH x i16> %a16, %b16
%avg = lshr <WIDTH x i16> %sum, < forloop(i, 1, eval(WIDTH-1), `i16 1, ') i16 1 >
%r = trunc <WIDTH x i16> %avg to <WIDTH x i8>
ret <WIDTH x i8> %r
}')
define(`define_avg_down_int8', `
;; signed int8 average, rounding down: (a+b) >> 1, computed in i16 so
;; the sum cannot overflow.  An arithmetic shift right (floor) is used
;; rather than sdiv by 2: sdiv truncates toward zero, i.e. rounds UP for
;; negative odd sums, violating the rounding-down contract and
;; disagreeing with the vhadds-based NEON targets.
define <WIDTH x i8> @__avg_down_int8(<WIDTH x i8>, <WIDTH x i8>) {
%a16 = sext <WIDTH x i8> %0 to <WIDTH x i16>
%b16 = sext <WIDTH x i8> %1 to <WIDTH x i16>
%sum = add <WIDTH x i16> %a16, %b16
%avg = ashr <WIDTH x i16> %sum, < forloop(i, 1, eval(WIDTH-1), `i16 1, ') i16 1 >
%r = trunc <WIDTH x i16> %avg to <WIDTH x i8>
ret <WIDTH x i8> %r
}')
define(`define_avg_down_uint16', `
;; unsigned int16 average, rounding down: (a+b) >> 1, computed in i32 so
;; the sum cannot overflow.
define <WIDTH x i16> @__avg_down_uint16(<WIDTH x i16>, <WIDTH x i16>) {
%a32 = zext <WIDTH x i16> %0 to <WIDTH x i32>
%b32 = zext <WIDTH x i16> %1 to <WIDTH x i32>
%sum = add <WIDTH x i32> %a32, %b32
%avg = lshr <WIDTH x i32> %sum, < forloop(i, 1, eval(WIDTH-1), `i32 1, ') i32 1 >
%r = trunc <WIDTH x i32> %avg to <WIDTH x i16>
ret <WIDTH x i16> %r
}')
define(`define_avg_down_int16', `
;; signed int16 average, rounding down: (a+b) >> 1, computed in i32 so
;; the sum cannot overflow.  An arithmetic shift right (floor) is used
;; rather than sdiv by 2: sdiv truncates toward zero, i.e. rounds UP for
;; negative odd sums, violating the rounding-down contract and
;; disagreeing with the vhadds-based NEON targets.
define <WIDTH x i16> @__avg_down_int16(<WIDTH x i16>, <WIDTH x i16>) {
%a32 = sext <WIDTH x i16> %0 to <WIDTH x i32>
%b32 = sext <WIDTH x i16> %1 to <WIDTH x i32>
%sum = add <WIDTH x i32> %a32, %b32
%avg = ashr <WIDTH x i32> %sum, < forloop(i, 1, eval(WIDTH-1), `i32 1, ') i32 1 >
%r = trunc <WIDTH x i32> %avg to <WIDTH x i16>
ret <WIDTH x i16> %r
}')
;; Convenience aggregators: emit the four round-up averages, the four
;; round-down averages, or all eight at once.  Targets with native
;; support for some of the operations invoke only the subset they need.
define(`define_up_avgs', `
define_avg_up_uint8()
define_avg_up_int8()
define_avg_up_uint16()
define_avg_up_int16()
')
define(`define_down_avgs', `
define_avg_down_uint8()
define_avg_down_int8()
define_avg_down_uint16()
define_avg_down_int16()
')
define(`define_avgs', `
define_up_avgs()
define_down_avgs()
')

View File

@@ -3399,6 +3399,31 @@ The ``isnan()`` functions test whether the given value is a floating-point
uniform bool isnan(uniform double v)
A number of functions are also available for performing operations on 8- and
16-bit quantities; these map to specialized instructions that perform these
operations on targets that support them. ``avg_up()`` computes the average
of the two values, rounding up if their average is halfway between two
integers (i.e., it computes ``(a+b+1)/2``).
::
int8 avg_up(int8 a, int8 b)
unsigned int8 avg_up(unsigned int8 a, unsigned int8 b)
int16 avg_up(int16 a, int16 b)
unsigned int16 avg_up(unsigned int16 a, unsigned int16 b)
``avg_down()`` computes the average of the two values, rounding down (i.e.,
it computes ``(a+b)/2``).
::
int8 avg_down(int8 a, int8 b)
unsigned int8 avg_down(unsigned int8 a, unsigned int8 b)
int16 avg_down(int16 a, int16 b)
unsigned int16 avg_down(unsigned int16 a, unsigned int16 b)
Transcendental Functions
------------------------

View File

@@ -4343,6 +4343,14 @@ char MakeInternalFuncsStaticPass::ID = 0;
bool
MakeInternalFuncsStaticPass::runOnModule(llvm::Module &module) {
const char *names[] = {
"__avg_up_uint8",
"__avg_up_int8",
"__avg_up_uint16",
"__avg_up_int16",
"__avg_down_uint8",
"__avg_down_int8",
"__avg_down_uint16",
"__avg_down_int16",
"__fast_masked_vload",
"__gather_factored_base_offsets32_i8", "__gather_factored_base_offsets32_i16",
"__gather_factored_base_offsets32_i32", "__gather_factored_base_offsets32_i64",

View File

@@ -4812,8 +4812,8 @@ static const uniform int64 __idiv_table_s32[][3] = {
};
__declspec(safe)
static unmasked unsigned int8 __fast_idiv(unsigned int8 numerator,
uniform unsigned int8 divisor) {
static unmasked inline unsigned int8
__fast_idiv(unsigned int8 numerator, uniform unsigned int8 divisor) {
uniform int64 method = __idiv_table_u8[divisor-2][0];
uniform int64 multiplier = __idiv_table_u8[divisor-2][1];
uniform int64 shift = __idiv_table_u8[divisor-2][2];
@@ -4833,7 +4833,7 @@ static unmasked unsigned int8 __fast_idiv(unsigned int8 numerator,
}
__declspec(safe)
static unmasked int8 __fast_idiv(int8 numerator, uniform int8 divisor) {
static unmasked inline int8 __fast_idiv(int8 numerator, uniform int8 divisor) {
uniform int8 method = __idiv_table_s8[divisor-2][0];
uniform int16 multiplier = __idiv_table_s8[divisor-2][1];
uniform int8 shift = __idiv_table_s8[divisor-2][2];
@@ -4850,8 +4850,8 @@ static unmasked int8 __fast_idiv(int8 numerator, uniform int8 divisor) {
}
__declspec(safe)
static unmasked unsigned int16 __fast_idiv(unsigned int16 numerator,
uniform unsigned int16 divisor) {
static unmasked inline unsigned int16 __fast_idiv(unsigned int16 numerator,
uniform unsigned int16 divisor) {
uniform int64 method = __idiv_table_u16[divisor-2][0];
uniform int64 multiplier = __idiv_table_u16[divisor-2][1];
uniform int64 shift = __idiv_table_u16[divisor-2][2];
@@ -4871,7 +4871,7 @@ static unmasked unsigned int16 __fast_idiv(unsigned int16 numerator,
}
__declspec(safe)
static unmasked int16 __fast_idiv(int16 numerator, uniform int16 divisor) {
static unmasked inline int16 __fast_idiv(int16 numerator, uniform int16 divisor) {
uniform int64 method = __idiv_table_s16[divisor-2][0];
uniform int64 multiplier = __idiv_table_s16[divisor-2][1];
uniform int64 shift = __idiv_table_s16[divisor-2][2];
@@ -4889,8 +4889,8 @@ static unmasked int16 __fast_idiv(int16 numerator, uniform int16 divisor) {
}
__declspec(safe)
static unmasked inline unsigned int32 __fast_idiv(unsigned int32 numerator,
uniform unsigned int32 divisor) {
static unmasked inline unsigned int32 __fast_idiv(unsigned int32 numerator,
uniform unsigned int32 divisor) {
uniform int64 method = __idiv_table_u32[divisor-2][0];
uniform int64 multiplier = __idiv_table_u32[divisor-2][1];
uniform int64 shift = __idiv_table_u32[divisor-2][2];
@@ -4910,7 +4910,7 @@ static unmasked inline unsigned int32 __fast_idiv(unsigned int32 numerator,
}
__declspec(safe)
static unmasked int32 __fast_idiv(int32 numerator, uniform int32 divisor) {
static unmasked inline int32 __fast_idiv(int32 numerator, uniform int32 divisor) {
uniform int64 method = __idiv_table_s32[divisor-2][0];
uniform int64 multiplier = __idiv_table_s32[divisor-2][1];
uniform int64 shift = __idiv_table_s32[divisor-2][2];
@@ -4927,3 +4927,45 @@ static unmasked int32 __fast_idiv(int32 numerator, uniform int32 divisor) {
}
}
///////////////////////////////////////////////////////////////////////////
// Averaging int8/int16 ops
// avg_up() returns the average of its two arguments, rounding up when
// the exact average is not an integer; avg_down() rounds down.  Each
// overload forwards to the corresponding target builtin, which may be
// implemented with specialized hardware instructions where available.
__declspec(safe)
static unmasked inline unsigned int8 avg_up(unsigned int8 a, unsigned int8 b) {
return __avg_up_uint8(a, b);
}
__declspec(safe)
static unmasked inline int8 avg_up(int8 a, int8 b) {
return __avg_up_int8(a, b);
}
__declspec(safe)
static unmasked inline unsigned int16 avg_up(unsigned int16 a, unsigned int16 b) {
return __avg_up_uint16(a, b);
}
__declspec(safe)
static unmasked inline int16 avg_up(int16 a, int16 b) {
return __avg_up_int16(a, b);
}
__declspec(safe)
static unmasked inline unsigned int8 avg_down(unsigned int8 a, unsigned int8 b) {
return __avg_down_uint8(a, b);
}
__declspec(safe)
static unmasked inline int8 avg_down(int8 a, int8 b) {
return __avg_down_int8(a, b);
}
__declspec(safe)
static unmasked inline unsigned int16 avg_down(unsigned int16 a, unsigned int16 b) {
return __avg_down_uint16(a, b);
}
__declspec(safe)
static unmasked inline int16 avg_down(int16 a, int16 b) {
return __avg_down_int16(a, b);
}

13
tests/avg-down-int16.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_down() on varying int16.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
int16 a = aFOO[programIndex];
int16 b = bf;
RET[programIndex] = avg_down(a, b);
}
// Expected: (a + b) / 2, rounded down (inputs are nonnegative here).
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5) / 2;
}

13
tests/avg-down-int8.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_down() on varying int8.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
int8 a = aFOO[programIndex];
int8 b = bf;
RET[programIndex] = avg_down(a, b);
}
// Expected: (a + b) / 2, rounded down (inputs are nonnegative here).
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5) / 2;
}

View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_down() on varying unsigned int16.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
unsigned int16 a = aFOO[programIndex];
unsigned int16 b = bf;
RET[programIndex] = avg_down(a, b);
}
// Expected: (a + b) / 2, rounded down.
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5) / 2;
}

13
tests/avg-down-uint8.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_down() on varying unsigned int8.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
unsigned int8 a = aFOO[programIndex];
unsigned int8 b = bf;
RET[programIndex] = avg_down(a, b);
}
// Expected: (a + b) / 2, rounded down.
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5) / 2;
}

13
tests/avg-up-int16.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_up() on varying int16.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
int16 a = aFOO[programIndex];
int16 b = bf;
RET[programIndex] = avg_up(a, b);
}
// Expected: (a + b + 1) / 2, i.e. the average rounded up.
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5 + 1) / 2;
}

13
tests/avg-up-int8.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_up() on varying int8.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
int8 a = aFOO[programIndex];
int8 b = bf;
RET[programIndex] = avg_up(a, b);
}
// Expected: (a + b + 1) / 2, i.e. the average rounded up.
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5 + 1) / 2;
}

13
tests/avg-up-uint16.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_up() on varying unsigned int16.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
unsigned int16 a = aFOO[programIndex];
unsigned int16 b = bf;
RET[programIndex] = avg_up(a, b);
}
// Expected: (a + b + 1) / 2, i.e. the average rounded up.
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5 + 1) / 2;
}

13
tests/avg-up-uint8.ispc Normal file
View File

@@ -0,0 +1,13 @@
// Test for stdlib avg_up() on varying unsigned int8.
export uniform int width() { return programCount; }
// result() encodes the expectation assuming the harness passes
// aFOO[i] == i+1 and bf == 5 -- TODO confirm against the test runner.
export void f_fu(uniform float RET[], uniform float aFOO[], uniform float bf) {
unsigned int8 a = aFOO[programIndex];
unsigned int8 b = bf;
RET[programIndex] = avg_up(a, b);
}
// Expected: (a + b + 1) / 2, i.e. the average rounded up.
export void result(uniform float RET[]) {
RET[programIndex] = ((int)programIndex + 1 + 5 + 1) / 2;
}