diff --git a/builtins/dispatch.ll b/builtins/dispatch.ll index 85825a58..799fb3eb 100644 --- a/builtins/dispatch.ll +++ b/builtins/dispatch.ll @@ -110,8 +110,8 @@ define(`PTR_OP_ARGS', ifelse(LLVM_VERSION, LLVM_3_7, - ``$1 , $1 * $2'', - ``$1 * $2'' + ``$1 , $1 *'', + ``$1 *'' ) ) @@ -173,7 +173,7 @@ declare void @abort() noreturn nounwind define void @__set_system_isa() { entry: - %bi = load PTR_OP_ARGS(`i32 ',` @__system_best_isa') + %bi = load PTR_OP_ARGS(`i32 ') @__system_best_isa %unset = icmp eq i32 %bi, -1 br i1 %unset, label %set_system_isa, label %done diff --git a/builtins/svml.m4 b/builtins/svml.m4 index 5462ad07..0a587577 100644 --- a/builtins/svml.m4 +++ b/builtins/svml.m4 @@ -159,8 +159,8 @@ define(`svml_define',` ;; i32 4, i32 5, i32 6, i32 7> ;; store <8 x float> %sin, <8 x float> * %1 ;; -;; %cosa = load PTR_OP_ARGS(`<4 x float> ',` %cospa') -;; %cosb = load PTR_OP_ARGS(`<4 x float> ',` %cospb') +;; %cosa = load <4 x float> * %cospa +;; %cosb = load <4 x float> * %cospb ;; %cos = shufflevector <4 x float> %cosa, <4 x float> %cosb, ;; <8 x i32> diff --git a/builtins/target-avx-common.ll b/builtins/target-avx-common.ll index 781367ca..91638457 100644 --- a/builtins/target-avx-common.ll +++ b/builtins/target-avx-common.ll @@ -191,7 +191,7 @@ define void @__fastmath() nounwind alwaysinline { %ptr = alloca i32 %ptr8 = bitcast i32 * %ptr to i8 * call void @llvm.x86.sse.stmxcsr(i8 * %ptr8) - %oldval = load PTR_OP_ARGS(`i32 ',`%ptr') + %oldval = load PTR_OP_ARGS(`i32 ') %ptr ; turn on DAZ (64)/FTZ (32768) -> 32832 %update = or i32 %oldval, 32832 diff --git a/builtins/target-avx-x2.ll b/builtins/target-avx-x2.ll index 0ada0592..ba750bb0 100644 --- a/builtins/target-avx-x2.ll +++ b/builtins/target-avx-x2.ll @@ -427,7 +427,7 @@ define <16 x i32> @__masked_load_i32(i8 *, <16 x i32> %mask) nounwind alwaysinli %val0 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8 * %0, <8 x float> %mask0) %mask1 = shufflevector <16 x float> %floatmask, <16 x float> undef, <8 x i32> - %ptr1 = getelementptr PTR_OP_ARGS(`i8', `%0, i32 32 ') ;; 8x4 bytes = 32 + %ptr1 = getelementptr PTR_OP_ARGS(`i8') %0, i32 32 ;; 8x4 bytes = 32 %val1 = call <8 x float> @llvm.x86.avx.maskload.ps.256(i8 * %ptr1, <8 x float> %mask1) %retval = shufflevector <8 x float> %val0, <8 x float> %val1, @@ -454,11 +454,11 @@ define <16 x i64> @__masked_load_i64(i8 *, <16 x i32> %mask) nounwind alwaysinli %mask3d = bitcast <8 x i32> %mask3 to <4 x double> %val0d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %0, <4 x double> %mask0d) - %ptr1 = getelementptr PTR_OP_ARGS(`i8', `%0, i32 32') + %ptr1 = getelementptr PTR_OP_ARGS(`i8') %0, i32 32 %val1d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %ptr1, <4 x double> %mask1d) - %ptr2 = getelementptr PTR_OP_ARGS(`i8', `%0, i32 64') + %ptr2 = getelementptr PTR_OP_ARGS(`i8') %0, i32 64 %val2d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %ptr2, <4 x double> %mask2d) - %ptr3 = getelementptr PTR_OP_ARGS(`i8', `%0, i32 96') + %ptr3 = getelementptr PTR_OP_ARGS(`i8') %0, i32 96 %val3d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %ptr3, <4 x double> %mask3d) %val01 = shufflevector <4 x double> %val0d, <4 x double> %val1d, @@ -504,7 +504,7 @@ define void @__masked_store_i32(<16 x i32>* nocapture, <16 x i32>, <8 x i32> call void @llvm.x86.avx.maskstore.ps.256(i8 * %ptr, <8 x float> %mask0, <8 x float> %val0) - %ptr1 = getelementptr PTR_OP_ARGS(`i8', `%ptr, i32 32') + %ptr1 = getelementptr PTR_OP_ARGS(`i8') %ptr, i32 32 call void @llvm.x86.avx.maskstore.ps.256(i8 
* %ptr1, <8 x float> %mask1, <8 x float> %val1) ret void @@ -539,11 +539,11 @@ define void @__masked_store_i64(<16 x i64>* nocapture, <16 x i64>, <4 x i32> call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr, <4 x double> %mask0d, <4 x double> %val0) - %ptr1 = getelementptr PTR_OP_ARGS(`i8', `%ptr, i32 32') + %ptr1 = getelementptr PTR_OP_ARGS(`i8') %ptr, i32 32 call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr1, <4 x double> %mask1d, <4 x double> %val1) - %ptr2 = getelementptr PTR_OP_ARGS(`i8', `%ptr, i32 64') + %ptr2 = getelementptr PTR_OP_ARGS(`i8') %ptr, i32 64 call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr2, <4 x double> %mask2d, <4 x double> %val2) - %ptr3 = getelementptr PTR_OP_ARGS(`i8', `%ptr, i32 96') + %ptr3 = getelementptr PTR_OP_ARGS(`i8') %ptr, i32 96 call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr3, <4 x double> %mask3d, <4 x double> %val3) ret void @@ -559,7 +559,7 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, define void @__masked_store_blend_i32(<16 x i32>* nocapture, <16 x i32>, <16 x i32>) nounwind alwaysinline { %maskAsFloat = bitcast <16 x i32> %2 to <16 x float> - %oldValue = load PTR_OP_ARGS(`<16 x i32>',` %0, align 4') + %oldValue = load PTR_OP_ARGS(`<16 x i32>') %0, align 4 %oldAsFloat = bitcast <16 x i32> %oldValue to <16 x float> %newAsFloat = bitcast <16 x i32> %1 to <16 x float> @@ -596,7 +596,7 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, define void @__masked_store_blend_i64(<16 x i64>* nocapture %ptr, <16 x i64> %newi64, <16 x i32> %mask) nounwind alwaysinline { - %oldValue = load PTR_OP_ARGS(`<16 x i64>',` %ptr, align 8') + %oldValue = load PTR_OP_ARGS(`<16 x i64>') %ptr, align 8 %old = bitcast <16 x i64> %oldValue to <16 x double> %old0d = shufflevector <16 x double> %old, <16 x double> undef, <4 x i32> diff --git a/builtins/target-avx.ll b/builtins/target-avx.ll index 5194b8b7..af5b54b2 100644 --- a/builtins/target-avx.ll +++ b/builtins/target-avx.ll @@ -420,7 +420,7 @@ define <8 x i64> @__masked_load_i64(i8 *, <8 x i32> %mask) nounwind alwaysinline %mask1d = bitcast <8 x i32> %mask1 to <4 x double> %val0d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %0, <4 x double> %mask0d) - %ptr1 = getelementptr PTR_OP_ARGS(`i8', `%0, i32 32') + %ptr1 = getelementptr PTR_OP_ARGS(`i8') %0, i32 32 %val1d = call <4 x double> @llvm.x86.avx.maskload.pd.256(i8 * %ptr1, <4 x double> %mask1d) %vald = shufflevector <4 x double> %val0d, <4 x double> %val1d, @@ -469,7 +469,7 @@ define void @__masked_store_i64(<8 x i64>* nocapture, <8 x i64>, <4 x i32> call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr, <4 x double> %mask0d, <4 x double> %val0) - %ptr1 = getelementptr PTR_OP_ARGS(`i8', `%ptr, i32 32') + %ptr1 = getelementptr PTR_OP_ARGS(`i8') %ptr, i32 32 call void @llvm.x86.avx.maskstore.pd.256(i8 * %ptr1, <4 x double> %mask1d, <4 x double> %val1) ret void } @@ -487,7 +487,7 @@ declare <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float>, <8 x float>, define void @__masked_store_blend_i32(<8 x i32>* nocapture, <8 x i32>, <8 x i32>) nounwind alwaysinline { %mask_as_float = bitcast <8 x i32> %2 to <8 x float> - %oldValue = load PTR_OP_ARGS(`<8 x i32>',` %0, align 4') + %oldValue = load PTR_OP_ARGS(`<8 x i32>') %0, align 4 %oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float> %newAsFloat = bitcast <8 x i32> %1 to <8 x float> %blend = call <8 x float> @llvm.x86.avx.blendv.ps.256(<8 x float> %oldAsFloat, @@ -501,7 +501,7 @@ define void @__masked_store_blend_i32(<8 x i32>* nocapture, <8 x i32>, define 
void @__masked_store_blend_i64(<8 x i64>* nocapture %ptr, <8 x i64> %new, <8 x i32> %i32mask) nounwind alwaysinline { - %oldValue = load PTR_OP_ARGS(`<8 x i64>',` %ptr, align 8') + %oldValue = load PTR_OP_ARGS(`<8 x i64>') %ptr, align 8 %mask = bitcast <8 x i32> %i32mask to <8 x float> ; Do 4x64-bit blends by doing two <8 x i32> blends, where the <8 x i32> values diff --git a/builtins/target-avx1-i64x4base.ll b/builtins/target-avx1-i64x4base.ll index 2b4d1d30..75fd4e00 100644 --- a/builtins/target-avx1-i64x4base.ll +++ b/builtins/target-avx1-i64x4base.ll @@ -453,7 +453,7 @@ define void @__masked_store_blend_i32(<4 x i32>* nocapture, <4 x i32>, <4 x i64>) nounwind alwaysinline { %mask = trunc <4 x i64> %2 to <4 x i32> %mask_as_float = bitcast <4 x i32> %mask to <4 x float> - %oldValue = load PTR_OP_ARGS(` <4 x i32>',` %0, align 4') + %oldValue = load PTR_OP_ARGS(` <4 x i32>') %0, align 4 %oldAsFloat = bitcast <4 x i32> %oldValue to <4 x float> %newAsFloat = bitcast <4 x i32> %1 to <4 x float> %blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %oldAsFloat, @@ -471,7 +471,7 @@ declare <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double>, <4 x double>, define void @__masked_store_blend_i64(<4 x i64>* nocapture , <4 x i64>, <4 x i64>) nounwind alwaysinline { %mask_as_double = bitcast <4 x i64> %2 to <4 x double> - %oldValue = load PTR_OP_ARGS(` <4 x i64>',` %0, align 4') + %oldValue = load PTR_OP_ARGS(` <4 x i64>') %0, align 4 %oldAsDouble = bitcast <4 x i64> %oldValue to <4 x double> %newAsDouble = bitcast <4 x i64> %1 to <4 x double> %blend = call <4 x double> @llvm.x86.avx.blendv.pd.256(<4 x double> %oldAsDouble, diff --git a/builtins/target-generic-1.ll b/builtins/target-generic-1.ll index 39f70412..6618347b 100644 --- a/builtins/target-generic-1.ll +++ b/builtins/target-generic-1.ll @@ -194,7 +194,7 @@ define <1 x float> @__vselect_float(<1 x float>, <1 x float>, define void @__masked_store_blend_i8(<1 x i8>* nocapture, <1 x i8>, <1 x i32> %mask) nounwind alwaysinline { - %val = load PTR_OP_ARGS(`<1 x i8> ',` %0, align 4') + %val = load PTR_OP_ARGS(`<1 x i8> ') %0, align 4 %newval = call <1 x i8> @__vselect_i8(<1 x i8> %val, <1 x i8> %1, <1 x i32> %mask) store <1 x i8> %newval, <1 x i8> * %0, align 4 ret void @@ -202,7 +202,7 @@ define void @__masked_store_blend_i8(<1 x i8>* nocapture, <1 x i8>, define void @__masked_store_blend_i16(<1 x i16>* nocapture, <1 x i16>, <1 x i32> %mask) nounwind alwaysinline { - %val = load PTR_OP_ARGS(`<1 x i16> ',` %0, align 4') + %val = load PTR_OP_ARGS(`<1 x i16> ') %0, align 4 %newval = call <1 x i16> @__vselect_i16(<1 x i16> %val, <1 x i16> %1, <1 x i32> %mask) store <1 x i16> %newval, <1 x i16> * %0, align 4 ret void @@ -210,7 +210,7 @@ define void @__masked_store_blend_i16(<1 x i16>* nocapture, <1 x i16>, define void @__masked_store_blend_i32(<1 x i32>* nocapture, <1 x i32>, <1 x i32> %mask) nounwind alwaysinline { - %val = load PTR_OP_ARGS(`<1 x i32> ',` %0, align 4') + %val = load PTR_OP_ARGS(`<1 x i32> ') %0, align 4 %newval = call <1 x i32> @__vselect_i32(<1 x i32> %val, <1 x i32> %1, <1 x i32> %mask) store <1 x i32> %newval, <1 x i32> * %0, align 4 ret void @@ -218,7 +218,7 @@ define void @__masked_store_blend_i32(<1 x i32>* nocapture, <1 x i32>, define void @__masked_store_blend_i64(<1 x i64>* nocapture, <1 x i64>, <1 x i32> %mask) nounwind alwaysinline { - %val = load PTR_OP_ARGS(`<1 x i64> ',` %0, align 4') + %val = load PTR_OP_ARGS(`<1 x i64> ') %0, align 4 %newval = call <1 x i64> @__vselect_i64(<1 x i64> %val, <1 x i64> %1, <1 x 
i32> %mask) store <1 x i64> %newval, <1 x i64> * %0, align 4 ret void diff --git a/builtins/target-generic-common.ll b/builtins/target-generic-common.ll index 1dfcdcc1..e0db9168 100644 --- a/builtins/target-generic-common.ll +++ b/builtins/target-generic-common.ll @@ -278,7 +278,7 @@ declare void @__masked_store_double(* nocapture, * nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -286,7 +286,7 @@ define void @__masked_store_blend_i8(* nocapture, , define void @__masked_store_blend_i16(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -294,7 +294,7 @@ define void @__masked_store_blend_i16(* nocapture, , define void @__masked_store_blend_i32(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -302,7 +302,7 @@ define void @__masked_store_blend_i32(* nocapture, , define void @__masked_store_blend_float(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -310,7 +310,7 @@ define void @__masked_store_blend_float(* nocapture, * nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -318,7 +318,7 @@ define void @__masked_store_blend_i64(* nocapture, define void @__masked_store_blend_double(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void diff --git a/builtins/target-neon-common.ll b/builtins/target-neon-common.ll index f6e14965..3d82bd19 100644 --- a/builtins/target-neon-common.ll +++ b/builtins/target-neon-common.ll @@ -279,7 +279,7 @@ masked_store_float_double() define void @__masked_store_blend_i8(* nocapture %ptr, %new, %mask) nounwind alwaysinline { - %old = load PTR_OP_ARGS(` ',` %ptr') + %old = load PTR_OP_ARGS(` ') %ptr %mask1 = trunc %mask to %result = select %mask1, %new, %old store %result, * %ptr @@ -288,7 +288,7 @@ define void @__masked_store_blend_i8(* nocapture %ptr, define void @__masked_store_blend_i16(* nocapture %ptr, %new, %mask) nounwind alwaysinline { - %old = load PTR_OP_ARGS(` ',` %ptr') + %old = load PTR_OP_ARGS(` ') %ptr %mask1 = trunc %mask to %result = select %mask1, %new, %old store %result, * %ptr @@ -297,7 +297,7 @@ define void @__masked_store_blend_i16(* nocapture %ptr, * nocapture %ptr, %new, %mask) nounwind alwaysinline { - %old = load PTR_OP_ARGS(` ',` %ptr') + %old = load PTR_OP_ARGS(` ') %ptr %mask1 = trunc %mask to %result = select %mask1, %new, %old store %result, * %ptr @@ -306,7 +306,7 @@ define void @__masked_store_blend_i32(* nocapture %ptr, * nocapture %ptr, %new, %mask) nounwind alwaysinline { - %old = load PTR_OP_ARGS(` ',` %ptr') + %old = load PTR_OP_ARGS(` ') %ptr %mask1 = trunc %mask to %result = select %mask1, %new, %old store %result, * %ptr diff --git a/builtins/target-nvptx.ll b/builtins/target-nvptx.ll index f9e6b2e0..ef6ff0e1 100644 --- a/builtins/target-nvptx.ll +++ b/builtins/target-nvptx.ll @@ -1523,7 +1523,7 @@ gen_masked_store(double) define void @__masked_store_blend_i8(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v 
store %v1, * %0 ret void @@ -1531,7 +1531,7 @@ define void @__masked_store_blend_i8(* nocapture, , define void @__masked_store_blend_i16(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -1539,7 +1539,7 @@ define void @__masked_store_blend_i16(* nocapture, , define void @__masked_store_blend_i32(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -1547,7 +1547,7 @@ define void @__masked_store_blend_i32(* nocapture, , define void @__masked_store_blend_float(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -1555,7 +1555,7 @@ define void @__masked_store_blend_float(* nocapture, * nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void @@ -1563,7 +1563,7 @@ define void @__masked_store_blend_i64(* nocapture, define void @__masked_store_blend_double(* nocapture, , ) nounwind alwaysinline { - %v = load PTR_OP_ARGS(` ',` %0') + %v = load PTR_OP_ARGS(` ') %0 %v1 = select %2, %1, %v store %v1, * %0 ret void diff --git a/builtins/target-sse2-common.ll b/builtins/target-sse2-common.ll index 88e6f43d..7ca94c23 100644 --- a/builtins/target-sse2-common.ll +++ b/builtins/target-sse2-common.ll @@ -97,7 +97,7 @@ define void @__fastmath() nounwind alwaysinline { %ptr = alloca i32 %ptr8 = bitcast i32 * %ptr to i8 * call void @llvm.x86.sse.stmxcsr(i8 * %ptr8) - %oldval = load PTR_OP_ARGS(`i32 ',`%ptr') + %oldval = load PTR_OP_ARGS(`i32 ') %ptr ; turn on DAZ (64)/FTZ (32768) -> 32832 %update = or i32 %oldval, 32832 diff --git a/builtins/target-sse2-x2.ll b/builtins/target-sse2-x2.ll index 5b40c046..c1e99912 100644 --- a/builtins/target-sse2-x2.ll +++ b/builtins/target-sse2-x2.ll @@ -576,7 +576,7 @@ masked_store_blend_8_16_by_8() define void @__masked_store_blend_i32(<8 x i32>* nocapture, <8 x i32>, <8 x i32> %mask) nounwind alwaysinline { - %val = load PTR_OP_ARGS(`<8 x i32> ',` %0, align 4') + %val = load PTR_OP_ARGS(`<8 x i32> ') %0, align 4 %newval = call <8 x i32> @__vselect_i32(<8 x i32> %val, <8 x i32> %1, <8 x i32> %mask) store <8 x i32> %newval, <8 x i32> * %0, align 4 ret void @@ -584,7 +584,7 @@ define void @__masked_store_blend_i32(<8 x i32>* nocapture, <8 x i32>, define void @__masked_store_blend_i64(<8 x i64>* nocapture %ptr, <8 x i64> %new, <8 x i32> %mask) nounwind alwaysinline { - %oldValue = load PTR_OP_ARGS(`<8 x i64>',` %ptr, align 8') + %oldValue = load PTR_OP_ARGS(`<8 x i64>') %ptr, align 8 ; Do 8x64-bit blends by doing two <8 x i32> blends, where the <8 x i32> values ; are actually bitcast <2 x i64> values diff --git a/builtins/target-sse2.ll b/builtins/target-sse2.ll index fa42bcea..946e50bc 100644 --- a/builtins/target-sse2.ll +++ b/builtins/target-sse2.ll @@ -399,7 +399,7 @@ reduce_equal(4) define void @__masked_store_blend_i32(<4 x i32>* nocapture, <4 x i32>, <4 x i32> %mask) nounwind alwaysinline { - %val = load PTR_OP_ARGS(`<4 x i32> ',` %0, align 4') + %val = load PTR_OP_ARGS(`<4 x i32> ') %0, align 4 %newval = call <4 x i32> @__vselect_i32(<4 x i32> %val, <4 x i32> %1, <4 x i32> %mask) store <4 x i32> %newval, <4 x i32> * %0, align 4 ret void @@ -407,7 +407,7 @@ define void @__masked_store_blend_i32(<4 x i32>* nocapture, <4 x i32>, define void 
@__masked_store_blend_i64(<4 x i64>* nocapture %ptr, <4 x i64> %new, <4 x i32> %mask) nounwind alwaysinline { - %oldValue = load PTR_OP_ARGS(`<4 x i64>',` %ptr, align 8') + %oldValue = load PTR_OP_ARGS(`<4 x i64>') %ptr, align 8 ; Do 4x64-bit blends by doing two <4 x i32> blends, where the <4 x i32> values ; are actually bitcast <2 x i64> values diff --git a/builtins/target-sse4-16.ll b/builtins/target-sse4-16.ll index 4b2b6b19..2434fa58 100644 --- a/builtins/target-sse4-16.ll +++ b/builtins/target-sse4-16.ll @@ -390,7 +390,7 @@ define void @__masked_store_blend_i64(<8 x i64>* nocapture, <8 x i64>, <8 x MASK> %mask) nounwind alwaysinline { %mask_as_i1 = trunc <8 x MASK> %mask to <8 x i1> - %old = load PTR_OP_ARGS(`<8 x i64>',` %0, align 4') + %old = load PTR_OP_ARGS(`<8 x i64>') %0, align 4 %blend = select <8 x i1> %mask_as_i1, <8 x i64> %1, <8 x i64> %old store <8 x i64> %blend, <8 x i64>* %0, align 4 ret void @@ -399,7 +399,7 @@ define void @__masked_store_blend_i64(<8 x i64>* nocapture, <8 x i64>, define void @__masked_store_blend_i32(<8 x i32>* nocapture, <8 x i32>, <8 x MASK> %mask) nounwind alwaysinline { %mask_as_i1 = trunc <8 x MASK> %mask to <8 x i1> - %old = load PTR_OP_ARGS(`<8 x i32>',` %0, align 4') + %old = load PTR_OP_ARGS(`<8 x i32>') %0, align 4 %blend = select <8 x i1> %mask_as_i1, <8 x i32> %1, <8 x i32> %old store <8 x i32> %blend, <8 x i32>* %0, align 4 ret void @@ -408,7 +408,7 @@ define void @__masked_store_blend_i32(<8 x i32>* nocapture, <8 x i32>, define void @__masked_store_blend_i16(<8 x i16>* nocapture, <8 x i16>, <8 x MASK> %mask) nounwind alwaysinline { %mask_as_i1 = trunc <8 x MASK> %mask to <8 x i1> - %old = load PTR_OP_ARGS(`<8 x i16>',` %0, align 4') + %old = load PTR_OP_ARGS(`<8 x i16>') %0, align 4 %blend = select <8 x i1> %mask_as_i1, <8 x i16> %1, <8 x i16> %old store <8 x i16> %blend, <8 x i16>* %0, align 4 ret void @@ -417,7 +417,7 @@ define void @__masked_store_blend_i16(<8 x i16>* nocapture, <8 x i16>, define void @__masked_store_blend_i8(<8 x i8>* nocapture, <8 x i8>, <8 x MASK> %mask) nounwind alwaysinline { %mask_as_i1 = trunc <8 x MASK> %mask to <8 x i1> - %old = load PTR_OP_ARGS(`<8 x i8>',` %0, align 4') + %old = load PTR_OP_ARGS(`<8 x i8>') %0, align 4 %blend = select <8 x i1> %mask_as_i1, <8 x i8> %1, <8 x i8> %old store <8 x i8> %blend, <8 x i8>* %0, align 4 ret void diff --git a/builtins/target-sse4-8.ll b/builtins/target-sse4-8.ll index 65024bcf..d654c90a 100644 --- a/builtins/target-sse4-8.ll +++ b/builtins/target-sse4-8.ll @@ -396,7 +396,7 @@ define void @__masked_store_blend_i64(<16 x i64>* nocapture, <16 x i64>, <16 x i8> %mask) nounwind alwaysinline { %mask_as_i1 = trunc <16 x MASK> %mask to <16 x i1> - %old = load PTR_OP_ARGS(`<16 x i64>',` %0, align 4') + %old = load PTR_OP_ARGS(`<16 x i64>') %0, align 4 %blend = select <16 x i1> %mask_as_i1, <16 x i64> %1, <16 x i64> %old store <16 x i64> %blend, <16 x i64>* %0, align 4 ret void @@ -405,7 +405,7 @@ define void @__masked_store_blend_i64(<16 x i64>* nocapture, <16 x i64>, define void @__masked_store_blend_i32(<16 x i32>* nocapture, <16 x i32>, <16 x MASK> %mask) nounwind alwaysinline { %mask_as_i1 = trunc <16 x MASK> %mask to <16 x i1> - %old = load PTR_OP_ARGS(`<16 x i32>',` %0, align 4') + %old = load PTR_OP_ARGS(`<16 x i32>') %0, align 4 %blend = select <16 x i1> %mask_as_i1, <16 x i32> %1, <16 x i32> %old store <16 x i32> %blend, <16 x i32>* %0, align 4 ret void @@ -414,7 +414,7 @@ define void @__masked_store_blend_i32(<16 x i32>* nocapture, <16 x i32>, define void 
@__masked_store_blend_i16(<16 x i16>* nocapture, <16 x i16>, <16 x MASK> %mask) nounwind alwaysinline { %mask_as_i1 = trunc <16 x MASK> %mask to <16 x i1> - %old = load PTR_OP_ARGS(`<16 x i16>',` %0, align 4') + %old = load PTR_OP_ARGS(`<16 x i16>') %0, align 4 %blend = select <16 x i1> %mask_as_i1, <16 x i16> %1, <16 x i16> %old store <16 x i16> %blend, <16 x i16>* %0, align 4 ret void @@ -424,7 +424,7 @@ declare <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8>, <16 x i8>, <16 x i8>) noun define void @__masked_store_blend_i8(<16 x i8>* nocapture, <16 x i8>, <16 x MASK> %mask) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<16 x i8>',` %0, align 4') + %old = load PTR_OP_ARGS(`<16 x i8>') %0, align 4 %blend = call <16 x i8> @llvm.x86.sse41.pblendvb(<16 x i8> %old, <16 x i8> %1, <16 x i8> %mask) store <16 x i8> %blend, <16 x i8>* %0, align 4 diff --git a/builtins/target-sse4-common.ll b/builtins/target-sse4-common.ll index 887c804c..0d60c632 100644 --- a/builtins/target-sse4-common.ll +++ b/builtins/target-sse4-common.ll @@ -187,7 +187,7 @@ define void @__fastmath() nounwind alwaysinline { %ptr = alloca i32 %ptr8 = bitcast i32 * %ptr to i8 * call void @llvm.x86.sse.stmxcsr(i8 * %ptr8) - %oldval = load PTR_OP_ARGS(`i32 ',`%ptr') + %oldval = load PTR_OP_ARGS(`i32 ') %ptr ; turn on DAZ (64)/FTZ (32768) -> 32832 %update = or i32 %oldval, 32832 diff --git a/builtins/target-sse4-x2.ll b/builtins/target-sse4-x2.ll index 240573c9..90b1dff4 100644 --- a/builtins/target-sse4-x2.ll +++ b/builtins/target-sse4-x2.ll @@ -471,7 +471,7 @@ define void @__masked_store_blend_i32(<8 x i32>* nocapture, <8 x i32>, <4 x i32> %mask_b = shufflevector <8 x float> %mask_as_float, <8 x float> undef, <4 x i32> - %oldValue = load PTR_OP_ARGS(`<8 x i32>',` %0, align 4') + %oldValue = load PTR_OP_ARGS(`<8 x i32>') %0, align 4 %oldAsFloat = bitcast <8 x i32> %oldValue to <8 x float> %newAsFloat = bitcast <8 x i32> %1 to <8 x float> %old_a = shufflevector <8 x float> %oldAsFloat, <8 x float> undef, @@ -500,7 +500,7 @@ define void @__masked_store_blend_i64(<8 x i64>* nocapture %ptr, <8 x i64> %new, %mask_as_float = bitcast <8 x i32> %mask to <8 x float> - %old = load PTR_OP_ARGS(`<8 x i64>',` %ptr, align 8') + %old = load PTR_OP_ARGS(`<8 x i64>') %ptr, align 8 ; set up the first two 64-bit values %old01 = shufflevector <8 x i64> %old, <8 x i64> undef, <2 x i32> diff --git a/builtins/target-sse4.ll b/builtins/target-sse4.ll index 2c561aa0..21d1c4c1 100644 --- a/builtins/target-sse4.ll +++ b/builtins/target-sse4.ll @@ -433,7 +433,7 @@ declare <4 x float> @llvm.x86.sse41.blendvps(<4 x float>, <4 x float>, define void @__masked_store_blend_i32(<4 x i32>* nocapture, <4 x i32>, <4 x i32> %mask) nounwind alwaysinline { %mask_as_float = bitcast <4 x i32> %mask to <4 x float> - %oldValue = load PTR_OP_ARGS(`<4 x i32>',` %0, align 4') + %oldValue = load PTR_OP_ARGS(`<4 x i32>') %0, align 4 %oldAsFloat = bitcast <4 x i32> %oldValue to <4 x float> %newAsFloat = bitcast <4 x i32> %1 to <4 x float> %blend = call <4 x float> @llvm.x86.sse41.blendvps(<4 x float> %oldAsFloat, @@ -447,7 +447,7 @@ define void @__masked_store_blend_i32(<4 x i32>* nocapture, <4 x i32>, define void @__masked_store_blend_i64(<4 x i64>* nocapture %ptr, <4 x i64> %new, <4 x i32> %i32mask) nounwind alwaysinline { - %oldValue = load PTR_OP_ARGS(`<4 x i64>',` %ptr, align 8') + %oldValue = load PTR_OP_ARGS(`<4 x i64>') %ptr, align 8 %mask = bitcast <4 x i32> %i32mask to <4 x float> ; Do 4x64-bit blends by doing two <4 x i32> blends, where the <4 x i32> values diff --git 
a/builtins/util-nvptx.m4 b/builtins/util-nvptx.m4 index b1b0f39e..55ce3ee3 100644 --- a/builtins/util-nvptx.m4 +++ b/builtins/util-nvptx.m4 @@ -2142,7 +2142,7 @@ declare void @_aligned_free(i8 *) define noalias i8 * @__new_uniform_32rt(i64 %size) { %conv = trunc i64 %size to i32 - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %ptr = tail call i8* @_aligned_malloc(i32 %conv, i32 %alignment) ret i8* %ptr } @@ -2151,16 +2151,16 @@ define @__new_varying32_32rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i32 %sz_LANE_ID, i32 %alignment) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 - %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE') + %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') - %r = load PTR_OP_ARGS(` ',` %ret') + %r = load PTR_OP_ARGS(` ') %ret ret %r } @@ -2195,7 +2195,7 @@ declare i8* @_aligned_malloc(i64, i64) declare void @_aligned_free(i8 *) define noalias i8 * @__new_uniform_64rt(i64 %size) { - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %alignment64 = sext i32 %alignment to i64 %ptr = tail call i8* @_aligned_malloc(i64 %size, i64 %alignment64) ret i8* %ptr @@ -2205,7 +2205,7 @@ define @__new_varying32_64rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %alignment64 = sext i32 %alignment to i64 per_lane(WIDTH, %mask, ` @@ -2213,10 +2213,10 @@ define @__new_varying32_64rt( %size, % %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64 %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 - %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE') + %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') - %r = load PTR_OP_ARGS(` ',` %ret') + %r = load PTR_OP_ARGS(` ') %ret ret %r } @@ -2224,17 +2224,17 @@ define @__new_varying64_64rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %alignment64 = sext i32 %alignment to i64 per_lane(WIDTH, %mask, ` %sz64_LANE_ID = extractelement %size, i32 LANE %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 - %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE') + %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') - %r = load PTR_OP_ARGS(` ',` %ret') + %r = load PTR_OP_ARGS(` ') %ret ret %r } @@ -2433,11 +2433,11 @@ define <$1 x i64> @__$2_varying_$3(<$1 x i64>, <$1 x i64>) nounwind alwaysinline %v1_`'i = extractelement <$1 x i64> %1, i32 i %c_`'i = icmp $4 i64 %v0_`'i, %v1_`'i %v_`'i = select i1 %c_`'i, i64 %v0_`'i, i64 %v1_`'i - %ptr_`'i = getelementptr PTR_OP_ARGS(`i64', `%r64ptr, i32 i') + %ptr_`'i = getelementptr 
PTR_OP_ARGS(`i64') %r64ptr, i32 i store i64 %v_`'i, i64 * %ptr_`'i ') - %ret = load PTR_OP_ARGS(`<$1 x i64> ',` %rptr') + %ret = load PTR_OP_ARGS(`<$1 x i64> ') %rptr ret <$1 x i64> %ret } ') @@ -2483,7 +2483,7 @@ entry: load: %ptr = bitcast i8 * %0 to * - %valall = load PTR_OP_ARGS(` ',` %ptr, align $2') + %valall = load PTR_OP_ARGS(` ') %ptr, align $2 ret %valall loop: @@ -2499,9 +2499,9 @@ load_lane: ; yes! do the load and store the result into the appropriate place in the ; allocaed memory above %ptr32 = bitcast i8 * %0 to $1 * - %lane_ptr = getelementptr PTR_OP_ARGS(`$1', `%ptr32, i32 %lane') - %val = load PTR_OP_ARGS(`$1 ',` %lane_ptr') - %store_ptr = getelementptr PTR_OP_ARGS(`$1', `%retptr32, i32 %lane') + %lane_ptr = getelementptr PTR_OP_ARGS(`$1') %ptr32, i32 %lane + %val = load PTR_OP_ARGS(`$1 ') %lane_ptr + %store_ptr = getelementptr PTR_OP_ARGS(`$1') %retptr32, i32 %lane store $1 %val, $1 * %store_ptr br label %lane_done @@ -2511,7 +2511,7 @@ lane_done: br i1 %done, label %return, label %loop return: - %r = load PTR_OP_ARGS(` ',` %retptr') + %r = load PTR_OP_ARGS(` ') %retptr ret %r } ') @@ -2525,7 +2525,7 @@ return: define(`gen_masked_store', ` define void @__masked_store_$1(* nocapture, , ) nounwind alwaysinline { per_lane(WIDTH, %2, ` - %ptr_LANE_ID = getelementptr PTR_OP_ARGS(`', `%0, i32 0, i32 LANE') + %ptr_LANE_ID = getelementptr PTR_OP_ARGS(`') %0, i32 0, i32 LANE %storeval_LANE_ID = extractelement %1, i32 LANE store $1 %storeval_LANE_ID, $1 * %ptr_LANE_ID') ret void @@ -2535,7 +2535,7 @@ define void @__masked_store_$1(* nocapture, , * nocapture, <4 x i8>, <4 x i32>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<4 x i8> ',` %0, align 1') + %old = load PTR_OP_ARGS(`<4 x i8> ') %0, align 1 ifelse(LLVM_VERSION,LLVM_3_0,` %old32 = bitcast <4 x i8> %old to i32 %new32 = bitcast <4 x i8> %1 to i32 @@ -2559,7 +2559,7 @@ define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>, define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>, <4 x i32>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<4 x i16> ',` %0, align 2') + %old = load PTR_OP_ARGS(`<4 x i16> ') %0, align 2 ifelse(LLVM_VERSION,LLVM_3_0,` %old64 = bitcast <4 x i16> %old to i64 %new64 = bitcast <4 x i16> %1 to i64 @@ -2585,7 +2585,7 @@ define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>, define(`masked_store_blend_8_16_by_4_mask64', ` define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>, <4 x i64>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<4 x i8> ',` %0, align 1') + %old = load PTR_OP_ARGS(`<4 x i8> ') %0, align 1 ifelse(LLVM_VERSION,LLVM_3_0,` %old32 = bitcast <4 x i8> %old to i32 %new32 = bitcast <4 x i8> %1 to i32 @@ -2609,7 +2609,7 @@ define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>, define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>, <4 x i64>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<4 x i16> ',` %0, align 2') + %old = load PTR_OP_ARGS(`<4 x i16> ') %0, align 2 ifelse(LLVM_VERSION,LLVM_3_0,` %old64 = bitcast <4 x i16> %old to i64 %new64 = bitcast <4 x i16> %1 to i64 @@ -2635,7 +2635,7 @@ define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>, define(`masked_store_blend_8_16_by_8', ` define void @__masked_store_blend_i8(<8 x i8>* nocapture, <8 x i8>, <8 x i32>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<8 x i8> ',` %0, align 1') + %old = load PTR_OP_ARGS(`<8 x i8> ') %0, align 1 ifelse(LLVM_VERSION,LLVM_3_0,` %old64 = bitcast <8 x i8> %old to i64 %new64 = bitcast <8 
x i8> %1 to i64 @@ -2659,7 +2659,7 @@ define void @__masked_store_blend_i8(<8 x i8>* nocapture, <8 x i8>, define void @__masked_store_blend_i16(<8 x i16>* nocapture, <8 x i16>, <8 x i32>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<8 x i16> ',` %0, align 2') + %old = load PTR_OP_ARGS(`<8 x i16> ') %0, align 2 ifelse(LLVM_VERSION,LLVM_3_0,` %old128 = bitcast <8 x i16> %old to i128 %new128 = bitcast <8 x i16> %1 to i128 @@ -2686,7 +2686,7 @@ define void @__masked_store_blend_i16(<8 x i16>* nocapture, <8 x i16>, define(`masked_store_blend_8_16_by_16', ` define void @__masked_store_blend_i8(<16 x i8>* nocapture, <16 x i8>, <16 x i32>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<16 x i8> ',` %0, align 1') + %old = load PTR_OP_ARGS(`<16 x i8> ') %0, align 1 ifelse(LLVM_VERSION,LLVM_3_0,` %old128 = bitcast <16 x i8> %old to i128 %new128 = bitcast <16 x i8> %1 to i128 @@ -2710,7 +2710,7 @@ define void @__masked_store_blend_i8(<16 x i8>* nocapture, <16 x i8>, define void @__masked_store_blend_i16(<16 x i16>* nocapture, <16 x i16>, <16 x i32>) nounwind alwaysinline { - %old = load PTR_OP_ARGS(`<16 x i16> ',` %0, align 2') + %old = load PTR_OP_ARGS(`<16 x i16> ') %0, align 2 ifelse(LLVM_VERSION,LLVM_3_0,` %old256 = bitcast <16 x i16> %old to i256 %new256 = bitcast <16 x i16> %1 to i256 @@ -2759,8 +2759,8 @@ entry: if.then: ; preds = %entry %idxprom = ashr i64 %call, 32 - %arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32', `startptr, i64 %idxprom') - %val = load PTR_OP_ARGS(`i32',` %arrayidx, align 4') + %arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32') startptr, i64 %idxprom + %val = load PTR_OP_ARGS(`i32') %arrayidx, align 4 %valvec = insertelement <1 x i32> undef, i32 %val, i32 0 store <1 x i32> %valvec, <1 x i32>* %val_ptr, align 4 br label %if.end @@ -2780,7 +2780,7 @@ entry: if.then: ; preds = %entry %idxprom = ashr i64 %call, 32 - %arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32', `startptr, i64 %idxprom') + %arrayidx = getelementptr inbounds PTR_OP_ARGS(`i32') startptr, i64 %idxprom %val = extractelement <1 x i32> %vals, i32 0 store i32 %val, i32* %arrayidx, align 4 br label %if.end @@ -2849,7 +2849,7 @@ domixed: %castptr = bitcast <$1 x $2> * %ptr to <$1 x $4> * %castv = bitcast <$1 x $2> %v to <$1 x $4> call void @__masked_store_blend_i$6(<$1 x $4> * %castptr, <$1 x $4> %castv, <$1 x MASK> %mask) - %blendvec = load PTR_OP_ARGS(`<$1 x $2> ',` %ptr') + %blendvec = load PTR_OP_ARGS(`<$1 x $2> ') %ptr br label %check_neighbors check_neighbors: @@ -2985,12 +2985,12 @@ define @__gather32_$1( %ptrs, per_lane(WIDTH, %vecmask, ` %iptr_LANE_ID = extractelement %ptrs, i32 LANE %ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 * - %val_LANE_ID = load PTR_OP_ARGS(`$1 ',` %ptr_LANE_ID') - %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`', `%ret_ptr, i32 0, i32 LANE') + %val_LANE_ID = load PTR_OP_ARGS(`$1 ') %ptr_LANE_ID + %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`') %ret_ptr, i32 0, i32 LANE store $1 %val_LANE_ID, $1 * %store_ptr_LANE_ID ') - %ret = load PTR_OP_ARGS(` ',` %ret_ptr') + %ret = load PTR_OP_ARGS(` ') %ret_ptr ret %ret } @@ -3001,12 +3001,12 @@ define @__gather64_$1( %ptrs, per_lane(WIDTH, %vecmask, ` %iptr_LANE_ID = extractelement %ptrs, i32 LANE %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 * - %val_LANE_ID = load PTR_OP_ARGS(`$1 ',` %ptr_LANE_ID') - %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`', `%ret_ptr, i32 0, i32 LANE') + %val_LANE_ID = load PTR_OP_ARGS(`$1 ') %ptr_LANE_ID + %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`') %ret_ptr, i32 0, i32 LANE 
store $1 %val_LANE_ID, $1 * %store_ptr_LANE_ID ') - %ret = load PTR_OP_ARGS(` ',` %ret_ptr') + %ret = load PTR_OP_ARGS(` ') %ret_ptr ret %ret } ') @@ -3025,15 +3025,15 @@ define @__gather_elt32_$1(i8 * %ptr, %offsets, i32 %o %offset64 = sext i32 %offset32 to i64 %scale64 = sext i32 %offset_scale to i64 %offset = mul i64 %offset64, %scale64 - %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset') + %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset %delta = extractelement %offset_delta, i32 %lane %delta64 = sext i32 %delta to i64 - %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64') + %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64 ; load value and insert into returned value %ptrcast = bitcast i8 * %finalptr to $1 * - %val = load PTR_OP_ARGS(`$1 ',`%ptrcast') + %val = load PTR_OP_ARGS(`$1 ') %ptrcast %updatedret = insertelement %ret, $1 %val, i32 %lane ret %updatedret } @@ -3047,14 +3047,14 @@ define @__gather_elt64_$1(i8 * %ptr, %offsets, i32 %o ; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations %offset_scale64 = sext i32 %offset_scale to i64 %offset = mul i64 %offset64, %offset_scale64 - %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset') + %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset %delta64 = extractelement %offset_delta, i32 %lane - %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64') + %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64 ; load value and insert into returned value %ptrcast = bitcast i8 * %finalptr to $1 * - %val = load PTR_OP_ARGS(`$1 ',`%ptrcast') + %val = load PTR_OP_ARGS(`$1 ') %ptrcast %updatedret = insertelement %ret, $1 %val, i32 %lane ret %updatedret } @@ -3072,13 +3072,13 @@ define @__gather_factored_base_offsets32_$1(i8 * %ptr, zeroinitializer, * %offsetsPtr call void @__masked_store_blend_i32( * %offsetsPtr, %offsets, %vecmask) - %newOffsets = load PTR_OP_ARGS(` ',` %offsetsPtr') + %newOffsets = load PTR_OP_ARGS(` ') %offsetsPtr %deltaPtr = alloca store zeroinitializer, * %deltaPtr call void @__masked_store_blend_i32( * %deltaPtr, %offset_delta, %vecmask) - %newDelta = load PTR_OP_ARGS(` ',` %deltaPtr') + %newDelta = load PTR_OP_ARGS(` ') %deltaPtr %ret0 = call @__gather_elt32_$1(i8 * %ptr, %newOffsets, i32 %offset_scale, %newDelta, @@ -3103,13 +3103,13 @@ define @__gather_factored_base_offsets64_$1(i8 * %ptr, zeroinitializer, * %offsetsPtr call void @__masked_store_blend_i64( * %offsetsPtr, %offsets, %vecmask) - %newOffsets = load PTR_OP_ARGS(` ',` %offsetsPtr') + %newOffsets = load PTR_OP_ARGS(` ') %offsetsPtr %deltaPtr = alloca store zeroinitializer, * %deltaPtr call void @__masked_store_blend_i64( * %deltaPtr, %offset_delta, %vecmask) - %newDelta = load PTR_OP_ARGS(` ',` %deltaPtr') + %newDelta = load PTR_OP_ARGS(` ') %deltaPtr %ret0 = call @__gather_elt64_$1(i8 * %ptr, %newOffsets, i32 %offset_scale, %newDelta, @@ -3180,11 +3180,11 @@ define void @__scatter_elt32_$1(i8 * %ptr, %offsets, i32 %offset_s %offset64 = sext i32 %offset32 to i64 %scale64 = sext i32 %offset_scale to i64 %offset = mul i64 %offset64, %scale64 - %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset') + %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset %delta = extractelement %offset_delta, i32 %lane %delta64 = sext i32 %delta to i64 - %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64') + %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64 %ptrcast = bitcast i8 * 
%finalptr to $1 * %storeval = extractelement %values, i32 %lane @@ -3200,10 +3200,10 @@ define void @__scatter_elt64_$1(i8 * %ptr, %offsets, i32 %offset_s ; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations %scale64 = sext i32 %offset_scale to i64 %offset = mul i64 %offset64, %scale64 - %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset') + %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset %delta64 = extractelement %offset_delta, i32 %lane - %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64') + %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64 %ptrcast = bitcast i8 * %finalptr to $1 * %storeval = extractelement %values, i32 %lane diff --git a/builtins/util.m4 b/builtins/util.m4 index 7aef5429..8951605b 100644 --- a/builtins/util.m4 +++ b/builtins/util.m4 @@ -51,10 +51,10 @@ define(`MASK_HIGH_BIT_ON', ;; LLVM has different IR for different versions since 3.7 -define(`PTR_OP_ARGS', - ifelse(LLVM_VERSION, LLVM_3_7, - ``$1 , $1 * $2'', - ``$1 * $2'' +define(`PTR_OP_ARGS', + ifelse(LLVM_VERSION, LLVM_3_7, + ``$1 , $1 *'', + ``$1 *'' ) ) @@ -1204,34 +1204,34 @@ forloop(i, 1, eval(WIDTH-1), ` %ret_`'i = insertelement %ret_`'eva not_const: ; store two instances of the vector into memory %ptr = alloca , i32 2 - %ptr0 = getelementptr PTR_OP_ARGS(`', `%ptr, i32 0') + %ptr0 = getelementptr PTR_OP_ARGS(`') %ptr, i32 0 store %0, * %ptr0 - %ptr1 = getelementptr PTR_OP_ARGS(`', `%ptr, i32 1') + %ptr1 = getelementptr PTR_OP_ARGS(`') %ptr, i32 1 store %0, * %ptr1 ; compute offset in [0,vectorwidth-1], then index into the doubled-up vector %offset = and i32 %1, eval(WIDTH-1) %ptr_as_elt_array = bitcast * %ptr to [eval(2*WIDTH) x $1] * - %load_ptr = getelementptr PTR_OP_ARGS(`[eval(2*WIDTH) x $1]', `%ptr_as_elt_array, i32 0, i32 %offset') + %load_ptr = getelementptr PTR_OP_ARGS(`[eval(2*WIDTH) x $1]') %ptr_as_elt_array, i32 0, i32 %offset %load_ptr_vec = bitcast $1 * %load_ptr to * - %result = load PTR_OP_ARGS(` ',` %load_ptr_vec, align $2') + %result = load PTR_OP_ARGS(` ') %load_ptr_vec, align $2 ret %result } define @__shift_$1(, i32) nounwind readnone alwaysinline { %ptr = alloca , i32 3 - %ptr0 = getelementptr PTR_OP_ARGS(`', `%ptr, i32 0') + %ptr0 = getelementptr PTR_OP_ARGS(`') %ptr, i32 0 store zeroinitializer, * %ptr0 - %ptr1 = getelementptr PTR_OP_ARGS(`', `%ptr, i32 1') + %ptr1 = getelementptr PTR_OP_ARGS(`') %ptr, i32 1 store %0, * %ptr1 - %ptr2 = getelementptr PTR_OP_ARGS(`', `%ptr, i32 2') + %ptr2 = getelementptr PTR_OP_ARGS(`') %ptr, i32 2 store zeroinitializer, * %ptr2 %offset = add i32 %1, WIDTH %ptr_as_elt_array = bitcast * %ptr to [eval(3*WIDTH) x $1] * - %load_ptr = getelementptr PTR_OP_ARGS(`[eval(3*WIDTH) x $1]', `%ptr_as_elt_array, i32 0, i32 %offset') + %load_ptr = getelementptr PTR_OP_ARGS(`[eval(3*WIDTH) x $1]') %ptr_as_elt_array, i32 0, i32 %offset %load_ptr_vec = bitcast $1 * %load_ptr to * - %result = load PTR_OP_ARGS(` ',` %load_ptr_vec, align $2') + %result = load PTR_OP_ARGS(` ') %load_ptr_vec, align $2 ret %result } @@ -1276,13 +1276,13 @@ not_const: store %v2, * %ptr %baseptr = bitcast * %ptr to $1 * - %ptr_0 = getelementptr PTR_OP_ARGS(`$1', `%baseptr, i32 %index_0') - %val_0 = load PTR_OP_ARGS(`$1 ',` %ptr_0') + %ptr_0 = getelementptr PTR_OP_ARGS(`$1') %baseptr, i32 %index_0 + %val_0 = load PTR_OP_ARGS(`$1 ') %ptr_0 %result_0 = insertelement undef, $1 %val_0, i32 0 forloop(i, 1, eval(WIDTH-1), ` - %ptr_`'i = getelementptr PTR_OP_ARGS(`$1', `%baseptr, i32 %index_`'i') - %val_`'i = 
load PTR_OP_ARGS(`$1 ',` %ptr_`'i') + %ptr_`'i = getelementptr PTR_OP_ARGS(`$1') %baseptr, i32 %index_`'i + %val_`'i = load PTR_OP_ARGS(`$1 ') %ptr_`'i %result_`'i = insertelement %result_`'eval(i-1), $1 %val_`'i, i32 i ') @@ -1522,10 +1522,10 @@ define <$1 x $2> @__atomic_compare_exchange_$3_global($2* %ptr, <$1 x $2> %cmp, ',` %r_LANE_ID = cmpxchg $2 * %ptr, $2 %cmp_LANE_ID, $2 %val_LANE_ID seq_cst ') - %rp_LANE_ID = getelementptr PTR_OP_ARGS(`$2', `%rptr32, i32 LANE') + %rp_LANE_ID = getelementptr PTR_OP_ARGS(`$2') %rptr32, i32 LANE store $2 %r_LANE_ID, $2 * %rp_LANE_ID') - %r = load PTR_OP_ARGS(`<$1 x $2> ',` %rptr') + %r = load PTR_OP_ARGS(`<$1 x $2> ') %rptr ret <$1 x $2> %r } @@ -1790,13 +1790,13 @@ define void ;; Similarly for the output pointers %out0a = bitcast <8 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 %out1a = bitcast <8 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') + %out1b = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 1 %out2a = bitcast <8 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 %out3a = bitcast <8 x float> * %out3 to <4 x float> * - %out3b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 1') + %out3b = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 1 ;; Do the first part--given input vectors like ;; , @@ -1839,13 +1839,13 @@ define void <4 x i32> %out0a = bitcast <8 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 %out1a = bitcast <8 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') + %out1b = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 1 %out2a = bitcast <8 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 %out3a = bitcast <8 x float> * %out3 to <4 x float> * - %out3b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 1') + %out3b = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 1 ;; First part--given input vectors ;; @@ -1882,11 +1882,11 @@ define void <4 x i32> %out0a = bitcast <8 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 %out1a = bitcast <8 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') + %out1b = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 1 %out2a = bitcast <8 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 call void @__aos_to_soa3_float4(<4 x float> %v0a, <4 x float> %v0b, <4 x float> %v1a, <4 x float> * %out0a, <4 x float> * %out1a, @@ -1916,11 +1916,11 @@ define void <4 x i32> %out0a = bitcast <8 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 %out1a = bitcast <8 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') + %out1b = getelementptr PTR_OP_ARGS(`<4 
x float>') %out1a, i32 1 %out2a = bitcast <8 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 call void @__soa_to_aos3_float4(<4 x float> %v0a, <4 x float> %v1a, <4 x float> %v2a, <4 x float> * %out0a, <4 x float> * %out0b, @@ -1972,21 +1972,21 @@ define void <4 x i32> %out0a = bitcast <16 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') - %out0c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 2') - %out0d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 3') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 + %out0c = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 2 + %out0d = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 3 %out1a = bitcast <16 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') - %out1c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 2') - %out1d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 3') + %out1b = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 1 + %out1c = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 2 + %out1d = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 3 %out2a = bitcast <16 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') - %out2c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 2') - %out2d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 3') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 + %out2c = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 2 + %out2d = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 3 %out3a = bitcast <16 x float> * %out3 to <4 x float> * - %out3b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 1') - %out3c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 2') - %out3d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 3') + %out3b = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 1 + %out3c = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 2 + %out3d = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 3 call void @__aos_to_soa4_float4(<4 x float> %v0a, <4 x float> %v0b, <4 x float> %v0c, <4 x float> %v0d, <4 x float> * %out0a, @@ -2043,21 +2043,21 @@ define void <4 x i32> %out0a = bitcast <16 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') - %out0c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 2') - %out0d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 3') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 + %out0c = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 2 + %out0d = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 3 %out1a = bitcast <16 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') - %out1c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 2') - %out1d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 3') + %out1b = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 1 + %out1c = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 2 + %out1d = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 3 %out2a = bitcast <16 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') - %out2c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 2') - %out2d = getelementptr 
PTR_OP_ARGS(`<4 x float>', `%out2a, i32 3') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 + %out2c = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 2 + %out2d = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 3 %out3a = bitcast <16 x float> * %out3 to <4 x float> * - %out3b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 1') - %out3c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 2') - %out3d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out3a, i32 3') + %out3b = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 1 + %out3c = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 2 + %out3d = getelementptr PTR_OP_ARGS(`<4 x float>') %out3a, i32 3 call void @__soa_to_aos4_float4(<4 x float> %v0a, <4 x float> %v1a, <4 x float> %v2a, <4 x float> %v3a, <4 x float> * %out0a, @@ -2105,17 +2105,17 @@ define void <4 x i32> %out0a = bitcast <16 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') - %out0c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 2') - %out0d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 3') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 + %out0c = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 2 + %out0d = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 3 %out1a = bitcast <16 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') - %out1c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 2') - %out1d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 3') + %out1b = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 1 + %out1c = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 2 + %out1d = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 3 %out2a = bitcast <16 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') - %out2c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 2') - %out2d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 3') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 + %out2c = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 2 + %out2d = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 3 call void @__aos_to_soa3_float4(<4 x float> %v0a, <4 x float> %v0b, <4 x float> %v0c, <4 x float> * %out0a, <4 x float> * %out1a, @@ -2163,17 +2163,17 @@ define void <4 x i32> %out0a = bitcast <16 x float> * %out0 to <4 x float> * - %out0b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 1') - %out0c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 2') - %out0d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out0a, i32 3') + %out0b = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 1 + %out0c = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 2 + %out0d = getelementptr PTR_OP_ARGS(`<4 x float>') %out0a, i32 3 %out1a = bitcast <16 x float> * %out1 to <4 x float> * - %out1b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 1') - %out1c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 2') - %out1d = getelementptr PTR_OP_ARGS(`<4 x float>', `%out1a, i32 3') + %out1b = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 1 + %out1c = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 2 + %out1d = getelementptr PTR_OP_ARGS(`<4 x float>') %out1a, i32 3 %out2a = bitcast <16 x float> * %out2 to <4 x float> * - %out2b = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 1') - %out2c = getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 2') - %out2d = 
getelementptr PTR_OP_ARGS(`<4 x float>', `%out2a, i32 3') + %out2b = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 1 + %out2c = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 2 + %out2d = getelementptr PTR_OP_ARGS(`<4 x float>') %out2a, i32 3 call void @__soa_to_aos3_float4(<4 x float> %v0a, <4 x float> %v1a, <4 x float> %v2a, <4 x float> * %out0a, <4 x float> * %out0b, @@ -2198,13 +2198,13 @@ define void * noalias %out2, * noalias %out3) nounwind alwaysinline { %p0 = bitcast float * %p to * - %v0 = load PTR_OP_ARGS(` ',` %p0, align 4') - %p1 = getelementptr PTR_OP_ARGS(`', `%p0, i32 1') - %v1 = load PTR_OP_ARGS(` ',` %p1, align 4') - %p2 = getelementptr PTR_OP_ARGS(`', `%p0, i32 2') - %v2 = load PTR_OP_ARGS(` ',` %p2, align 4') - %p3 = getelementptr PTR_OP_ARGS(`', `%p0, i32 3') - %v3 = load PTR_OP_ARGS(` ',` %p3, align 4') + %v0 = load PTR_OP_ARGS(` ') %p0, align 4 + %p1 = getelementptr PTR_OP_ARGS(`') %p0, i32 1 + %v1 = load PTR_OP_ARGS(` ') %p1, align 4 + %p2 = getelementptr PTR_OP_ARGS(`') %p0, i32 2 + %v2 = load PTR_OP_ARGS(` ') %p2, align 4 + %p3 = getelementptr PTR_OP_ARGS(`') %p0, i32 3 + %v3 = load PTR_OP_ARGS(` ') %p3, align 4 call void @__aos_to_soa4_float`'WIDTH ( %v0, %v1, %v2, %v3, * %out0, * %out1, * %out2, * %out3) @@ -2216,9 +2216,9 @@ define void @__soa_to_aos4_float( %v0, %v1, %v2, %v3, float * noalias %p) nounwind alwaysinline { %out0 = bitcast float * %p to * - %out1 = getelementptr PTR_OP_ARGS(`', `%out0, i32 1') - %out2 = getelementptr PTR_OP_ARGS(`', `%out0, i32 2') - %out3 = getelementptr PTR_OP_ARGS(`', `%out0, i32 3') + %out1 = getelementptr PTR_OP_ARGS(`') %out0, i32 1 + %out2 = getelementptr PTR_OP_ARGS(`') %out0, i32 2 + %out3 = getelementptr PTR_OP_ARGS(`') %out0, i32 3 call void @__soa_to_aos4_float`'WIDTH ( %v0, %v1, %v2, %v3, * %out0, * %out1, * %out2, * %out3) @@ -2231,11 +2231,11 @@ define void * %out0, * %out1, * %out2) nounwind alwaysinline { %p0 = bitcast float * %p to * - %v0 = load PTR_OP_ARGS(` ',` %p0, align 4') - %p1 = getelementptr PTR_OP_ARGS(`', `%p0, i32 1') - %v1 = load PTR_OP_ARGS(` ',` %p1, align 4') - %p2 = getelementptr PTR_OP_ARGS(`', `%p0, i32 2') - %v2 = load PTR_OP_ARGS(` ',` %p2, align 4') + %v0 = load PTR_OP_ARGS(` ') %p0, align 4 + %p1 = getelementptr PTR_OP_ARGS(`') %p0, i32 1 + %v1 = load PTR_OP_ARGS(` ') %p1, align 4 + %p2 = getelementptr PTR_OP_ARGS(`') %p0, i32 2 + %v2 = load PTR_OP_ARGS(` ') %p2, align 4 call void @__aos_to_soa3_float`'WIDTH ( %v0, %v1, %v2, * %out0, * %out1, * %out2) @@ -2247,8 +2247,8 @@ define void @__soa_to_aos3_float( %v0, %v1, %v2, float * noalias %p) nounwind alwaysinline { %out0 = bitcast float * %p to * - %out1 = getelementptr PTR_OP_ARGS(`', `%out0, i32 1') - %out2 = getelementptr PTR_OP_ARGS(`', `%out0, i32 2') + %out1 = getelementptr PTR_OP_ARGS(`') %out0, i32 1 + %out2 = getelementptr PTR_OP_ARGS(`') %out0, i32 2 call void @__soa_to_aos3_float`'WIDTH ( %v0, %v1, %v2, * %out0, * %out1, * %out2) @@ -3397,9 +3397,9 @@ declare void @free(i8 *) define noalias i8 * @__new_uniform_32rt(i64 %size) { %ptr = alloca i8* %conv = trunc i64 %size to i32 - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %call1 = call i32 @posix_memalign(i8** %ptr, i32 %alignment, i32 %conv) - %ptr_val = load PTR_OP_ARGS(`i8*',` %ptr') + %ptr_val = load PTR_OP_ARGS(`i8*') %ptr ret i8* %ptr_val } @@ -3407,15 +3407,15 @@ define @__new_varying32_32rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * - %alignment = 
load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE - %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE') + %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i32 %alignment, i32 %sz_LANE_ID)') - %r = load PTR_OP_ARGS(` ',` %ret') + %r = load PTR_OP_ARGS(` ') %ret ret %r } @@ -3451,10 +3451,10 @@ declare void @free(i8 *) define noalias i8 * @__new_uniform_64rt(i64 %size) { %ptr = alloca i8* - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %alignment64 = sext i32 %alignment to i64 %call1 = call i32 @posix_memalign(i8** %ptr, i64 %alignment64, i64 %size) - %ptr_val = load PTR_OP_ARGS(`i8*',`%ptr') + %ptr_val = load PTR_OP_ARGS(`i8*') %ptr ret i8* %ptr_val } @@ -3462,17 +3462,17 @@ define @__new_varying32_64rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %alignment64 = sext i32 %alignment to i64 per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64 - %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE') + %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 %alignment64, i64 %sz64_LANE_ID)') - %r = load PTR_OP_ARGS(` ',` %ret') + %r = load PTR_OP_ARGS(` ') %ret ret %r } @@ -3480,16 +3480,16 @@ define @__new_varying64_64rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %alignment64 = sext i32 %alignment to i64 per_lane(WIDTH, %mask, ` %sz64_LANE_ID = extractelement %size, i32 LANE - %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE') + %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE %ptr_LANE_ID = bitcast i64* %store_LANE_ID to i8** %call_LANE_ID = call i32 @posix_memalign(i8** %ptr_LANE_ID, i64 %alignment64, i64 %sz64_LANE_ID)') - %r = load PTR_OP_ARGS(` ',` %ret') + %r = load PTR_OP_ARGS(` ') %ret ret %r } @@ -3533,7 +3533,7 @@ declare void @_aligned_free(i8 *) define noalias i8 * @__new_uniform_32rt(i64 %size) { %conv = trunc i64 %size to i32 - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment %ptr = tail call i8* @_aligned_malloc(i32 %conv, i32 %alignment) ret i8* %ptr } @@ -3542,16 +3542,16 @@ define @__new_varying32_32rt( %size, % %ret = alloca store zeroinitializer, * %ret %ret64 = bitcast * %ret to i64 * - %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment') + %alignment = load PTR_OP_ARGS(`i32') @memory_alignment per_lane(WIDTH, %mask, ` %sz_LANE_ID = extractelement %size, i32 LANE %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i32 %sz_LANE_ID, i32 %alignment) %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64 - %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE') + %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID') - %r = load PTR_OP_ARGS(` ',` %ret') + 
+  %r = load PTR_OP_ARGS(`<WIDTH x i64> ') %ret
   ret <WIDTH x i64> %r
 }
@@ -3586,7 +3586,7 @@ declare i8* @_aligned_malloc(i64, i64)
 declare void @_aligned_free(i8 *)
 
 define noalias i8 * @__new_uniform_64rt(i64 %size) {
-  %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment')
+  %alignment = load PTR_OP_ARGS(`i32') @memory_alignment
   %alignment64 = sext i32 %alignment to i64
   %ptr = tail call i8* @_aligned_malloc(i64 %size, i64 %alignment64)
   ret i8* %ptr
@@ -3596,7 +3596,7 @@ define <WIDTH x i64> @__new_varying32_64rt(<WIDTH x i32> %size, <WIDTH x MASK> %mask)
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
-  %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment')
+  %alignment = load PTR_OP_ARGS(`i32') @memory_alignment
   %alignment64 = sext i32 %alignment to i64
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
@@ -3604,10 +3604,10 @@ define <WIDTH x i64> @__new_varying32_64rt(<WIDTH x i32> %size, <WIDTH x MASK> %mask)
   %sz64_LANE_ID = zext i32 %sz_LANE_ID to i64
   %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64)
   %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
-  %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE')
+  %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE
   store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
 
-  %r = load PTR_OP_ARGS(`<WIDTH x i64> ',` %ret')
+  %r = load PTR_OP_ARGS(`<WIDTH x i64> ') %ret
   ret <WIDTH x i64> %r
 }
@@ -3615,17 +3615,17 @@ define <WIDTH x i64> @__new_varying64_64rt(<WIDTH x i64> %size, <WIDTH x MASK> %mask)
   %ret = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %ret
   %ret64 = bitcast <WIDTH x i64> * %ret to i64 *
-  %alignment = load PTR_OP_ARGS(`i32',` @memory_alignment')
+  %alignment = load PTR_OP_ARGS(`i32') @memory_alignment
   %alignment64 = sext i32 %alignment to i64
 
   per_lane(WIDTH, <WIDTH x MASK> %mask, `
   %sz64_LANE_ID = extractelement <WIDTH x i64> %size, i32 LANE
   %ptr_LANE_ID = call noalias i8 * @_aligned_malloc(i64 %sz64_LANE_ID, i64 %alignment64)
   %ptr_int_LANE_ID = ptrtoint i8 * %ptr_LANE_ID to i64
-  %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64', `%ret64, i32 LANE')
+  %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE
   store i64 %ptr_int_LANE_ID, i64 * %store_LANE_ID')
 
-  %r = load PTR_OP_ARGS(`<WIDTH x i64> ',` %ret')
+  %r = load PTR_OP_ARGS(`<WIDTH x i64> ') %ret
   ret <WIDTH x i64> %r
 }
@@ -3940,11 +3940,11 @@ define <$1 x i64> @__$2_varying_$3(<$1 x i64>, <$1 x i64>) nounwind alwaysinline
   %v1_`'i = extractelement <$1 x i64> %1, i32 i
   %c_`'i = icmp $4 i64 %v0_`'i, %v1_`'i
   %v_`'i = select i1 %c_`'i, i64 %v0_`'i, i64 %v1_`'i
-  %ptr_`'i = getelementptr PTR_OP_ARGS(`i64', `%r64ptr, i32 i')
+  %ptr_`'i = getelementptr PTR_OP_ARGS(`i64') %r64ptr, i32 i
   store i64 %v_`'i, i64 * %ptr_`'i
 ')
 
-  %ret = load PTR_OP_ARGS(`<$1 x i64> ',` %rptr')
+  %ret = load PTR_OP_ARGS(`<$1 x i64> ') %rptr
   ret <$1 x i64> %ret
 }
 ')
@@ -3990,7 +3990,7 @@ entry:
 
 load:
   %ptr = bitcast i8 * %0 to <WIDTH x $1> *
-  %valall = load PTR_OP_ARGS(`<WIDTH x $1> ',` %ptr, align $2')
+  %valall = load PTR_OP_ARGS(`<WIDTH x $1> ') %ptr, align $2
   ret <WIDTH x $1> %valall
 
 loop:
@@ -4006,9 +4006,9 @@ load_lane:
   ; yes! do the load and store the result into the appropriate place in the
  ; allocaed memory above
   %ptr32 = bitcast i8 * %0 to $1 *
-  %lane_ptr = getelementptr PTR_OP_ARGS(`$1', `%ptr32, i32 %lane')
-  %val = load PTR_OP_ARGS(`$1 ',` %lane_ptr')
-  %store_ptr = getelementptr PTR_OP_ARGS(`$1', `%retptr32, i32 %lane')
+  %lane_ptr = getelementptr PTR_OP_ARGS(`$1') %ptr32, i32 %lane
+  %val = load PTR_OP_ARGS(`$1 ') %lane_ptr
+  %store_ptr = getelementptr PTR_OP_ARGS(`$1') %retptr32, i32 %lane
   store $1 %val, $1 * %store_ptr
   br label %lane_done
@@ -4018,7 +4018,7 @@ lane_done:
   br i1 %done, label %return, label %loop
 
 return:
-  %r = load PTR_OP_ARGS(`<WIDTH x $1> ',` %retptr')
+  %r = load PTR_OP_ARGS(`<WIDTH x $1> ') %retptr
   ret <WIDTH x $1> %r
 }
 ')
@@ -4032,7 +4032,7 @@ return:
 define(`gen_masked_store', `
 define void @__masked_store_$1(<WIDTH x $1> * nocapture, <WIDTH x $1>, <WIDTH x MASK>) nounwind alwaysinline {
   per_lane(WIDTH, <WIDTH x MASK> %2, `
-      %ptr_LANE_ID = getelementptr PTR_OP_ARGS(`<WIDTH x $1>', `%0, i32 0, i32 LANE')
+      %ptr_LANE_ID = getelementptr PTR_OP_ARGS(`<WIDTH x $1>') %0, i32 0, i32 LANE
       %storeval_LANE_ID = extractelement <WIDTH x $1> %1, i32 LANE
       store $1 %storeval_LANE_ID, $1 * %ptr_LANE_ID')
   ret void
@@ -4042,7 +4042,7 @@ define void @__masked_store_$1(<WIDTH x $1> * nocapture, <WIDTH x $1>,
 define(`masked_store_blend_8_16_by_4', `
 define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>,
                                      <4 x i32>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<4 x i8> ',` %0, align 1')
+  %old = load PTR_OP_ARGS(`<4 x i8> ') %0, align 1
   %m = trunc <4 x i32> %2 to <4 x i1>
   %resultvec = select <4 x i1> %m, <4 x i8> %1, <4 x i8> %old
@@ -4053,7 +4053,7 @@ define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>,
 
 define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>,
                                       <4 x i32>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<4 x i16> ',` %0, align 2')
+  %old = load PTR_OP_ARGS(`<4 x i16> ') %0, align 2
   %m = trunc <4 x i32> %2 to <4 x i1>
   %resultvec = select <4 x i1> %m, <4 x i16> %1, <4 x i16> %old
@@ -4066,7 +4066,7 @@ define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>,
 define(`masked_store_blend_8_16_by_4_mask64', `
 define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>,
                                      <4 x i64>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<4 x i8> ',` %0, align 1')
+  %old = load PTR_OP_ARGS(`<4 x i8> ') %0, align 1
   %m = trunc <4 x i64> %2 to <4 x i1>
   %resultvec = select <4 x i1> %m, <4 x i8> %1, <4 x i8> %old
@@ -4077,7 +4077,7 @@ define void @__masked_store_blend_i8(<4 x i8>* nocapture, <4 x i8>,
 
 define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>,
                                       <4 x i64>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<4 x i16> ',` %0, align 2')
+  %old = load PTR_OP_ARGS(`<4 x i16> ') %0, align 2
   %m = trunc <4 x i64> %2 to <4 x i1>
   %resultvec = select <4 x i1> %m, <4 x i16> %1, <4 x i16> %old
@@ -4090,7 +4090,7 @@ define void @__masked_store_blend_i16(<4 x i16>* nocapture, <4 x i16>,
 define(`masked_store_blend_8_16_by_8', `
 define void @__masked_store_blend_i8(<8 x i8>* nocapture, <8 x i8>,
                                      <8 x i32>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<8 x i8> ',` %0, align 1')
+  %old = load PTR_OP_ARGS(`<8 x i8> ') %0, align 1
   %m = trunc <8 x i32> %2 to <8 x i1>
   %resultvec = select <8 x i1> %m, <8 x i8> %1, <8 x i8> %old
@@ -4101,7 +4101,7 @@ define void @__masked_store_blend_i8(<8 x i8>* nocapture, <8 x i8>,
 
 define void @__masked_store_blend_i16(<8 x i16>* nocapture, <8 x i16>,
                                       <8 x i32>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<8 x i16> ',` %0, align 2')
+  %old = load PTR_OP_ARGS(`<8 x i16> ') %0, align 2
   %m = trunc <8 x i32> %2 to <8 x i1>
   %resultvec = select <8 x i1> %m, <8 x i16> %1, <8 x i16> %old
@@ -4115,7 +4115,7 @@ define void @__masked_store_blend_i16(<8 x i16>* nocapture, <8 x i16>,
 define(`masked_store_blend_8_16_by_16', `
 define void @__masked_store_blend_i8(<16 x i8>* nocapture, <16 x i8>,
                                      <16 x i32>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<16 x i8> ',` %0, align 1')
+  %old = load PTR_OP_ARGS(`<16 x i8> ') %0, align 1
   %m = trunc <16 x i32> %2 to <16 x i1>
   %resultvec = select <16 x i1> %m, <16 x i8> %1, <16 x i8> %old
@@ -4126,7 +4126,7 @@ define void @__masked_store_blend_i8(<16 x i8>* nocapture, <16 x i8>,
 
 define void @__masked_store_blend_i16(<16 x i16>* nocapture, <16 x i16>,
                                       <16 x i32>) nounwind alwaysinline {
-  %old = load PTR_OP_ARGS(`<16 x i16> ',` %0, align 2')
+  %old = load PTR_OP_ARGS(`<16 x i16> ') %0, align 2
   %m = trunc <16 x i32> %2 to <16 x i1>
   %resultvec = select <16 x i1> %m, <16 x i16> %1, <16 x i16> %old
@@ -4167,7 +4167,7 @@ all_on:
   ;; everyone wants to load, so just load an entire vector width in a single
   ;; vector load
   %vecptr = bitcast i32 *%startptr to <WIDTH x i32> *
-  %vec_load = load PTR_OP_ARGS(`<WIDTH x i32> ',`%vecptr, align 4')
+  %vec_load = load PTR_OP_ARGS(`<WIDTH x i32> ') %vecptr, align 4
   store <WIDTH x i32> %vec_load, <WIDTH x i32> * %val_ptr, align 4
   ret i32 WIDTH
@@ -4185,10 +4185,10 @@ loop:
   br i1 %do_load, label %load, label %loopend
 
 load:
-  %loadptr = getelementptr PTR_OP_ARGS(`i32', `%startptr, i32 %offset')
-  %loadval = load PTR_OP_ARGS(`i32 ',`%loadptr')
+  %loadptr = getelementptr PTR_OP_ARGS(`i32') %startptr, i32 %offset
+  %loadval = load PTR_OP_ARGS(`i32 ') %loadptr
   %val_ptr_i32 = bitcast <WIDTH x i32> * %val_ptr to i32 *
-  %storeptr = getelementptr PTR_OP_ARGS(`i32', `%val_ptr_i32, i32 %lane')
+  %storeptr = getelementptr PTR_OP_ARGS(`i32') %val_ptr_i32, i32 %lane
   store i32 %loadval, i32 *%storeptr
   %offset1 = add i32 %offset, 1
   br label %loopend
@@ -4237,7 +4237,7 @@ loop:
 
 store:
   %storeval = extractelement <WIDTH x i32> %vals, i32 %lane
-  %storeptr = getelementptr PTR_OP_ARGS(`i32', `%startptr, i32 %offset')
+  %storeptr = getelementptr PTR_OP_ARGS(`i32') %startptr, i32 %offset
   store i32 %storeval, i32 *%storeptr
   %offset1 = add i32 %offset, 1
   br label %loopend
@@ -4283,9 +4283,9 @@ loop:
 ;; zero or sign extending it, while zero extend is free. Also do nothing for
 ;; i64 MASK, as we need i64 value.
 ifelse(MASK, `i64',
-`  %storeptr = getelementptr PTR_OP_ARGS(`i32', `%startptr, MASK %offset')',
+`  %storeptr = getelementptr PTR_OP_ARGS(`i32') %startptr, MASK %offset',
 `  %offset1 = zext MASK %offset to i64
-  %storeptr = getelementptr PTR_OP_ARGS(`i32', `%startptr, i64 %offset1')')
+  %storeptr = getelementptr PTR_OP_ARGS(`i32') %startptr, i64 %offset1')
   store i32 %storeval, i32 *%storeptr
 
   %mull_mask = extractelement <WIDTH x MASK> %full_mask, i32 %i
@@ -4351,7 +4351,7 @@ domixed:
   %castptr = bitcast <$1 x $2> * %ptr to <$1 x $4> *
   %castv = bitcast <$1 x $2> %v to <$1 x $4>
   call void @__masked_store_blend_i$6(<$1 x $4> * %castptr, <$1 x $4> %castv, <$1 x MASK> %mask)
-  %blendvec = load PTR_OP_ARGS(`<$1 x $2> ',` %ptr')
+  %blendvec = load PTR_OP_ARGS(`<$1 x $2> ') %ptr
   br label %check_neighbors
 
 check_neighbors:
@@ -4427,7 +4427,7 @@ define <$1 x $2> @__exclusive_scan_$6(<$1 x $2> %v,
   %vi = bitcast <$1 x $2> %v to <$1 x i`'$3>
   call void @__masked_store_blend_i$3(<$1 x i`'$3> * %ptr`'$3, <$1 x i`'$3> %vi, <$1 x MASK> %mask)
-  %v_id = load PTR_OP_ARGS(`<$1 x $2> ',` %ptr')
+  %v_id = load PTR_OP_ARGS(`<$1 x $2> ') %ptr
 
   ; extract elements of the vector to use in computing the scan
   forloop(i, 0, eval($1-1), `
@@ -4547,12 +4547,12 @@ define <WIDTH x $1> @__gather32_$1(<WIDTH x i32> %ptrs, <WIDTH x MASK> %vecmask)
   per_lane(WIDTH, <WIDTH x MASK> %vecmask, `
   %iptr_LANE_ID = extractelement <WIDTH x i32> %ptrs, i32 LANE
   %ptr_LANE_ID = inttoptr i32 %iptr_LANE_ID to $1 *
-  %val_LANE_ID = load PTR_OP_ARGS(`$1 ',` %ptr_LANE_ID')
-  %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`<WIDTH x $1>', `%ret_ptr, i32 0, i32 LANE')
+  %val_LANE_ID = load PTR_OP_ARGS(`$1 ') %ptr_LANE_ID
+  %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`<WIDTH x $1>') %ret_ptr, i32 0, i32 LANE
   store $1 %val_LANE_ID, $1 * %store_ptr_LANE_ID
 ')
 
-  %ret = load PTR_OP_ARGS(`<WIDTH x $1> ',` %ret_ptr')
+  %ret = load PTR_OP_ARGS(`<WIDTH x $1> ') %ret_ptr
   ret <WIDTH x $1> %ret
 }
@@ -4563,12 +4563,12 @@ define <WIDTH x $1> @__gather64_$1(<WIDTH x i64> %ptrs, <WIDTH x MASK> %vecmask)
   per_lane(WIDTH, <WIDTH x MASK> %vecmask, `
   %iptr_LANE_ID = extractelement <WIDTH x i64> %ptrs, i32 LANE
   %ptr_LANE_ID = inttoptr i64 %iptr_LANE_ID to $1 *
-  %val_LANE_ID = load PTR_OP_ARGS(`$1 ',` %ptr_LANE_ID')
-  %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`<WIDTH x $1>', `%ret_ptr, i32 0, i32 LANE')
+  %val_LANE_ID = load PTR_OP_ARGS(`$1 ') %ptr_LANE_ID
+  %store_ptr_LANE_ID = getelementptr PTR_OP_ARGS(`<WIDTH x $1>') %ret_ptr, i32 0, i32 LANE
   store $1 %val_LANE_ID, $1 * %store_ptr_LANE_ID
 ')
 
-  %ret = load PTR_OP_ARGS(`<WIDTH x $1> ',` %ret_ptr')
+  %ret = load PTR_OP_ARGS(`<WIDTH x $1> ') %ret_ptr
   ret <WIDTH x $1> %ret
 }
 ')
@@ -4587,15 +4587,15 @@ define <WIDTH x $1> @__gather_elt32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
   %offset64 = sext i32 %offset32 to i64
   %scale64 = sext i32 %offset_scale to i64
   %offset = mul i64 %offset64, %scale64
-  %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset')
+  %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset
 
   %delta = extractelement <WIDTH x i32> %offset_delta, i32 %lane
   %delta64 = sext i32 %delta to i64
-  %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64')
+  %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64
 
   ; load value and insert into returned value
   %ptrcast = bitcast i8 * %finalptr to $1 *
-  %val = load PTR_OP_ARGS(`$1 ',`%ptrcast')
+  %val = load PTR_OP_ARGS(`$1 ') %ptrcast
   %updatedret = insertelement <WIDTH x $1> %ret, $1 %val, i32 %lane
   ret <WIDTH x $1> %updatedret
 }
@@ -4609,14 +4609,14 @@ define <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
   ; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
   %offset_scale64 = sext i32 %offset_scale to i64
   %offset = mul i64 %offset64, %offset_scale64
-  %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset')
+  %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset
 
   %delta64 = extractelement <WIDTH x i64> %offset_delta, i32 %lane
-  %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64')
+  %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64
 
   ; load value and insert into returned value
   %ptrcast = bitcast i8 * %finalptr to $1 *
-  %val = load PTR_OP_ARGS(`$1 ',`%ptrcast')
+  %val = load PTR_OP_ARGS(`$1 ') %ptrcast
   %updatedret = insertelement <WIDTH x $1> %ret, $1 %val, i32 %lane
   ret <WIDTH x $1> %updatedret
 }
@@ -4634,13 +4634,13 @@ define <WIDTH x $1> @__gather_factored_base_offsets32_$1(i8 * %ptr, <WIDTH x i32> %offsets,
   store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %offsetsPtr
   call void @__masked_store_blend_i32(<WIDTH x i32> * %offsetsPtr, <WIDTH x i32> %offsets,
                                       <WIDTH x MASK> %vecmask)
-  %newOffsets = load PTR_OP_ARGS(`<WIDTH x i32> ',` %offsetsPtr')
+  %newOffsets = load PTR_OP_ARGS(`<WIDTH x i32> ') %offsetsPtr
 
   %deltaPtr = alloca <WIDTH x i32>
   store <WIDTH x i32> zeroinitializer, <WIDTH x i32> * %deltaPtr
   call void @__masked_store_blend_i32(<WIDTH x i32> * %deltaPtr, <WIDTH x i32> %offset_delta,
                                       <WIDTH x MASK> %vecmask)
-  %newDelta = load PTR_OP_ARGS(`<WIDTH x i32> ',` %deltaPtr')
+  %newDelta = load PTR_OP_ARGS(`<WIDTH x i32> ') %deltaPtr
 
   %ret0 = call <WIDTH x $1> @__gather_elt32_$1(i8 * %ptr, <WIDTH x i32> %newOffsets,
                                                i32 %offset_scale, <WIDTH x i32> %newDelta,
@@ -4665,13 +4665,13 @@ define <WIDTH x $1> @__gather_factored_base_offsets64_$1(i8 * %ptr, <WIDTH x i64> %offsets,
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %offsetsPtr
   call void @__masked_store_blend_i64(<WIDTH x i64> * %offsetsPtr, <WIDTH x i64> %offsets,
                                       <WIDTH x MASK> %vecmask)
-  %newOffsets = load PTR_OP_ARGS(`<WIDTH x i64> ',` %offsetsPtr')
+  %newOffsets = load PTR_OP_ARGS(`<WIDTH x i64> ') %offsetsPtr
 
   %deltaPtr = alloca <WIDTH x i64>
   store <WIDTH x i64> zeroinitializer, <WIDTH x i64> * %deltaPtr
   call void @__masked_store_blend_i64(<WIDTH x i64> * %deltaPtr, <WIDTH x i64> %offset_delta,
                                       <WIDTH x MASK> %vecmask)
-  %newDelta = load PTR_OP_ARGS(`<WIDTH x i64> ',` %deltaPtr')
+  %newDelta = load PTR_OP_ARGS(`<WIDTH x i64> ') %deltaPtr
 
   %ret0 = call <WIDTH x $1> @__gather_elt64_$1(i8 * %ptr, <WIDTH x i64> %newOffsets,
                                                i32 %offset_scale, <WIDTH x i64> %newDelta,
@@ -4742,11 +4742,11 @@ define void @__scatter_elt32_$1(i8 * %ptr, <WIDTH x i32> %offsets, i32 %offset_scale,
   %offset64 = sext i32 %offset32 to i64
   %scale64 = sext i32 %offset_scale to i64
   %offset = mul i64 %offset64, %scale64
-  %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset')
+  %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset
 
   %delta = extractelement <WIDTH x i32> %offset_delta, i32 %lane
   %delta64 = sext i32 %delta to i64
-  %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64')
+  %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64
 
   %ptrcast = bitcast i8 * %finalptr to $1 *
   %storeval = extractelement <WIDTH x $1> %values, i32 %lane
@@ -4762,10 +4762,10 @@ define void @__scatter_elt64_$1(i8 * %ptr, <WIDTH x i64> %offsets, i32 %offset_scale,
   ; patterns that apply the free x86 2x/4x/8x scaling in addressing calculations
   %scale64 = sext i32 %offset_scale to i64
   %offset = mul i64 %offset64, %scale64
-  %ptroffset = getelementptr PTR_OP_ARGS(`i8', `%ptr, i64 %offset')
+  %ptroffset = getelementptr PTR_OP_ARGS(`i8') %ptr, i64 %offset
 
   %delta64 = extractelement <WIDTH x i64> %offset_delta, i32 %lane
-  %finalptr = getelementptr PTR_OP_ARGS(`i8', `%ptroffset, i64 %delta64')
+  %finalptr = getelementptr PTR_OP_ARGS(`i8') %ptroffset, i64 %delta64
 
   %ptrcast = bitcast i8 * %finalptr to $1 *
   %storeval = extractelement <WIDTH x $1> %values, i32 %lane
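
All of the hunks above are instances of one mechanical rewrite: LLVM 3.7 makes the pointee type an explicit operand of `load` and `getelementptr` in textual IR, so the two-argument PTR_OP_ARGS(`type ',` operand') form becomes a one-argument PTR_OP_ARGS(`type ') with the pointer operand moved outside the macro call. As a rough sketch, assuming PTR_OP_ARGS expands to "T , T *" when LLVM_VERSION is LLVM_3_7 and to "T *" for earlier versions (per its definition at the top of this patch), the same templated line yields valid IR under both syntaxes:

;; a post-patch load such as
;;
;;   %r = load PTR_OP_ARGS(`<WIDTH x i64> ') %ret
;;
;; expands to
;;
;;   %r = load <WIDTH x i64> , <WIDTH x i64> * %ret   ; LLVM 3.7 IR syntax
;;   %r = load <WIDTH x i64> * %ret                   ; pre-3.7 IR syntax
;;
;; and a post-patch getelementptr such as
;;
;;   %store_LANE_ID = getelementptr PTR_OP_ARGS(`i64') %ret64, i32 LANE
;;
;; expands to
;;
;;   %store_LANE_ID = getelementptr i64 , i64 * %ret64, i32 LANE   ; LLVM 3.7
;;   %store_LANE_ID = getelementptr i64 * %ret64, i32 LANE         ; pre-3.7

In the old two-argument form the pointer operand was quoted inside the macro, which forced every call site to carry both the type and the operand; hoisting the operand out leaves the macro responsible only for the version-dependent type tokens, which is why every hunk in this patch is a pure call-site rewrite with no change in the generated IR.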