-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[NFC][PowerPC] Cleaning up test file and removing redundant front-end test #151971
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: main
Are you sure you want to change the base?
Conversation
@llvm/pr-subscribers-backend-powerpc Author: None (Himadhith) Changes: NFC patch to clean up extra lines of code in the file. Patch is 24.07 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/151971.diff 2 Files Affected:
diff --git a/clang/test/CodeGen/PowerPC/check-zero-vector.c b/clang/test/CodeGen/PowerPC/check-zero-vector.c
deleted file mode 100644
index cb6c826641366..0000000000000
--- a/clang/test/CodeGen/PowerPC/check-zero-vector.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64LE
-// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_32
-
-// POWERPC_64-LABEL: define signext i32 @test_Greater_than(
-// POWERPC_64-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_64-NEXT: [[ENTRY:.*:]]
-// POWERPC_64-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
-// POWERPC_64-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_64-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_64-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_64-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_64: [[FOR_COND]]:
-// POWERPC_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_64: [[FOR_BODY]]:
-// POWERPC_64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
-// POWERPC_64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
-// POWERPC_64-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_64-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_64-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_64: [[IF_THEN]]:
-// POWERPC_64-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_64-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: br label %[[IF_END]]
-// POWERPC_64: [[IF_END]]:
-// POWERPC_64-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_64: [[FOR_INC]]:
-// POWERPC_64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_64-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_64: [[FOR_END]]:
-// POWERPC_64-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_64-NEXT: ret i32 [[CONV4]]
-//
-// POWERPC_64LE-LABEL: define dso_local signext i32 @test_Greater_than(
-// POWERPC_64LE-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_64LE-NEXT: [[ENTRY:.*:]]
-// POWERPC_64LE-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
-// POWERPC_64LE-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_64LE-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_64LE-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64LE-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_64LE: [[FOR_COND]]:
-// POWERPC_64LE-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_64LE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_64LE: [[FOR_BODY]]:
-// POWERPC_64LE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64LE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
-// POWERPC_64LE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
-// POWERPC_64LE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_64LE-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_64LE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_64LE-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_64LE: [[IF_THEN]]:
-// POWERPC_64LE-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_64LE-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: br label %[[IF_END]]
-// POWERPC_64LE: [[IF_END]]:
-// POWERPC_64LE-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_64LE: [[FOR_INC]]:
-// POWERPC_64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_64LE-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_64LE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_64LE: [[FOR_END]]:
-// POWERPC_64LE-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_64LE-NEXT: ret i32 [[CONV4]]
-//
-// POWERPC_32-LABEL: define i32 @test_Greater_than(
-// POWERPC_32-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_32-NEXT: [[ENTRY:.*:]]
-// POWERPC_32-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 4
-// POWERPC_32-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_32-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_32-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 4
-// POWERPC_32-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_32-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_32: [[FOR_COND]]:
-// POWERPC_32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_32: [[FOR_BODY]]:
-// POWERPC_32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 4
-// POWERPC_32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 [[TMP2]]
-// POWERPC_32-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_32-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_32-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_32-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_32: [[IF_THEN]]:
-// POWERPC_32-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_32-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: br label %[[IF_END]]
-// POWERPC_32: [[IF_END]]:
-// POWERPC_32-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_32: [[FOR_INC]]:
-// POWERPC_32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_32-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_32: [[FOR_END]]:
-// POWERPC_32-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_32-NEXT: ret i32 [[CONV4]]
-//
-int test_Greater_than(unsigned short *colauths) {
- unsigned short result = 0;
- for (int i = 0; i < 4; i++) {
- if (colauths[i] > 0) {
- result++;
- }
- }
- return result;
-}
-//.
-// POWERPC_64: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_64: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
-// POWERPC_64LE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_64LE: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
-// POWERPC_32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_32: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
index 59173e22edf26..23371535edadd 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE
@@ -7,240 +8,87 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32
-define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) {
-; This testcase is manually reduced to isolate the critical code blocks.
-; It is designed to check for vector comparison specifically for zero vectors.
-; In the vector.body section, we are expecting a comparison instruction (vcmpequh),
-; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors.
-; The output of the merge instruction is being used by xxland and finally
-; accumulated by vadduwm instruction.
-
+define dso_local signext range(i32 0, 5) i32 @test_Greater_than(ptr noundef readonly captures(none) %colauths) {
; POWERPC_64LE-LABEL: test_Greater_than:
-; POWERPC_64LE: .LBB0_6: # %vector.body
-; POWERPC_64LE-NEXT: #
-; POWERPC_64LE-NEXT: lxv [[R1:[0-9]+]], -64(4)
-; POWERPC_64LE-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_64LE-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_64LE-NEXT: vmrghh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_64LE-NEXT: vmrglh [[R2]], [[R2]], [[R2]]
-; POWERPC_64LE-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_64LE-NEXT: xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_64LE-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body
-; POWERPC_64LE-NEXT: #
-; POWERPC_64LE-NEXT: lxv [[R8:[0-9]+]], 0(4)
-; POWERPC_64LE-NEXT: addi 4, 4, 16
-; POWERPC_64LE-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
-; POWERPC_64LE-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_64LE-NEXT: vmrglh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_64LE-NEXT: vmrghh [[R9]], [[R9]], [[R9]]
-; POWERPC_64LE-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_64LE-NEXT: xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_64LE-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_64LE-NEXT: vadduwm [[R3]], [[R3]], [[R11]]
-; POWERPC_64LE-NEXT: bdnz .LBB0_10
-; POWERPC_64LE: blr
+; POWERPC_64LE: # %bb.0: # %entry
+; POWERPC_64LE-NEXT: lfd 0, 0(3)
+; POWERPC_64LE-NEXT: xxlxor 35, 35, 35
+; POWERPC_64LE-NEXT: li 4, 0
+; POWERPC_64LE-NEXT: li 3, 4
+; POWERPC_64LE-NEXT: xxswapd 34, 0
+; POWERPC_64LE-NEXT: vcmpequh 2, 2, 3
+; POWERPC_64LE-NEXT: xxlnor 34, 34, 34
+; POWERPC_64LE-NEXT: vmrglh 3, 2, 2
+; POWERPC_64LE-NEXT: vextuwrx 4, 4, 2
+; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
+; POWERPC_64LE-NEXT: clrlwi 4, 4, 31
+; POWERPC_64LE-NEXT: rlwimi 4, 3, 1, 30, 30
+; POWERPC_64LE-NEXT: mfvsrwz 3, 35
+; POWERPC_64LE-NEXT: rlwimi 4, 3, 2, 29, 29
+; POWERPC_64LE-NEXT: li 3, 12
+; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
+; POWERPC_64LE-NEXT: rlwimi 4, 3, 3, 28, 28
+; POWERPC_64LE-NEXT: stb 4, -1(1)
+; POWERPC_64LE-NEXT: lbz 3, -1(1)
+; POWERPC_64LE-NEXT: popcntd 3, 3
+; POWERPC_64LE-NEXT: blr
;
; POWERPC_64-LABEL: test_Greater_than:
-; POWERPC_64: L..BB0_6: # %vector.body
-; POWERPC_64-NEXT: #
-; POWERPC_64-NEXT: lxv [[R1:[0-9]+]], -64(4)
-; POWERPC_64-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_64-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_64-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_64-NEXT: vmrghh [[R2]], [[R2]], [[R2]]
-; POWERPC_64-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_64-NEXT: xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_64-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body
-; POWERPC_64-NEXT: #
-; POWERPC_64-NEXT: lxv [[R8:[0-9]+]], 0(4)
-; POWERPC_64-NEXT: addi 4, 4, 16
-; POWERPC_64-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
-; POWERPC_64-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_64-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_64-NEXT: vmrglh [[R9]], [[R9]], [[R9]]
-; POWERPC_64-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_64-NEXT: xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_64-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_64-NEXT: vadduwm [[R3]], [[R3]], [[R11]]
-; POWERPC_64-NEXT: bdnz L..BB0_10
-; POWERPC_64: blr
+; POWERPC_64: # %bb.0: # %entry
+; POWERPC_64-NEXT: lxsd 2, 0(3)
+; POWERPC_64-NEXT: xxlxor 35, 35, 35
+; POWERPC_64-NEXT: li 4, 12
+; POWERPC_64-NEXT: li 3, 8
+; POWERPC_64-NEXT: vcmpequh 2, 2, 3
+; POWERPC_64-NEXT: xxlnor 34, 34, 34
+; POWERPC_64-NEXT: vmrghh 2, 2, 2
+; POWERPC_64-NEXT: vextuwlx 4, 4, 2
+; POWERPC_64-NEXT: vextuwlx 3, 3, 2
+; POWERPC_64-NEXT: clrlwi 4, 4, 31
+; POWERPC_64-NEXT: rlwimi 4, 3, 1, 30, 30
+; POWERPC_64-NEXT: mfvsrwz 3, 34
+; POWERPC_64-NEXT: rlwimi 4, 3, 2, 29, 29
+; POWERPC_64-NEXT: li 3, 0
+; POWERPC_64-NEXT: vextuwlx 3, 3, 2
+; POWERPC_64-NEXT: rlwimi 4, 3, 3, 28, 28
+; POWERPC_64-NEXT: stb 4, -1(1)
+; POWERPC_64-NEXT: lbz 3, -1(1)
+; POWERPC_64-NEXT: popcntd 3, 3
+; POWERPC_64-NEXT: blr
;
; POWERPC_32-LABEL: test_Greater_than:
-; POWERPC_32: L..BB0_7: # %vector.body
-; POWERPC_32-NEXT: #
-; POWERPC_32-NEXT: lxv [[R1:[0-9]+]], 0(10)
-; POWERPC_32-NEXT: addic [[R13:[0-9]+]], [[R13]], 64
-; POWERPC_32-NEXT: addze [[R14:[0-9]+]], [[R14]]
-; POWERPC_32-NEXT: xor [[R15:[0-9]+]], [[R13]], [[R16:[0-9]+]]
-; POWERPC_32-NEXT: or. [[R15]], [[R15]], [[R14]]
-; POWERPC_32-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_32-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_32-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_32-NEXT: vmrghh [[R2]], [[R2]], [[R2]]
-; POWERPC_32-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_32-NEXT: xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_32-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body
-; POWERPC_32-NEXT: #
-; POWERPC_32-NEXT: slwi [[R14]], [[R13]], 1
-; POWERPC_32-NEXT: addic [[R13]], [[R13]], 8
-; POWERPC_32-NEXT: addze [[R17:[0-9]+]], [[R17]]
-; POWERPC_32-NEXT: lxvx [[R8:[0-9]+]], [[R18:[0-9]+]], [[R14]]
-; POWERPC_32-NEXT: xor [[R14]], [[R13]], [[R16]]
-; POWERPC_32-NEXT: or. [[R14]], [[R14]], [[R17]]
-; POWERPC_32-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R3]]
-; POWERPC_32-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_32-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_32-NEXT: vmrglh [[R9]], [[R9]], [[R9]]
-; POWERPC_32-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_32-NEXT: xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_32-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_32-NEXT: vadduwm [[R19:[0-9]+]], [[R19]], [[R11]]
-; POWERPC_32-NEXT: bne 0, L..BB0_11
-; POWERPC_32: blr
- entry:
- %cmp5 = icmp sgt i32 %ncols, 0
- br i1 %cmp5, label %iter.check, label %for.cond.cleanup
-
-iter.check: ; preds = %entry
- %wide.trip.count = zext nneg i32 %ncols to i64
- %min.iters.check = icmp ult i32 %ncols, 8
- br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check
-
-for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check
- %indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ]
- %num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ]
- br label %for.body
-
-vector.main.loop.iter.check: ; preds = %iter.check
- %min.iters.check9 = icmp ult i32 %ncols, 64
- br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph
-
-vector.ph: ; preds = %vector.main.loop.iter.check
- %n.vec = and i64 %wide.trip.count, 2147483584
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ]
- %vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ]
- %vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ]
- %vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ]
- %vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ]
- %vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ]
- %vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ]
- %vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ]
- %0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index
- %1 = getelementptr inbounds nuw i8, ptr %0, i64 16
- %2 = getelementptr inbounds nuw i8, ptr %0, i64 32
- %3 = getelementptr inbounds nuw i8, ptr %0, i64 48
- %4 = getelementptr inbounds nuw i8, ptr %0, i64 64
- %5 = getelementptr inbounds nuw i8, ptr %0, i64 80
- %6 = getelementptr inbounds nuw i8, ptr %0, i64 96
- %7 = getelementptr inbounds nuw i8, ptr %0, i64 112
- %wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5
- %wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5
- %wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5
- %wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5
- %wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5
- %wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5
- %wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5
- %wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5
- %8 = icmp ne <8 x i16> %wide.load, zeroinitializer
- %9 = icmp ne <8 x i16> %wide.load17, zeroinitializer
- %10 = icmp ne <8 x i16> %wide.load18, zeroinitializer
- %11 = icmp ne <8 x i16> %wide.load19, zeroinitializer
- %12 = icmp ne <8 x i16> %wide.load20, zeroinitializer
- %13 = icmp ne <8 x i16> %wide.load21, zeroinitializer
- %14 = icmp ne <8 x i16> %wide.load22, zeroinitializer
- %15 = icmp ne <8 x i16> %wide.load23, zeroinitializer
- %16 = zext <8 x i1> %8 to <8 x i32>
- %17 = zext <8 x i1> %9 to <8 x i32>
- %18 = zext <8 x i1> %10 to <8 x i32>
- %19 = zext <8 x i1> %11 to <8 x i32>
- %20 = zext <8 x i1> %12 to <8 x i32>
- %21 = zext <8 x i1> %13 to <8 x i32>
- %22 = zext <8 x i1> %14 to <8 x i32>
- %23 = zext <8 x i1> %15 to <8 x i32>
- %24 = add <8 x i32> %vec.phi, %16
- %25 = add <8 x i32> %vec.phi10, %17
- %26 = add <8 x i32> %vec.phi11, %18
- %27 = add <8 x i32> %vec.phi12, %19
- %28 = add <8 x i32> %vec.phi13, %20
- %29 = add <8 x i32> %vec.phi14, %21
- %30 = add <8 x i32> %vec.phi15, %22
- %31 = add <8 x i32> %vec.phi16, %23
- %index.next = add nuw i64 %index, 64
- %32 = icmp eq i64 %index.next, %n.vec
- br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9
-
-middle.block: ; preds = %vector.body
- %bin.rdx = add <8 x i32> %25, %24
- %bin.rdx24 = add <8 x i32> %26, %bin.rdx
- %bin.rdx25 = add <8 x i32> %27, %bin.rdx24
- %bin.rdx26 = add <8 x i32> %28, %bin.rdx25
- %bin.rdx27 = add <8 x i32> %29, %bin.rdx26
- %bin.rdx28 = add <8 x i32> %30, %bin.rdx27
- %bin.rdx29 = add <8 x i32> %31, %bin.rdx28
- %33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29)
- %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
- br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check
-
-vec.epilog.iter.check: ; preds = %middle.block
- %n.vec.remaining = and i64 %wide.trip.count, 56
- %min...
[truncated]
|
@llvm/pr-subscribers-clang Author: None (Himadhith) Changes: NFC patch to clean up extra lines of code in the file. Patch is 24.07 KiB, truncated to 20.00 KiB below; full version: https://github.com/llvm/llvm-project/pull/151971.diff 2 Files Affected:
diff --git a/clang/test/CodeGen/PowerPC/check-zero-vector.c b/clang/test/CodeGen/PowerPC/check-zero-vector.c
deleted file mode 100644
index cb6c826641366..0000000000000
--- a/clang/test/CodeGen/PowerPC/check-zero-vector.c
+++ /dev/null
@@ -1,143 +0,0 @@
-// NOTE: Assertions have been autogenerated by utils/update_cc_test_checks.py UTC_ARGS: --version 5
-// RUN: %clang_cc1 -triple powerpc64-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64
-// RUN: %clang_cc1 -triple powerpc64le-unknown-linux-gnu -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_64LE
-// RUN: %clang_cc1 -triple powerpc-ibm-aix -emit-llvm %s -o - | FileCheck %s --check-prefix=POWERPC_32
-
-// POWERPC_64-LABEL: define signext i32 @test_Greater_than(
-// POWERPC_64-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_64-NEXT: [[ENTRY:.*:]]
-// POWERPC_64-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
-// POWERPC_64-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_64-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_64-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_64-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_64: [[FOR_COND]]:
-// POWERPC_64-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_64-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_64: [[FOR_BODY]]:
-// POWERPC_64-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
-// POWERPC_64-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
-// POWERPC_64-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_64-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_64-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_64-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_64: [[IF_THEN]]:
-// POWERPC_64-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_64-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: br label %[[IF_END]]
-// POWERPC_64: [[IF_END]]:
-// POWERPC_64-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_64: [[FOR_INC]]:
-// POWERPC_64-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_64-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_64-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_64: [[FOR_END]]:
-// POWERPC_64-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_64-NEXT: ret i32 [[CONV4]]
-//
-// POWERPC_64LE-LABEL: define dso_local signext i32 @test_Greater_than(
-// POWERPC_64LE-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_64LE-NEXT: [[ENTRY:.*:]]
-// POWERPC_64LE-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 8
-// POWERPC_64LE-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_64LE-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_64LE-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64LE-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_64LE: [[FOR_COND]]:
-// POWERPC_64LE-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_64LE-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_64LE: [[FOR_BODY]]:
-// POWERPC_64LE-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 8
-// POWERPC_64LE-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[IDXPROM:%.*]] = sext i32 [[TMP2]] to i64
-// POWERPC_64LE-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i64 [[IDXPROM]]
-// POWERPC_64LE-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_64LE-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_64LE-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_64LE-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_64LE: [[IF_THEN]]:
-// POWERPC_64LE-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_64LE-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: br label %[[IF_END]]
-// POWERPC_64LE: [[IF_END]]:
-// POWERPC_64LE-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_64LE: [[FOR_INC]]:
-// POWERPC_64LE-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_64LE-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_64LE-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_64LE-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_64LE: [[FOR_END]]:
-// POWERPC_64LE-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_64LE-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_64LE-NEXT: ret i32 [[CONV4]]
-//
-// POWERPC_32-LABEL: define i32 @test_Greater_than(
-// POWERPC_32-SAME: ptr noundef [[COLAUTHS:%.*]]) #[[ATTR0:[0-9]+]] {
-// POWERPC_32-NEXT: [[ENTRY:.*:]]
-// POWERPC_32-NEXT: [[COLAUTHS_ADDR:%.*]] = alloca ptr, align 4
-// POWERPC_32-NEXT: [[RESULT:%.*]] = alloca i16, align 2
-// POWERPC_32-NEXT: [[I:%.*]] = alloca i32, align 4
-// POWERPC_32-NEXT: store ptr [[COLAUTHS]], ptr [[COLAUTHS_ADDR]], align 4
-// POWERPC_32-NEXT: store i16 0, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: store i32 0, ptr [[I]], align 4
-// POWERPC_32-NEXT: br label %[[FOR_COND:.*]]
-// POWERPC_32: [[FOR_COND]]:
-// POWERPC_32-NEXT: [[TMP0:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[CMP:%.*]] = icmp slt i32 [[TMP0]], 4
-// POWERPC_32-NEXT: br i1 [[CMP]], label %[[FOR_BODY:.*]], label %[[FOR_END:.*]]
-// POWERPC_32: [[FOR_BODY]]:
-// POWERPC_32-NEXT: [[TMP1:%.*]] = load ptr, ptr [[COLAUTHS_ADDR]], align 4
-// POWERPC_32-NEXT: [[TMP2:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[ARRAYIDX:%.*]] = getelementptr inbounds i16, ptr [[TMP1]], i32 [[TMP2]]
-// POWERPC_32-NEXT: [[TMP3:%.*]] = load i16, ptr [[ARRAYIDX]], align 2
-// POWERPC_32-NEXT: [[CONV:%.*]] = zext i16 [[TMP3]] to i32
-// POWERPC_32-NEXT: [[CMP1:%.*]] = icmp sgt i32 [[CONV]], 0
-// POWERPC_32-NEXT: br i1 [[CMP1]], label %[[IF_THEN:.*]], label %[[IF_END:.*]]
-// POWERPC_32: [[IF_THEN]]:
-// POWERPC_32-NEXT: [[TMP4:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: [[INC:%.*]] = add i16 [[TMP4]], 1
-// POWERPC_32-NEXT: store i16 [[INC]], ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: br label %[[IF_END]]
-// POWERPC_32: [[IF_END]]:
-// POWERPC_32-NEXT: br label %[[FOR_INC:.*]]
-// POWERPC_32: [[FOR_INC]]:
-// POWERPC_32-NEXT: [[TMP5:%.*]] = load i32, ptr [[I]], align 4
-// POWERPC_32-NEXT: [[INC3:%.*]] = add nsw i32 [[TMP5]], 1
-// POWERPC_32-NEXT: store i32 [[INC3]], ptr [[I]], align 4
-// POWERPC_32-NEXT: br label %[[FOR_COND]], !llvm.loop [[LOOP2:![0-9]+]]
-// POWERPC_32: [[FOR_END]]:
-// POWERPC_32-NEXT: [[TMP6:%.*]] = load i16, ptr [[RESULT]], align 2
-// POWERPC_32-NEXT: [[CONV4:%.*]] = zext i16 [[TMP6]] to i32
-// POWERPC_32-NEXT: ret i32 [[CONV4]]
-//
-int test_Greater_than(unsigned short *colauths) {
- unsigned short result = 0;
- for (int i = 0; i < 4; i++) {
- if (colauths[i] > 0) {
- result++;
- }
- }
- return result;
-}
-//.
-// POWERPC_64: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_64: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
-// POWERPC_64LE: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_64LE: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
-// POWERPC_32: [[LOOP2]] = distinct !{[[LOOP2]], [[META3:![0-9]+]]}
-// POWERPC_32: [[META3]] = !{!"llvm.loop.mustprogress"}
-//.
diff --git a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
index 59173e22edf26..23371535edadd 100644
--- a/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
+++ b/llvm/test/CodeGen/PowerPC/check-zero-vector.ll
@@ -1,3 +1,4 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc64le-unknown-linux-gnu \
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_64LE
@@ -7,240 +8,87 @@
; RUN: llc -verify-machineinstrs -mcpu=pwr9 -mtriple=powerpc-ibm-aix \
; RUN: < %s | FileCheck %s --check-prefix=POWERPC_32
-define i32 @test_Greater_than(ptr %colauths, i32 signext %ncols) {
-; This testcase is manually reduced to isolate the critical code blocks.
-; It is designed to check for vector comparison specifically for zero vectors.
-; In the vector.body section, we are expecting a comparison instruction (vcmpequh),
-; merge instructions (vmrghh and vmrglh) which use exactly 2 vectors.
-; The output of the merge instruction is being used by xxland and finally
-; accumulated by vadduwm instruction.
-
+define dso_local signext range(i32 0, 5) i32 @test_Greater_than(ptr noundef readonly captures(none) %colauths) {
; POWERPC_64LE-LABEL: test_Greater_than:
-; POWERPC_64LE: .LBB0_6: # %vector.body
-; POWERPC_64LE-NEXT: #
-; POWERPC_64LE-NEXT: lxv [[R1:[0-9]+]], -64(4)
-; POWERPC_64LE-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_64LE-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_64LE-NEXT: vmrghh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_64LE-NEXT: vmrglh [[R2]], [[R2]], [[R2]]
-; POWERPC_64LE-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_64LE-NEXT: xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_64LE-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_64LE: .LBB0_10: # %vec.epilog.vector.body
-; POWERPC_64LE-NEXT: #
-; POWERPC_64LE-NEXT: lxv [[R8:[0-9]+]], 0(4)
-; POWERPC_64LE-NEXT: addi 4, 4, 16
-; POWERPC_64LE-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
-; POWERPC_64LE-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_64LE-NEXT: vmrglh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_64LE-NEXT: vmrghh [[R9]], [[R9]], [[R9]]
-; POWERPC_64LE-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_64LE-NEXT: xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_64LE-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_64LE-NEXT: vadduwm [[R3]], [[R3]], [[R11]]
-; POWERPC_64LE-NEXT: bdnz .LBB0_10
-; POWERPC_64LE: blr
+; POWERPC_64LE: # %bb.0: # %entry
+; POWERPC_64LE-NEXT: lfd 0, 0(3)
+; POWERPC_64LE-NEXT: xxlxor 35, 35, 35
+; POWERPC_64LE-NEXT: li 4, 0
+; POWERPC_64LE-NEXT: li 3, 4
+; POWERPC_64LE-NEXT: xxswapd 34, 0
+; POWERPC_64LE-NEXT: vcmpequh 2, 2, 3
+; POWERPC_64LE-NEXT: xxlnor 34, 34, 34
+; POWERPC_64LE-NEXT: vmrglh 3, 2, 2
+; POWERPC_64LE-NEXT: vextuwrx 4, 4, 2
+; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
+; POWERPC_64LE-NEXT: clrlwi 4, 4, 31
+; POWERPC_64LE-NEXT: rlwimi 4, 3, 1, 30, 30
+; POWERPC_64LE-NEXT: mfvsrwz 3, 35
+; POWERPC_64LE-NEXT: rlwimi 4, 3, 2, 29, 29
+; POWERPC_64LE-NEXT: li 3, 12
+; POWERPC_64LE-NEXT: vextuwrx 3, 3, 3
+; POWERPC_64LE-NEXT: rlwimi 4, 3, 3, 28, 28
+; POWERPC_64LE-NEXT: stb 4, -1(1)
+; POWERPC_64LE-NEXT: lbz 3, -1(1)
+; POWERPC_64LE-NEXT: popcntd 3, 3
+; POWERPC_64LE-NEXT: blr
;
; POWERPC_64-LABEL: test_Greater_than:
-; POWERPC_64: L..BB0_6: # %vector.body
-; POWERPC_64-NEXT: #
-; POWERPC_64-NEXT: lxv [[R1:[0-9]+]], -64(4)
-; POWERPC_64-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_64-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_64-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_64-NEXT: vmrghh [[R2]], [[R2]], [[R2]]
-; POWERPC_64-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_64-NEXT: xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_64-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_64: L..BB0_10: # %vec.epilog.vector.body
-; POWERPC_64-NEXT: #
-; POWERPC_64-NEXT: lxv [[R8:[0-9]+]], 0(4)
-; POWERPC_64-NEXT: addi 4, 4, 16
-; POWERPC_64-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R10:[0-9]+]]
-; POWERPC_64-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_64-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_64-NEXT: vmrglh [[R9]], [[R9]], [[R9]]
-; POWERPC_64-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_64-NEXT: xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_64-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_64-NEXT: vadduwm [[R3]], [[R3]], [[R11]]
-; POWERPC_64-NEXT: bdnz L..BB0_10
-; POWERPC_64: blr
+; POWERPC_64: # %bb.0: # %entry
+; POWERPC_64-NEXT: lxsd 2, 0(3)
+; POWERPC_64-NEXT: xxlxor 35, 35, 35
+; POWERPC_64-NEXT: li 4, 12
+; POWERPC_64-NEXT: li 3, 8
+; POWERPC_64-NEXT: vcmpequh 2, 2, 3
+; POWERPC_64-NEXT: xxlnor 34, 34, 34
+; POWERPC_64-NEXT: vmrghh 2, 2, 2
+; POWERPC_64-NEXT: vextuwlx 4, 4, 2
+; POWERPC_64-NEXT: vextuwlx 3, 3, 2
+; POWERPC_64-NEXT: clrlwi 4, 4, 31
+; POWERPC_64-NEXT: rlwimi 4, 3, 1, 30, 30
+; POWERPC_64-NEXT: mfvsrwz 3, 34
+; POWERPC_64-NEXT: rlwimi 4, 3, 2, 29, 29
+; POWERPC_64-NEXT: li 3, 0
+; POWERPC_64-NEXT: vextuwlx 3, 3, 2
+; POWERPC_64-NEXT: rlwimi 4, 3, 3, 28, 28
+; POWERPC_64-NEXT: stb 4, -1(1)
+; POWERPC_64-NEXT: lbz 3, -1(1)
+; POWERPC_64-NEXT: popcntd 3, 3
+; POWERPC_64-NEXT: blr
;
; POWERPC_32-LABEL: test_Greater_than:
-; POWERPC_32: L..BB0_7: # %vector.body
-; POWERPC_32-NEXT: #
-; POWERPC_32-NEXT: lxv [[R1:[0-9]+]], 0(10)
-; POWERPC_32-NEXT: addic [[R13:[0-9]+]], [[R13]], 64
-; POWERPC_32-NEXT: addze [[R14:[0-9]+]], [[R14]]
-; POWERPC_32-NEXT: xor [[R15:[0-9]+]], [[R13]], [[R16:[0-9]+]]
-; POWERPC_32-NEXT: or. [[R15]], [[R15]], [[R14]]
-; POWERPC_32-NEXT: vcmpequh [[R2:[0-9]+]], [[R2]], [[R3:[0-9]+]]
-; POWERPC_32-NEXT: xxlnor [[R1]], [[R1]], [[R1]]
-; POWERPC_32-NEXT: vmrglh [[R4:[0-9]+]], [[R2]], [[R2]]
-; POWERPC_32-NEXT: vmrghh [[R2]], [[R2]], [[R2]]
-; POWERPC_32-NEXT: xxland [[R5:[0-9]+]], [[R5]], [[R6:[0-9]+]]
-; POWERPC_32-NEXT: xxland [[R1]], [[R1]], [[R6]]
-; POWERPC_32-NEXT: vadduwm [[R7:[0-9]+]], [[R7]], [[R4]]
-; POWERPC_32: L..BB0_11: # %vec.epilog.vector.body
-; POWERPC_32-NEXT: #
-; POWERPC_32-NEXT: slwi [[R14]], [[R13]], 1
-; POWERPC_32-NEXT: addic [[R13]], [[R13]], 8
-; POWERPC_32-NEXT: addze [[R17:[0-9]+]], [[R17]]
-; POWERPC_32-NEXT: lxvx [[R8:[0-9]+]], [[R18:[0-9]+]], [[R14]]
-; POWERPC_32-NEXT: xor [[R14]], [[R13]], [[R16]]
-; POWERPC_32-NEXT: or. [[R14]], [[R14]], [[R17]]
-; POWERPC_32-NEXT: vcmpequh [[R9:[0-9]+]], [[R9]], [[R3]]
-; POWERPC_32-NEXT: xxlnor [[R8]], [[R8]], [[R8]]
-; POWERPC_32-NEXT: vmrghh [[R11:[0-9]+]], [[R9]], [[R9]]
-; POWERPC_32-NEXT: vmrglh [[R9]], [[R9]], [[R9]]
-; POWERPC_32-NEXT: xxland [[R12:[0-9]+]], [[R12]], [[R6]]
-; POWERPC_32-NEXT: xxland [[R8]], [[R8]], [[R6]]
-; POWERPC_32-NEXT: vadduwm [[R7]], [[R7]], [[R9]]
-; POWERPC_32-NEXT: vadduwm [[R19:[0-9]+]], [[R19]], [[R11]]
-; POWERPC_32-NEXT: bne 0, L..BB0_11
-; POWERPC_32: blr
- entry:
- %cmp5 = icmp sgt i32 %ncols, 0
- br i1 %cmp5, label %iter.check, label %for.cond.cleanup
-
-iter.check: ; preds = %entry
- %wide.trip.count = zext nneg i32 %ncols to i64
- %min.iters.check = icmp ult i32 %ncols, 8
- br i1 %min.iters.check, label %for.body.preheader, label %vector.main.loop.iter.check
-
-for.body.preheader: ; preds = %vec.epilog.iter.check, %vec.epilog.middle.block, %iter.check
- %indvars.iv.ph = phi i64 [ 0, %iter.check ], [ %n.vec, %vec.epilog.iter.check ], [ %n.vec31, %vec.epilog.middle.block ]
- %num_cols_needed.06.ph = phi i32 [ 0, %iter.check ], [ %33, %vec.epilog.iter.check ], [ %40, %vec.epilog.middle.block ]
- br label %for.body
-
-vector.main.loop.iter.check: ; preds = %iter.check
- %min.iters.check9 = icmp ult i32 %ncols, 64
- br i1 %min.iters.check9, label %vec.epilog.ph, label %vector.ph
-
-vector.ph: ; preds = %vector.main.loop.iter.check
- %n.vec = and i64 %wide.trip.count, 2147483584
- br label %vector.body
-
-vector.body: ; preds = %vector.body, %vector.ph
- %index = phi i64 [ 0, %vector.ph ], [ %index.next, %vector.body ]
- %vec.phi = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %24, %vector.body ]
- %vec.phi10 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %25, %vector.body ]
- %vec.phi11 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %26, %vector.body ]
- %vec.phi12 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %27, %vector.body ]
- %vec.phi13 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %28, %vector.body ]
- %vec.phi14 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %29, %vector.body ]
- %vec.phi15 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %30, %vector.body ]
- %vec.phi16 = phi <8 x i32> [ zeroinitializer, %vector.ph ], [ %31, %vector.body ]
- %0 = getelementptr inbounds nuw i16, ptr %colauths, i64 %index
- %1 = getelementptr inbounds nuw i8, ptr %0, i64 16
- %2 = getelementptr inbounds nuw i8, ptr %0, i64 32
- %3 = getelementptr inbounds nuw i8, ptr %0, i64 48
- %4 = getelementptr inbounds nuw i8, ptr %0, i64 64
- %5 = getelementptr inbounds nuw i8, ptr %0, i64 80
- %6 = getelementptr inbounds nuw i8, ptr %0, i64 96
- %7 = getelementptr inbounds nuw i8, ptr %0, i64 112
- %wide.load = load <8 x i16>, ptr %0, align 2, !tbaa !5
- %wide.load17 = load <8 x i16>, ptr %1, align 2, !tbaa !5
- %wide.load18 = load <8 x i16>, ptr %2, align 2, !tbaa !5
- %wide.load19 = load <8 x i16>, ptr %3, align 2, !tbaa !5
- %wide.load20 = load <8 x i16>, ptr %4, align 2, !tbaa !5
- %wide.load21 = load <8 x i16>, ptr %5, align 2, !tbaa !5
- %wide.load22 = load <8 x i16>, ptr %6, align 2, !tbaa !5
- %wide.load23 = load <8 x i16>, ptr %7, align 2, !tbaa !5
- %8 = icmp ne <8 x i16> %wide.load, zeroinitializer
- %9 = icmp ne <8 x i16> %wide.load17, zeroinitializer
- %10 = icmp ne <8 x i16> %wide.load18, zeroinitializer
- %11 = icmp ne <8 x i16> %wide.load19, zeroinitializer
- %12 = icmp ne <8 x i16> %wide.load20, zeroinitializer
- %13 = icmp ne <8 x i16> %wide.load21, zeroinitializer
- %14 = icmp ne <8 x i16> %wide.load22, zeroinitializer
- %15 = icmp ne <8 x i16> %wide.load23, zeroinitializer
- %16 = zext <8 x i1> %8 to <8 x i32>
- %17 = zext <8 x i1> %9 to <8 x i32>
- %18 = zext <8 x i1> %10 to <8 x i32>
- %19 = zext <8 x i1> %11 to <8 x i32>
- %20 = zext <8 x i1> %12 to <8 x i32>
- %21 = zext <8 x i1> %13 to <8 x i32>
- %22 = zext <8 x i1> %14 to <8 x i32>
- %23 = zext <8 x i1> %15 to <8 x i32>
- %24 = add <8 x i32> %vec.phi, %16
- %25 = add <8 x i32> %vec.phi10, %17
- %26 = add <8 x i32> %vec.phi11, %18
- %27 = add <8 x i32> %vec.phi12, %19
- %28 = add <8 x i32> %vec.phi13, %20
- %29 = add <8 x i32> %vec.phi14, %21
- %30 = add <8 x i32> %vec.phi15, %22
- %31 = add <8 x i32> %vec.phi16, %23
- %index.next = add nuw i64 %index, 64
- %32 = icmp eq i64 %index.next, %n.vec
- br i1 %32, label %middle.block, label %vector.body, !llvm.loop !9
-
-middle.block: ; preds = %vector.body
- %bin.rdx = add <8 x i32> %25, %24
- %bin.rdx24 = add <8 x i32> %26, %bin.rdx
- %bin.rdx25 = add <8 x i32> %27, %bin.rdx24
- %bin.rdx26 = add <8 x i32> %28, %bin.rdx25
- %bin.rdx27 = add <8 x i32> %29, %bin.rdx26
- %bin.rdx28 = add <8 x i32> %30, %bin.rdx27
- %bin.rdx29 = add <8 x i32> %31, %bin.rdx28
- %33 = tail call i32 @llvm.vector.reduce.add.v8i32(<8 x i32> %bin.rdx29)
- %cmp.n = icmp eq i64 %n.vec, %wide.trip.count
- br i1 %cmp.n, label %for.cond.cleanup, label %vec.epilog.iter.check
-
-vec.epilog.iter.check: ; preds = %middle.block
- %n.vec.remaining = and i64 %wide.trip.count, 56
- %min...
[truncated]
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with minor comments to be addressed.
|
||
define i32 @test_Greater_than(ptr %colauths) { | ||
; This testcase is for the special case of zero-vector comparisions. | ||
; Currently the generated code does a comparision (vcmpequh) and then a negation (xxlnor). |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
nit: comparison
NFC patch to clean up extra lines of code in the file
llvm/test/CodeGen/PowerPC/check-zero-vector.ll
as the current one has its loop unrolled. Also removing the file
clang/test/CodeGen/PowerPC/check-zero-vector.c
as the patch affects only the backend.