Skip to content

Commit 8843515

Browse files
author
Kai Luo
committed
[PowerPC] Fix MI peephole optimization for splats
Summary: This patch fixes an issue where the PPC MI peephole optimization pass incorrectly remove a vector swap. Specifically, the pass can combine a splat/swap to a splat/copy. It uses `TargetRegisterInfo::lookThruCopyLike` to determine that the operands to the splat are the same. However, the current logic only compares the operands based on register numbers. In the case where the splat operands are ultimately feed from the same physical register, the pass can incorrectly remove a swap if the feed register for one of the operands has been clobbered. This patch adds a check to ensure that the registers feeding are both virtual registers or the operands to the splat or swap are both the same register. Here is an example in pseudo-MIR of what happens in the test cased added in this patch: Before PPC MI peephole optimization: ``` %arg = XVADDDP %0, %1 $f1 = COPY %arg.sub_64 call double rint(double) %res.first = COPY $f1 %vec.res.first = SUBREG_TO_REG 1, %res.first, %subreg.sub_64 %arg.swapped = XXPERMDI %arg, %arg, 2 $f1 = COPY %arg.swapped.sub_64 call double rint(double) %res.second = COPY $f1 %vec.res.second = SUBREG_TO_REG 1, %res.second, %subreg.sub_64 %vec.res.splat = XXPERMDI %vec.res.first, %vec.res.second, 0 %vec.res = XXPERMDI %vec.res.splat, %vec.res.splat, 2 ; %vec.res == [ %vec.res.second[0], %vec.res.first[0] ] ``` After optimization: ``` ; ... %vec.res.splat = XXPERMDI %vec.res.first, %vec.res.second, 0 ; lookThruCopyLike(%vec.res.first) == lookThruCopyLike(%vec.res.second) == $f1 ; so the pass replaces the swap with a copy: %vec.res = COPY %vec.res.splat ; %vec.res == [ %vec.res.first[0], %vec.res.second[0] ] ``` As best as I can tell, this has occurred since r288152, which added support for lowering certain vector operations to direct moves in the form of a splat. Committed for vddvss (Colin Samples). Thanks Colin for the patch! Differential Revision: https://reviews.llvm.org/D69497
1 parent edf6717 commit 8843515

File tree

3 files changed

+136
-55
lines changed

3 files changed

+136
-55
lines changed

llvm/lib/Target/PowerPC/PPCMIPeephole.cpp

Lines changed: 20 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -382,13 +382,23 @@ bool PPCMIPeephole::simplifyCode(void) {
382382
// If this is a splat or a swap fed by another splat, we
383383
// can replace it with a copy.
384384
if (DefOpc == PPC::XXPERMDI) {
385-
unsigned FeedImmed = DefMI->getOperand(3).getImm();
386-
unsigned FeedReg1 =
387-
TRI->lookThruCopyLike(DefMI->getOperand(1).getReg(), MRI);
388-
unsigned FeedReg2 =
389-
TRI->lookThruCopyLike(DefMI->getOperand(2).getReg(), MRI);
385+
unsigned DefReg1 = DefMI->getOperand(1).getReg();
386+
unsigned DefReg2 = DefMI->getOperand(2).getReg();
387+
unsigned DefImmed = DefMI->getOperand(3).getImm();
388+
389+
// If the two inputs are not the same register, check to see if
390+
// they originate from the same virtual register after only
391+
// copy-like instructions.
392+
if (DefReg1 != DefReg2) {
393+
unsigned FeedReg1 = TRI->lookThruCopyLike(DefReg1, MRI);
394+
unsigned FeedReg2 = TRI->lookThruCopyLike(DefReg2, MRI);
395+
396+
if (FeedReg1 != FeedReg2 ||
397+
Register::isPhysicalRegister(FeedReg1))
398+
break;
399+
}
390400

391-
if ((FeedImmed == 0 || FeedImmed == 3) && FeedReg1 == FeedReg2) {
401+
if (DefImmed == 0 || DefImmed == 3) {
392402
LLVM_DEBUG(dbgs() << "Optimizing splat/swap or splat/splat "
393403
"to splat/copy: ");
394404
LLVM_DEBUG(MI.dump());
@@ -402,19 +412,18 @@ bool PPCMIPeephole::simplifyCode(void) {
402412
// If this is a splat fed by a swap, we can simplify modify
403413
// the splat to splat the other value from the swap's input
404414
// parameter.
405-
else if ((Immed == 0 || Immed == 3)
406-
&& FeedImmed == 2 && FeedReg1 == FeedReg2) {
415+
else if ((Immed == 0 || Immed == 3) && DefImmed == 2) {
407416
LLVM_DEBUG(dbgs() << "Optimizing swap/splat => splat: ");
408417
LLVM_DEBUG(MI.dump());
409-
MI.getOperand(1).setReg(DefMI->getOperand(1).getReg());
410-
MI.getOperand(2).setReg(DefMI->getOperand(2).getReg());
418+
MI.getOperand(1).setReg(DefReg1);
419+
MI.getOperand(2).setReg(DefReg2);
411420
MI.getOperand(3).setImm(3 - Immed);
412421
Simplified = true;
413422
}
414423

415424
// If this is a swap fed by a swap, we can replace it
416425
// with a copy from the first swap's input.
417-
else if (Immed == 2 && FeedImmed == 2 && FeedReg1 == FeedReg2) {
426+
else if (Immed == 2 && DefImmed == 2) {
418427
LLVM_DEBUG(dbgs() << "Optimizing swap/swap => copy: ");
419428
LLVM_DEBUG(MI.dump());
420429
BuildMI(MBB, &MI, MI.getDebugLoc(), TII->get(PPC::COPY),
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu < %s \
2+
; RUN: | FileCheck --check-prefix=CHECK-LE %s
3+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mattr=+vsx < %s \
4+
; RUN: | FileCheck --check-prefix=CHECK-BE %s
5+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64le-linux-gnu -mcpu=pwr9 < %s \
6+
; RUN: | FileCheck --check-prefix=CHECK-P9LE %s
7+
; RUN: llc -verify-machineinstrs -mtriple=powerpc64-linux-gnu -mcpu=pwr9 < %s \
8+
; RUN: | FileCheck --check-prefix=CHECK-P9BE %s
9+
10+
define double @splat_swap(<2 x double> %x, <2 x double> %y) nounwind {
11+
%added = fadd <2 x double> %x, %y
12+
%call = tail call <2 x double> @llvm.rint.v2f64(<2 x double> %added) nounwind readnone
13+
%res1 = extractelement <2 x double> %call, i32 0
14+
%res2 = extractelement <2 x double> %call, i32 1
15+
%ret = fsub double %res1, %res2
16+
ret double %ret
17+
18+
; CHECK-LE-LABEL: splat_swap:
19+
; CHECK-LE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
20+
; CHECK-LE-NEXT: xxswapd [[XREG2]], [[XREG1]]
21+
; CHECK-LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]]
22+
; CHECK-LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
23+
;
24+
; CHECK-BE-LABEL: splat_swap:
25+
; CHECK-BE: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
26+
; CHECK-BE-NEXT: xxswapd [[XREG2]], [[XREG1]]
27+
; CHECK-BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]]
28+
; CHECK-BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
29+
;
30+
; CHECK-P9LE-LABEL: splat_swap:
31+
; CHECK-P9LE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
32+
; CHECK-P9LE: xxswapd [[XREG2]], [[XREG1]]
33+
; CHECK-P9LE-NEXT: xssubdp [[XREG2]], [[XREG2]], [[XREG1]]
34+
; CHECK-P9LE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
35+
;
36+
; CHECK-P9BE-LABEL: splat_swap:
37+
; CHECK-P9BE-DAG: xxmrghd [[XREG1:[0-9]+]], [[XREG1]], [[XREG2:[0-9]+]]
38+
; CHECK-P9BE: xxswapd [[XREG2]], [[XREG1]]
39+
; CHECK-P9BE-NEXT: xssubdp [[XREG2]], [[XREG1]], [[XREG2]]
40+
; CHECK-P9BE-NEXT: addi [[REG1:[0-9]+]], [[REG1]], {{[0-9]+}}
41+
}
42+
43+
declare <2 x double> @llvm.rint.v2f64(<2 x double>)
44+

0 commit comments

Comments
 (0)