Skip to content

Commit cc14917

Browse files
committed
AMDGPU/GlobalISel: Fix selection of scalar f64 G_FABS
This wasn't covered by existing tablegen patterns, and it also suffers from the same issues as G_FNEG. Work around them by selecting manually, as is done for G_FNEG.
1 parent cb5dc37 commit cc14917

File tree

4 files changed

+113
-4
lines changed

4 files changed

+113
-4
lines changed

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp

Lines changed: 46 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1969,6 +1969,48 @@ bool AMDGPUInstructionSelector::selectG_FNEG(MachineInstr &MI) const {
19691969
return true;
19701970
}
19711971

1972+
// FIXME: This is a workaround for the same tablegen problems as G_FNEG
//
// Manually selects a G_FABS whose destination is a 64-bit scalar in the SGPR
// register bank. The low 32 bits of the source are copied through unchanged
// and the high 32 bits are ANDed with 0x7fffffff to clear the sign bit; the
// two halves are then recombined into the destination.
//
// Returns false, leaving MI in place, when the destination is not an SGPR
// s64 or when the source/destination cannot be constrained to SReg_64.
// Returns true after MI has been replaced by the expanded sequence and erased.
1973+
bool AMDGPUInstructionSelector::selectG_FABS(MachineInstr &MI) const {
1974+
Register Dst = MI.getOperand(0).getReg();
1975+
const RegisterBank *DstRB = RBI.getRegBank(Dst, *MRI, TRI);
1976+
// Only the scalar (SGPR bank) 64-bit case is handled here; everything else
// is rejected.
if (DstRB->getID() != AMDGPU::SGPRRegBankID ||
1977+
MRI->getType(Dst) != LLT::scalar(64))
1978+
return false;
1979+
1980+
Register Src = MI.getOperand(1).getReg();
1981+
MachineBasicBlock *BB = MI.getParent();
1982+
const DebugLoc &DL = MI.getDebugLoc();
1983+
// 32-bit temporaries: the two halves of the source, the sign-clearing mask,
// and the masked high half.
Register LoReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1984+
Register HiReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1985+
Register ConstReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1986+
Register OpReg = MRI->createVirtualRegister(&AMDGPU::SReg_32RegClass);
1987+
1988+
// Both the source and the destination must be constrainable to SReg_64.
// NOTE(review): the four virtual registers above are created before this
// check, so they appear to be left unused when it fails - confirm whether
// that matters.
if (!RBI.constrainGenericRegister(Src, AMDGPU::SReg_64RegClass, *MRI) ||
1989+
!RBI.constrainGenericRegister(Dst, AMDGPU::SReg_64RegClass, *MRI))
1990+
return false;
1991+
1992+
// Split the 64-bit source into its sub0 (low) and sub1 (high) halves.
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), LoReg)
1993+
.addReg(Src, 0, AMDGPU::sub0);
1994+
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::COPY), HiReg)
1995+
.addReg(Src, 0, AMDGPU::sub1);
1996+
// Materialize the mask with every bit set except bit 31.
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_MOV_B32), ConstReg)
1997+
.addImm(0x7fffffff);
1998+
1999+
// Clear sign bit.
2000+
// TODO: Should this use S_BITSET0_*?
2001+
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::S_AND_B32), OpReg)
2002+
.addReg(HiReg)
2003+
.addReg(ConstReg);
2004+
// Reassemble the 64-bit result: untouched low half in sub0, masked high half
// in sub1.
BuildMI(*BB, &MI, DL, TII.get(AMDGPU::REG_SEQUENCE), Dst)
2005+
.addReg(LoReg)
2006+
.addImm(AMDGPU::sub0)
2007+
.addReg(OpReg)
2008+
.addImm(AMDGPU::sub1);
2009+
2010+
// The generic instruction has been fully replaced by the sequence above.
MI.eraseFromParent();
2011+
return true;
2012+
}
2013+
19722014
// Returns true if MI's opcode is G_CONSTANT.
static bool isConstant(const MachineInstr &MI) {
19732015
return MI.getOpcode() == TargetOpcode::G_CONSTANT;
19742016
}
@@ -2609,6 +2651,10 @@ bool AMDGPUInstructionSelector::select(MachineInstr &I) {
26092651
if (selectImpl(I, *CoverageInfo))
26102652
return true;
26112653
return selectG_FNEG(I);
2654+
case TargetOpcode::G_FABS:
2655+
if (selectImpl(I, *CoverageInfo))
2656+
return true;
2657+
return selectG_FABS(I);
26122658
case TargetOpcode::G_EXTRACT:
26132659
return selectG_EXTRACT(I);
26142660
case TargetOpcode::G_MERGE_VALUES:

llvm/lib/Target/AMDGPU/AMDGPUInstructionSelector.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -92,6 +92,7 @@ class AMDGPUInstructionSelector : public InstructionSelector {
9292
bool selectG_SZA_EXT(MachineInstr &I) const;
9393
bool selectG_CONSTANT(MachineInstr &I) const;
9494
bool selectG_FNEG(MachineInstr &I) const;
95+
bool selectG_FABS(MachineInstr &I) const;
9596
bool selectG_AND_OR_XOR(MachineInstr &I) const;
9697
bool selectG_ADD_SUB(MachineInstr &I) const;
9798
bool selectG_UADDO_USUBO_UADDE_USUBE(MachineInstr &I) const;

llvm/lib/Target/AMDGPU/SIInstructions.td

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1255,7 +1255,7 @@ def : GCNPat <
12551255
(S_OR_B32 SReg_32:$src, (S_MOV_B32 (i32 0x80008000))) // Set sign bit
12561256
>;
12571257

1258-
// FIXME: The implicit-def of scc from S_[X]OR_B32 is mishandled
1258+
// FIXME: The implicit-def of scc from S_[X]OR/AND_B32 is mishandled
12591259
// def : GCNPat <
12601260
// (fneg (f64 SReg_64:$src)),
12611261
// (REG_SEQUENCE SReg_64,
@@ -1276,6 +1276,17 @@ def : GCNPat <
12761276
// sub1)
12771277
// >;
12781278

1279+
// FIXME: Use S_BITSET0_B32/B64?
1280+
// def : GCNPat <
1281+
// (fabs (f64 SReg_64:$src)),
1282+
// (REG_SEQUENCE SReg_64,
1283+
// (i32 (EXTRACT_SUBREG SReg_64:$src, sub0)),
1284+
// sub0,
1285+
// (S_AND_B32 (i32 (EXTRACT_SUBREG SReg_64:$src, sub1)),
1286+
// (i32 (S_MOV_B32 (i32 0x7fffffff)))),
1287+
// sub1)
1288+
// >;
1289+
12791290
} // End let AddedComplexity = 1
12801291

12811292
def : GCNPat <

llvm/test/CodeGen/AMDGPU/GlobalISel/inst-select-fabs.mir

Lines changed: 54 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -200,9 +200,13 @@ body: |
200200
liveins: $sgpr0_sgpr1
201201
; GCN-LABEL: name: fabs_s64_ss
202202
; GCN: liveins: $sgpr0_sgpr1
203-
; GCN: [[COPY:%[0-9]+]]:sgpr(s64) = COPY $sgpr0_sgpr1
204-
; GCN: [[FABS:%[0-9]+]]:sgpr(s64) = G_FABS [[COPY]]
205-
; GCN: S_ENDPGM 0, implicit [[FABS]](s64)
203+
; GCN: [[COPY:%[0-9]+]]:sreg_64 = COPY $sgpr0_sgpr1
204+
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub0
205+
; GCN: [[COPY2:%[0-9]+]]:sreg_32 = COPY [[COPY]].sub1
206+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
207+
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY2]], [[S_MOV_B32_]], implicit-def $scc
208+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
209+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
206210
%0:sgpr(s64) = COPY $sgpr0_sgpr1
207211
%1:sgpr(s64) = G_FABS %0
208212
S_ENDPGM 0, implicit %1
@@ -249,3 +253,50 @@ body: |
249253
%1:vgpr(s64) = G_FABS %0
250254
S_ENDPGM 0, implicit %1
251255
...
256+
257+
# Make sure the source register is constrained
258+
---
259+
name: fabs_s64_vv_no_src_constraint
260+
legalized: true
261+
regBankSelected: true
262+
tracksRegLiveness: true
263+
264+
body: |
265+
bb.0:
266+
liveins: $vgpr0_vgpr1
267+
; GCN-LABEL: name: fabs_s64_vv_no_src_constraint
268+
; GCN: liveins: $vgpr0_vgpr1
269+
; GCN: [[DEF:%[0-9]+]]:vreg_64 = IMPLICIT_DEF
270+
; GCN: [[V_MOV_B32_e32_:%[0-9]+]]:vgpr_32 = V_MOV_B32_e32 2147483647, implicit $exec
271+
; GCN: [[COPY:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub1
272+
; GCN: [[V_AND_B32_e64_:%[0-9]+]]:vgpr_32 = V_AND_B32_e64 [[COPY]], [[V_MOV_B32_e32_]], implicit $exec
273+
; GCN: [[COPY1:%[0-9]+]]:vgpr_32 = COPY [[DEF]].sub0
274+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:vreg_64 = REG_SEQUENCE [[COPY1]], %subreg.sub0, [[V_AND_B32_e64_]], %subreg.sub1
275+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
276+
%0:vgpr(s64) = IMPLICIT_DEF
277+
%1:vgpr(s64) = G_FABS %0:vgpr(s64)
278+
S_ENDPGM 0, implicit %1
279+
...
280+
281+
---
282+
name: fabs_s64_ss_no_src_constraint
283+
legalized: true
284+
regBankSelected: true
285+
tracksRegLiveness: true
286+
287+
body: |
288+
bb.0:
289+
liveins: $sgpr0_sgpr1
290+
; GCN-LABEL: name: fabs_s64_ss_no_src_constraint
291+
; GCN: liveins: $sgpr0_sgpr1
292+
; GCN: [[DEF:%[0-9]+]]:sreg_64 = IMPLICIT_DEF
293+
; GCN: [[COPY:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub0
294+
; GCN: [[COPY1:%[0-9]+]]:sreg_32 = COPY [[DEF]].sub1
295+
; GCN: [[S_MOV_B32_:%[0-9]+]]:sreg_32 = S_MOV_B32 2147483647
296+
; GCN: [[S_AND_B32_:%[0-9]+]]:sreg_32 = S_AND_B32 [[COPY1]], [[S_MOV_B32_]], implicit-def $scc
297+
; GCN: [[REG_SEQUENCE:%[0-9]+]]:sreg_64 = REG_SEQUENCE [[COPY]], %subreg.sub0, [[S_AND_B32_]], %subreg.sub1
298+
; GCN: S_ENDPGM 0, implicit [[REG_SEQUENCE]]
299+
%0:sgpr(s64) = IMPLICIT_DEF
300+
%1:sgpr(s64) = G_FABS %0:sgpr(s64)
301+
S_ENDPGM 0, implicit %1
302+
...

0 commit comments

Comments
 (0)