Skip to content

Commit 34e11f6

Browse files
committed
AMDGPU/GlobalISel: Legalize fast unsafe FDIV
Reviewers: arsenm Reviewed By: arsenm Subscribers: kzhuravl, jvesely, wdng, nhaehnle, yaxunl, rovka, dstuttard, tpr, t-tye, hiraditya, Petar.Avramovic, llvm-commits Tags: #llvm Differential Revision: https://reviews.llvm.org/D69231 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@375460 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 6556cd0 commit 34e11f6

File tree

4 files changed

+891
-8
lines changed

4 files changed

+891
-8
lines changed

include/llvm/CodeGen/GlobalISel/MachineIRBuilder.h

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1386,8 +1386,9 @@ class MachineIRBuilder {
13861386
}
13871387

13881388
/// Build and insert \p Res = G_FNEG \p Op0
1389-
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0) {
1390-
return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0});
1389+
MachineInstrBuilder buildFNeg(const DstOp &Dst, const SrcOp &Src0,
1390+
Optional<unsigned> Flags = None) {
1391+
return buildInstr(TargetOpcode::G_FNEG, {Dst}, {Src0}, Flags);
13911392
}
13921393

13931394
/// Build and insert \p Res = G_FABS \p Op0

lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp

Lines changed: 84 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -336,6 +336,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
336336
.legalFor({S32, S64});
337337
auto &TrigActions = getActionDefinitionsBuilder({G_FSIN, G_FCOS})
338338
.customFor({S32, S64});
339+
auto &FDIVActions = getActionDefinitionsBuilder(G_FDIV)
340+
.customFor({S32, S64});
339341

340342
if (ST.has16BitInsts()) {
341343
if (ST.hasVOP3PInsts())
@@ -344,6 +346,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
344346
FPOpActions.legalFor({S16});
345347

346348
TrigActions.customFor({S16});
349+
FDIVActions.customFor({S16});
347350
}
348351

349352
auto &MinNumMaxNum = getActionDefinitionsBuilder({
@@ -375,6 +378,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
375378
.scalarize(0)
376379
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
377380

381+
FDIVActions
382+
.scalarize(0)
383+
.clampScalar(0, ST.has16BitInsts() ? S16 : S32, S64);
384+
378385
getActionDefinitionsBuilder({G_FNEG, G_FABS})
379386
.legalFor(FPTypesPK16)
380387
.clampMaxNumElements(0, S16, 2)
@@ -1107,6 +1114,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
11071114
return legalizeLoad(MI, MRI, B, Observer);
11081115
case TargetOpcode::G_FMAD:
11091116
return legalizeFMad(MI, MRI, B);
1117+
case TargetOpcode::G_FDIV:
1118+
return legalizeFDIV(MI, MRI, B);
11101119
default:
11111120
return false;
11121121
}
@@ -1810,9 +1819,80 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
18101819
return false;
18111820
}
18121821

1813-
bool AMDGPULegalizerInfo::legalizeFDIVFast(MachineInstr &MI,
1814-
MachineRegisterInfo &MRI,
1815-
MachineIRBuilder &B) const {
1822+
/// Custom legalization entry point for G_FDIV.
///
/// Positions the builder \p B at \p MI and then attempts the reciprocal-based
/// lowering in legalizeFastUnsafeFDIV. No other strategy is tried here, so the
/// result of that attempt is the result of this function.
///
/// \returns true if \p MI was replaced, false if it was left untouched.
bool AMDGPULegalizerInfo::legalizeFDIV(MachineInstr &MI,
                                       MachineRegisterInfo &MRI,
                                       MachineIRBuilder &B) const {
  // New instructions are inserted at the position of the division.
  B.setInstr(MI);
  return legalizeFastUnsafeFDIV(MI, MRI, B);
}
1832+
1833+
/// Try to replace a G_FDIV with a sequence built around the amdgcn.rcp
/// intrinsic.
///
/// Rewrites performed (the flags of \p MI are copied onto every new
/// instruction):
///   *  1.0 / x  ->  rcp(x)
///   * -1.0 / x  ->  rcp(fneg(x))
///   *  x / y    ->  x * rcp(y)   (only when reciprocal math is permitted)
///
/// "Permitted" means the target-wide UnsafeFPMath option is set or \p MI
/// carries the FmArcp flag. The function gives up early for 64-bit results
/// unless UnsafeFPMath is set, and for 32-bit results when reciprocal math is
/// not permitted and the subtarget reports FP32 denormal support.
///
/// This function does not set the insertion point of \p B itself;
/// legalizeFDIV does so before calling it.
///
/// \returns true if \p MI was replaced and erased, false if it was left
/// untouched.
bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV(MachineInstr &MI,
                                                 MachineRegisterInfo &MRI,
                                                 MachineIRBuilder &B) const {
  const Register DstReg = MI.getOperand(0).getReg();
  const Register Numer = MI.getOperand(1).getReg();
  const Register Denom = MI.getOperand(2).getReg();
  const uint16_t MIFlags = MI.getFlags();
  const LLT DstTy = MRI.getType(DstReg);

  const bool GlobalUnsafe = B.getMF().getTarget().Options.UnsafeFPMath;
  const bool AllowRcp = GlobalUnsafe || MI.getFlag(MachineInstr::FmArcp);

  // 64-bit division is only handled here under the global unsafe option; the
  // per-instruction arcp flag alone is not sufficient.
  if (DstTy == LLT::scalar(64) && !GlobalUnsafe)
    return false;

  // 32-bit division with denormal support requires explicit permission.
  if (DstTy == LLT::scalar(32) && !AllowRcp && ST.hasFP32Denormals())
    return false;

  // Emits DstReg = rcp(Src), deletes the original division and reports
  // success. Shared by the two constant-numerator cases below.
  auto ReplaceWithRcpOf = [&](Register Src) {
    B.buildIntrinsic(Intrinsic::amdgcn_rcp, DstReg, false)
        .addUse(Src)
        .setMIFlags(MIFlags);
    MI.eraseFromParent();
    return true;
  };

  if (auto NumerCst = getConstantFPVRegVal(Numer, MRI)) {
    // 1 / x -> RCP(x)
    if (NumerCst->isExactlyValue(1.0))
      return ReplaceWithRcpOf(Denom);

    // -1 / x -> RCP( FNEG(x) )
    if (NumerCst->isExactlyValue(-1.0))
      return ReplaceWithRcpOf(B.buildFNeg(DstTy, Denom, MIFlags).getReg(0));
  }

  // A general x / y may only be rewritten when reciprocal math is permitted.
  if (!AllowRcp)
    return false;

  // x / y -> x * (1.0 / y)
  auto Recip = B.buildIntrinsic(Intrinsic::amdgcn_rcp, {DstTy}, false)
                   .addUse(Denom)
                   .setMIFlags(MIFlags);
  B.buildFMul(DstReg, Numer, Recip, MIFlags);

  MI.eraseFromParent();
  return true;
}
1892+
1893+
bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin(MachineInstr &MI,
1894+
MachineRegisterInfo &MRI,
1895+
MachineIRBuilder &B) const {
18161896
B.setInstr(MI);
18171897
Register Res = MI.getOperand(0).getReg();
18181898
Register LHS = MI.getOperand(2).getReg();
@@ -2029,7 +2109,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
20292109
return legalizePreloadedArgIntrin(MI, MRI, B,
20302110
AMDGPUFunctionArgInfo::DISPATCH_ID);
20312111
case Intrinsic::amdgcn_fdiv_fast:
2032-
return legalizeFDIVFast(MI, MRI, B);
2112+
return legalizeFDIVFastIntrin(MI, MRI, B);
20332113
case Intrinsic::amdgcn_is_shared:
20342114
return legalizeIsAddrSpace(MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS);
20352115
case Intrinsic::amdgcn_is_private:

lib/Target/AMDGPU/AMDGPULegalizerInfo.h

Lines changed: 6 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -81,8 +81,12 @@ class AMDGPULegalizerInfo : public LegalizerInfo {
8181
MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B,
8282
AMDGPUFunctionArgInfo::PreloadedValue ArgType) const;
8383

84-
bool legalizeFDIVFast(MachineInstr &MI, MachineRegisterInfo &MRI,
85-
MachineIRBuilder &B) const;
84+
bool legalizeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
85+
MachineIRBuilder &B) const;
86+
bool legalizeFastUnsafeFDIV(MachineInstr &MI, MachineRegisterInfo &MRI,
87+
MachineIRBuilder &B) const;
88+
bool legalizeFDIVFastIntrin(MachineInstr &MI, MachineRegisterInfo &MRI,
89+
MachineIRBuilder &B) const;
8690

8791
bool legalizeImplicitArgPtr(MachineInstr &MI, MachineRegisterInfo &MRI,
8892
MachineIRBuilder &B) const;

0 commit comments

Comments
 (0)