@@ -336,6 +336,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
336
336
.legalFor ({S32, S64});
337
337
auto &TrigActions = getActionDefinitionsBuilder ({G_FSIN, G_FCOS})
338
338
.customFor ({S32, S64});
339
+ auto &FDIVActions = getActionDefinitionsBuilder (G_FDIV)
340
+ .customFor ({S32, S64});
339
341
340
342
if (ST.has16BitInsts ()) {
341
343
if (ST.hasVOP3PInsts ())
@@ -344,6 +346,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
344
346
FPOpActions.legalFor ({S16});
345
347
346
348
TrigActions.customFor ({S16});
349
+ FDIVActions.customFor ({S16});
347
350
}
348
351
349
352
auto &MinNumMaxNum = getActionDefinitionsBuilder ({
@@ -375,6 +378,10 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_,
375
378
.scalarize (0 )
376
379
.clampScalar (0 , ST.has16BitInsts () ? S16 : S32, S64);
377
380
381
+ FDIVActions
382
+ .scalarize (0 )
383
+ .clampScalar (0 , ST.has16BitInsts () ? S16 : S32, S64);
384
+
378
385
getActionDefinitionsBuilder ({G_FNEG, G_FABS})
379
386
.legalFor (FPTypesPK16)
380
387
.clampMaxNumElements (0 , S16, 2 )
@@ -1107,6 +1114,8 @@ bool AMDGPULegalizerInfo::legalizeCustom(MachineInstr &MI,
1107
1114
return legalizeLoad (MI, MRI, B, Observer);
1108
1115
case TargetOpcode::G_FMAD:
1109
1116
return legalizeFMad (MI, MRI, B);
1117
+ case TargetOpcode::G_FDIV:
1118
+ return legalizeFDIV (MI, MRI, B);
1110
1119
default :
1111
1120
return false ;
1112
1121
}
@@ -1810,9 +1819,80 @@ bool AMDGPULegalizerInfo::legalizePreloadedArgIntrin(
1810
1819
return false ;
1811
1820
}
1812
1821
1813
- bool AMDGPULegalizerInfo::legalizeFDIVFast (MachineInstr &MI,
1814
- MachineRegisterInfo &MRI,
1815
- MachineIRBuilder &B) const {
1822
+ bool AMDGPULegalizerInfo::legalizeFDIV (MachineInstr &MI,
1823
+ MachineRegisterInfo &MRI,
1824
+ MachineIRBuilder &B) const {
1825
+ B.setInstr (MI);
1826
+
1827
+ if (legalizeFastUnsafeFDIV (MI, MRI, B))
1828
+ return true ;
1829
+
1830
+ return false ;
1831
+ }
1832
+
1833
+ bool AMDGPULegalizerInfo::legalizeFastUnsafeFDIV (MachineInstr &MI,
1834
+ MachineRegisterInfo &MRI,
1835
+ MachineIRBuilder &B) const {
1836
+ Register Res = MI.getOperand (0 ).getReg ();
1837
+ Register LHS = MI.getOperand (1 ).getReg ();
1838
+ Register RHS = MI.getOperand (2 ).getReg ();
1839
+
1840
+ uint16_t Flags = MI.getFlags ();
1841
+
1842
+ LLT ResTy = MRI.getType (Res);
1843
+ LLT S32 = LLT::scalar (32 );
1844
+ LLT S64 = LLT::scalar (64 );
1845
+
1846
+ const MachineFunction &MF = B.getMF ();
1847
+ bool Unsafe =
1848
+ MF.getTarget ().Options .UnsafeFPMath || MI.getFlag (MachineInstr::FmArcp);
1849
+
1850
+ if (!MF.getTarget ().Options .UnsafeFPMath && ResTy == S64)
1851
+ return false ;
1852
+
1853
+ if (!Unsafe && ResTy == S32 && ST.hasFP32Denormals ())
1854
+ return false ;
1855
+
1856
+ if (auto CLHS = getConstantFPVRegVal (LHS, MRI)) {
1857
+ // 1 / x -> RCP(x)
1858
+ if (CLHS->isExactlyValue (1.0 )) {
1859
+ B.buildIntrinsic (Intrinsic::amdgcn_rcp, Res, false )
1860
+ .addUse (RHS)
1861
+ .setMIFlags (Flags);
1862
+
1863
+ MI.eraseFromParent ();
1864
+ return true ;
1865
+ }
1866
+
1867
+ // -1 / x -> RCP( FNEG(x) )
1868
+ if (CLHS->isExactlyValue (-1.0 )) {
1869
+ auto FNeg = B.buildFNeg (ResTy, RHS, Flags);
1870
+ B.buildIntrinsic (Intrinsic::amdgcn_rcp, Res, false )
1871
+ .addUse (FNeg.getReg (0 ))
1872
+ .setMIFlags (Flags);
1873
+
1874
+ MI.eraseFromParent ();
1875
+ return true ;
1876
+ }
1877
+ }
1878
+
1879
+ // x / y -> x * (1.0 / y)
1880
+ if (Unsafe) {
1881
+ auto RCP = B.buildIntrinsic (Intrinsic::amdgcn_rcp, {ResTy}, false )
1882
+ .addUse (RHS)
1883
+ .setMIFlags (Flags);
1884
+ B.buildFMul (Res, LHS, RCP, Flags);
1885
+
1886
+ MI.eraseFromParent ();
1887
+ return true ;
1888
+ }
1889
+
1890
+ return false ;
1891
+ }
1892
+
1893
+ bool AMDGPULegalizerInfo::legalizeFDIVFastIntrin (MachineInstr &MI,
1894
+ MachineRegisterInfo &MRI,
1895
+ MachineIRBuilder &B) const {
1816
1896
B.setInstr (MI);
1817
1897
Register Res = MI.getOperand (0 ).getReg ();
1818
1898
Register LHS = MI.getOperand (2 ).getReg ();
@@ -2029,7 +2109,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI,
2029
2109
return legalizePreloadedArgIntrin (MI, MRI, B,
2030
2110
AMDGPUFunctionArgInfo::DISPATCH_ID);
2031
2111
case Intrinsic::amdgcn_fdiv_fast:
2032
- return legalizeFDIVFast (MI, MRI, B);
2112
+ return legalizeFDIVFastIntrin (MI, MRI, B);
2033
2113
case Intrinsic::amdgcn_is_shared:
2034
2114
return legalizeIsAddrSpace (MI, MRI, B, AMDGPUAS::LOCAL_ADDRESS);
2035
2115
case Intrinsic::amdgcn_is_private:
0 commit comments