Skip to content

Commit 50cabc6

Browse files
committed
Merge from mainline
Don't DAG-combine floating-point selects into the max and min intrinsics. Those take v4f32 / v2f64 operands and may end up causing larger spills / restores. Added X86-specific nodes X86ISD::FMAX and X86ISD::FMIN instead. llvm-svn: 31651
1 parent 53a422c commit 50cabc6

File tree

3 files changed

+24
-64
lines changed

3 files changed

+24
-64
lines changed

llvm/lib/Target/X86/X86ISelLowering.cpp

Lines changed: 13 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -4967,6 +4967,8 @@ const char *X86TargetLowering::getTargetNodeName(unsigned Opcode) const {
49674967
case X86ISD::S2VEC: return "X86ISD::S2VEC";
49684968
case X86ISD::PEXTRW: return "X86ISD::PEXTRW";
49694969
case X86ISD::PINSRW: return "X86ISD::PINSRW";
4970+
case X86ISD::FMAX: return "X86ISD::FMAX";
4971+
case X86ISD::FMIN: return "X86ISD::FMIN";
49704972
}
49714973
}
49724974

@@ -5347,7 +5349,7 @@ static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
53475349
SDOperand RHS = N->getOperand(2);
53485350
ISD::CondCode CC = cast<CondCodeSDNode>(Cond.getOperand(2))->get();
53495351

5350-
unsigned IntNo = 0;
5352+
unsigned Opcode = 0;
53515353
if (LHS == Cond.getOperand(0) && RHS == Cond.getOperand(1)) {
53525354
switch (CC) {
53535355
default: break;
@@ -5358,9 +5360,8 @@ static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
53585360
// FALL THROUGH.
53595361
case ISD::SETOLT: // (X olt/lt Y) ? X : Y -> min
53605362
case ISD::SETLT:
5361-
IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_min_ss :
5362-
Intrinsic::x86_sse2_min_sd;
5363-
break;
5363+
Opcode = X86ISD::FMIN;
5364+
break;
53645365

53655366
case ISD::SETOGT: // (X > Y) ? X : Y -> max
53665367
case ISD::SETUGT:
@@ -5369,9 +5370,8 @@ static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
53695370
// FALL THROUGH.
53705371
case ISD::SETUGE: // (X uge/ge Y) ? X : Y -> max
53715372
case ISD::SETGE:
5372-
IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_max_ss :
5373-
Intrinsic::x86_sse2_max_sd;
5374-
break;
5373+
Opcode = X86ISD::FMAX;
5374+
break;
53755375
}
53765376
} else if (LHS == Cond.getOperand(1) && RHS == Cond.getOperand(0)) {
53775377
switch (CC) {
@@ -5383,9 +5383,8 @@ static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
53835383
// FALL THROUGH.
53845384
case ISD::SETUGE: // (X uge/ge Y) ? Y : X -> min
53855385
case ISD::SETGE:
5386-
IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_min_ss :
5387-
Intrinsic::x86_sse2_min_sd;
5388-
break;
5386+
Opcode = X86ISD::FMIN;
5387+
break;
53895388

53905389
case ISD::SETOLE: // (X <= Y) ? Y : X -> max
53915390
case ISD::SETULE:
@@ -5394,30 +5393,12 @@ static SDOperand PerformSELECTCombine(SDNode *N, SelectionDAG &DAG,
53945393
// FALL THROUGH.
53955394
case ISD::SETOLT: // (X olt/lt Y) ? Y : X -> max
53965395
case ISD::SETLT:
5397-
IntNo = LHS.getValueType() == MVT::f32 ? Intrinsic::x86_sse_max_ss :
5398-
Intrinsic::x86_sse2_max_sd;
5399-
break;
5400-
}
5401-
}
5402-
5403-
// minss/maxss take a v4f32 operand.
5404-
if (IntNo) {
5405-
if (LHS.getValueType() == MVT::f32) {
5406-
LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, LHS);
5407-
RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v4f32, RHS);
5408-
} else {
5409-
LHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, LHS);
5410-
RHS = DAG.getNode(ISD::SCALAR_TO_VECTOR, MVT::v2f64, RHS);
5396+
Opcode = X86ISD::FMAX;
5397+
break;
54115398
}
5412-
5413-
MVT::ValueType PtrTy = Subtarget->is64Bit() ? MVT::i64 : MVT::i32;
5414-
SDOperand IntNoN = DAG.getConstant(IntNo, PtrTy);
5415-
5416-
SDOperand Val = DAG.getNode(ISD::INTRINSIC_WO_CHAIN, LHS.getValueType(),
5417-
IntNoN, LHS, RHS);
5418-
return DAG.getNode(ISD::EXTRACT_VECTOR_ELT, N->getValueType(0), Val,
5419-
DAG.getConstant(0, PtrTy));
54205399
}
5400+
if (Opcode)
5401+
return DAG.getNode(Opcode, N->getValueType(0), LHS, RHS);
54215402
}
54225403

54235404
}

llvm/lib/Target/X86/X86ISelLowering.h

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -160,7 +160,11 @@ namespace llvm {
160160

161161
/// PINSRW - Insert the lower 16-bits of a 32-bit value to a vector,
162162
/// corresponds to X86::PINSRW.
163-
PINSRW
163+
PINSRW,
164+
165+
/// FMAX, FMIN - Floating point max and min.
166+
///
167+
FMAX, FMIN
164168
};
165169
}
166170

llvm/lib/Target/X86/X86InstrSSE.td

Lines changed: 6 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -20,6 +20,8 @@
2020

2121
def X86loadp : SDNode<"X86ISD::LOAD_PACK", SDTLoad, [SDNPHasChain]>;
2222
def X86loadu : SDNode<"X86ISD::LOAD_UA", SDTLoad, [SDNPHasChain]>;
23+
def X86fmin : SDNode<"X86ISD::FMIN", SDTFPBinOp>;
24+
def X86fmax : SDNode<"X86ISD::FMAX", SDTFPBinOp>;
2325
def X86fand : SDNode<"X86ISD::FAND", SDTFPBinOp,
2426
[SDNPCommutative, SDNPAssociative]>;
2527
def X86fxor : SDNode<"X86ISD::FXOR", SDTFPBinOp,
@@ -375,6 +377,10 @@ defm SUB : scalar_sse12_fp_binop_rm<0x5C, "sub", fsub,
375377
int_x86_sse_sub_ss, int_x86_sse2_sub_sd>;
376378
defm DIV : scalar_sse12_fp_binop_rm<0x5E, "div", fdiv,
377379
int_x86_sse_div_ss, int_x86_sse2_div_sd>;
380+
defm MAX : scalar_sse12_fp_binop_rm<0x5F, "max", X86fmax,
381+
int_x86_sse_max_ss, int_x86_sse2_max_sd>;
382+
defm MIN : scalar_sse12_fp_binop_rm<0x5D, "min", X86fmin,
383+
int_x86_sse_min_ss, int_x86_sse2_min_sd>;
378384

379385

380386
def SQRTSSr : SSI<0x51, MRMSrcReg, (ops FR32:$dst, FR32:$src),
@@ -390,44 +396,13 @@ def SQRTSDm : SDI<0x51, MRMSrcMem, (ops FR64:$dst, f64mem:$src),
390396
"sqrtsd {$src, $dst|$dst, $src}",
391397
[(set FR64:$dst, (fsqrt (loadf64 addr:$src)))]>;
392398

393-
class SS_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
394-
: SSI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
395-
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
396-
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, VR128:$src2)))]>;
397-
class SS_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
398-
: SSI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, ssmem:$src2),
399-
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
400-
[(set VR128:$dst, (v4f32 (IntId VR128:$src1, sse_load_f32:$src2)))]>;
401-
class SD_Intrr<bits<8> o, string OpcodeStr, Intrinsic IntId>
402-
: SDI<o, MRMSrcReg, (ops VR128:$dst, VR128:$src1, VR128:$src2),
403-
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
404-
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, VR128:$src2)))]>;
405-
class SD_Intrm<bits<8> o, string OpcodeStr, Intrinsic IntId>
406-
: SDI<o, MRMSrcMem, (ops VR128:$dst, VR128:$src1, sdmem:$src2),
407-
!strconcat(OpcodeStr, " {$src2, $dst|$dst, $src2}"),
408-
[(set VR128:$dst, (v2f64 (IntId VR128:$src1, sse_load_f64:$src2)))]>;
409-
410-
411399
// Aliases to match intrinsics which expect XMM operand(s).
412400

413401
defm SQRTSS_Int : SS_IntUnary<0x51, "sqrtss" , int_x86_sse_sqrt_ss>;
414402
defm SQRTSD_Int : SD_IntUnary<0x51, "sqrtsd" , int_x86_sse2_sqrt_sd>;
415403
defm RSQRTSS_Int : SS_IntUnary<0x52, "rsqrtss", int_x86_sse_rsqrt_ss>;
416404
defm RCPSS_Int : SS_IntUnary<0x53, "rcpss" , int_x86_sse_rcp_ss>;
417405

418-
let isTwoAddress = 1 in {
419-
let isCommutable = 1 in {
420-
def Int_MAXSSrr : SS_Intrr<0x5F, "maxss", int_x86_sse_max_ss>;
421-
def Int_MAXSDrr : SD_Intrr<0x5F, "maxsd", int_x86_sse2_max_sd>;
422-
def Int_MINSSrr : SS_Intrr<0x5D, "minss", int_x86_sse_min_ss>;
423-
def Int_MINSDrr : SD_Intrr<0x5D, "minsd", int_x86_sse2_min_sd>;
424-
}
425-
def Int_MAXSSrm : SS_Intrm<0x5F, "maxss", int_x86_sse_max_ss>;
426-
def Int_MAXSDrm : SD_Intrm<0x5F, "maxsd", int_x86_sse2_max_sd>;
427-
def Int_MINSSrm : SS_Intrm<0x5D, "minss", int_x86_sse_min_ss>;
428-
def Int_MINSDrm : SD_Intrm<0x5D, "minsd", int_x86_sse2_min_sd>;
429-
}
430-
431406
// Conversion instructions
432407
def CVTTSS2SIrr: SSI<0x2C, MRMSrcReg, (ops GR32:$dst, FR32:$src),
433408
"cvttss2si {$src, $dst|$dst, $src}",

0 commit comments

Comments
 (0)