Skip to content

Commit 1786047

Browse files
committed
[X86] Fix SLM v2i64 ADD/SUB/CMPEQ instruction schedules
Noticed while fixing the reduction costs for D59710: the SLM model doesn't account for the poor throughput of v2i64 ops. Numbers are taken from the Intel Architecture Optimization Manual (AOM) and were checked against Agner Fog's instruction tables.
1 parent ad70d5f commit 1786047

File tree

3 files changed

+30
-14
lines changed

3 files changed

+30
-14
lines changed

llvm/lib/Target/X86/X86ScheduleSLM.td

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -511,4 +511,20 @@ defm : X86WriteResUnsupported<WriteCvtPS2PHSt>;
511511
defm : X86WriteResUnsupported<WriteCvtPS2PHYSt>;
512512
defm : X86WriteResUnsupported<WriteCvtPS2PHZSt>;
513513

514+
// Remaining SLM instrs.
515+
516+
// Per-instruction override for the 128-bit (v2i64) PADDQ/PSUBQ/PCMPEQQ
// register-register forms, which are slower on SLM than the other vector
// integer ALU ops: 2 uops, 4-cycle latency, and SLM_FPC_RSV01 held for 4 cycles.
// NOTE(review): llvm-mca reports this as 2.00 cycles on each of two resource
// columns (reciprocal throughput 2.00), so SLM_FPC_RSV01 is presumably a group
// of two units - confirm against its definition earlier in this file.
def SLMWriteResGroup1rr : SchedWriteRes<[SLM_FPC_RSV01]> {
517+
let Latency = 4;
518+
let NumMicroOps = 2;
519+
let ResourceCycles = [4];
520+
}
521+
// Bind the override to the SSE (xmm) register forms only.
def: InstRW<[SLMWriteResGroup1rr], (instrs PADDQrr, PSUBQrr, PCMPEQQrr)>;
522+
523+
// Folded-load counterpart of the v2i64 PADDQ/PSUBQ/PCMPEQQ override:
// 3 uops and 7-cycle latency. ResourceCycles entries pair positionally with
// the resource list: 1 cycle on SLM_MEC_RSV and 4 cycles on SLM_FPC_RSV01.
// NOTE(review): 7 looks like the register-form latency (4) plus the 3-cycle
// load penalty seen on other SLM folded-load forms - confirm.
def SLMWriteResGroup1rm : SchedWriteRes<[SLM_MEC_RSV,SLM_FPC_RSV01]> {
524+
let Latency = 7;
525+
let NumMicroOps = 3;
526+
let ResourceCycles = [1,4];
527+
}
528+
// Bind the override to the SSE (xmm) register-memory forms only.
def: InstRW<[SLMWriteResGroup1rm], (instrs PADDQrm, PSUBQrm, PCMPEQQrm)>;
529+
514530
} // SchedModel

llvm/test/tools/llvm-mca/X86/SLM/resources-sse2.s

Lines changed: 9 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -528,8 +528,8 @@ xorpd (%rax), %xmm2
528528
# CHECK-NEXT: 1 4 1.00 * paddd (%rax), %xmm2
529529
# CHECK-NEXT: 1 1 0.50 paddq %mm0, %mm2
530530
# CHECK-NEXT: 1 4 1.00 * paddq (%rax), %mm2
531-
# CHECK-NEXT: 1 1 0.50 paddq %xmm0, %xmm2
532-
# CHECK-NEXT: 1 4 1.00 * paddq (%rax), %xmm2
531+
# CHECK-NEXT: 2 4 2.00 paddq %xmm0, %xmm2
532+
# CHECK-NEXT: 3 7 2.00 * paddq (%rax), %xmm2
533533
# CHECK-NEXT: 1 1 0.50 paddsb %xmm0, %xmm2
534534
# CHECK-NEXT: 1 4 1.00 * paddsb (%rax), %xmm2
535535
# CHECK-NEXT: 1 1 0.50 paddsw %xmm0, %xmm2
@@ -626,8 +626,8 @@ xorpd (%rax), %xmm2
626626
# CHECK-NEXT: 1 4 1.00 * psubd (%rax), %xmm2
627627
# CHECK-NEXT: 1 1 0.50 psubq %mm0, %mm2
628628
# CHECK-NEXT: 1 4 1.00 * psubq (%rax), %mm2
629-
# CHECK-NEXT: 1 1 0.50 psubq %xmm0, %xmm2
630-
# CHECK-NEXT: 1 4 1.00 * psubq (%rax), %xmm2
629+
# CHECK-NEXT: 2 4 2.00 psubq %xmm0, %xmm2
630+
# CHECK-NEXT: 3 7 2.00 * psubq (%rax), %xmm2
631631
# CHECK-NEXT: 1 1 0.50 psubsb %xmm0, %xmm2
632632
# CHECK-NEXT: 1 4 1.00 * psubsb (%rax), %xmm2
633633
# CHECK-NEXT: 1 1 0.50 psubsw %xmm0, %xmm2
@@ -687,7 +687,7 @@ xorpd (%rax), %xmm2
687687

688688
# CHECK: Resource pressure per iteration:
689689
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
690-
# CHECK-NEXT: - 412.00 12.00 152.50 90.50 3.00 3.00 134.00
690+
# CHECK-NEXT: - 412.00 12.00 158.50 96.50 3.00 3.00 134.00
691691

692692
# CHECK: Resource pressure by instruction:
693693
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -812,8 +812,8 @@ xorpd (%rax), %xmm2
812812
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 paddd (%rax), %xmm2
813813
# CHECK-NEXT: - - - 0.50 0.50 - - - paddq %mm0, %mm2
814814
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 paddq (%rax), %mm2
815-
# CHECK-NEXT: - - - 0.50 0.50 - - - paddq %xmm0, %xmm2
816-
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 paddq (%rax), %xmm2
815+
# CHECK-NEXT: - - - 2.00 2.00 - - - paddq %xmm0, %xmm2
816+
# CHECK-NEXT: - - - 2.00 2.00 - - 1.00 paddq (%rax), %xmm2
817817
# CHECK-NEXT: - - - 0.50 0.50 - - - paddsb %xmm0, %xmm2
818818
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 paddsb (%rax), %xmm2
819819
# CHECK-NEXT: - - - 0.50 0.50 - - - paddsw %xmm0, %xmm2
@@ -910,8 +910,8 @@ xorpd (%rax), %xmm2
910910
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 psubd (%rax), %xmm2
911911
# CHECK-NEXT: - - - 0.50 0.50 - - - psubq %mm0, %mm2
912912
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 psubq (%rax), %mm2
913-
# CHECK-NEXT: - - - 0.50 0.50 - - - psubq %xmm0, %xmm2
914-
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 psubq (%rax), %xmm2
913+
# CHECK-NEXT: - - - 2.00 2.00 - - - psubq %xmm0, %xmm2
914+
# CHECK-NEXT: - - - 2.00 2.00 - - 1.00 psubq (%rax), %xmm2
915915
# CHECK-NEXT: - - - 0.50 0.50 - - - psubsb %xmm0, %xmm2
916916
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 psubsb (%rax), %xmm2
917917
# CHECK-NEXT: - - - 0.50 0.50 - - - psubsw %xmm0, %xmm2

llvm/test/tools/llvm-mca/X86/SLM/resources-sse41.s

Lines changed: 5 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -180,8 +180,8 @@ roundss $1, (%rax), %xmm2
180180
# CHECK-NEXT: 1 4 1.00 * pblendvb %xmm0, (%rax), %xmm2
181181
# CHECK-NEXT: 1 1 1.00 pblendw $11, %xmm0, %xmm2
182182
# CHECK-NEXT: 1 4 1.00 * pblendw $11, (%rax), %xmm2
183-
# CHECK-NEXT: 1 1 0.50 pcmpeqq %xmm0, %xmm2
184-
# CHECK-NEXT: 1 4 1.00 * pcmpeqq (%rax), %xmm2
183+
# CHECK-NEXT: 2 4 2.00 pcmpeqq %xmm0, %xmm2
184+
# CHECK-NEXT: 3 7 2.00 * pcmpeqq (%rax), %xmm2
185185
# CHECK-NEXT: 1 1 1.00 pextrb $1, %xmm0, %ecx
186186
# CHECK-NEXT: 2 4 2.00 * pextrb $1, %xmm0, (%rax)
187187
# CHECK-NEXT: 1 1 1.00 pextrd $1, %xmm0, %ecx
@@ -264,7 +264,7 @@ roundss $1, (%rax), %xmm2
264264

265265
# CHECK: Resource pressure per iteration:
266266
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7]
267-
# CHECK-NEXT: - - - 85.00 22.00 - - 54.00
267+
# CHECK-NEXT: - - - 88.00 25.00 - - 54.00
268268

269269
# CHECK: Resource pressure by instruction:
270270
# CHECK-NEXT: [0] [1] [2] [3] [4] [5] [6] [7] Instructions:
@@ -293,8 +293,8 @@ roundss $1, (%rax), %xmm2
293293
# CHECK-NEXT: - - - 1.00 - - - 1.00 pblendvb %xmm0, (%rax), %xmm2
294294
# CHECK-NEXT: - - - 1.00 - - - - pblendw $11, %xmm0, %xmm2
295295
# CHECK-NEXT: - - - 1.00 - - - 1.00 pblendw $11, (%rax), %xmm2
296-
# CHECK-NEXT: - - - 0.50 0.50 - - - pcmpeqq %xmm0, %xmm2
297-
# CHECK-NEXT: - - - 0.50 0.50 - - 1.00 pcmpeqq (%rax), %xmm2
296+
# CHECK-NEXT: - - - 2.00 2.00 - - - pcmpeqq %xmm0, %xmm2
297+
# CHECK-NEXT: - - - 2.00 2.00 - - 1.00 pcmpeqq (%rax), %xmm2
298298
# CHECK-NEXT: - - - 1.00 - - - - pextrb $1, %xmm0, %ecx
299299
# CHECK-NEXT: - - - 1.00 - - - 2.00 pextrb $1, %xmm0, (%rax)
300300
# CHECK-NEXT: - - - 1.00 - - - - pextrd $1, %xmm0, %ecx

0 commit comments

Comments
 (0)