13
13
//
14
14
//===----------------------------------------------------------------------===//
15
15
16
- class SMX60IsWorstCaseMX<string mx, list<string> MxList> {
17
- string LLMUL = LargestLMUL<MxList>.r;
18
- bit c = !eq(mx, LLMUL);
19
- }
16
+ //===----------------------------------------------------------------------===//
17
+ // Helpers
18
+
19
+ // Maps LMUL string to corresponding value from the Values array
20
+ // LMUL values map to array indices as follows:
21
+ // MF8 -> Values[0], MF4 -> Values[1], MF2 -> Values[2], M1 -> Values[3],
22
+ // M2 -> Values[4], M4 -> Values[5], M8 -> Values[6]
23
+ // Shorter lists are allowed, e.g., widening instructions don't work on M8
24
+ class GetLMULValue<list<int> Values, string LMUL> {
25
+ defvar Index = !cond(
26
+ !eq(LMUL, "MF8"): 0,
27
+ !eq(LMUL, "MF4"): 1,
28
+ !eq(LMUL, "MF2"): 2,
29
+ !eq(LMUL, "M1"): 3,
30
+ !eq(LMUL, "M2"): 4,
31
+ !eq(LMUL, "M4"): 5,
32
+ !eq(LMUL, "M8"): 6,
33
+ );
20
34
21
- class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
22
- string LLMUL = LargestLMUL<MxList>.r;
23
- int SSEW = SmallestSEW<mx, isF>.r;
24
- bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
35
+ assert !lt(Index, !size(Values)),
36
+ "Missing LMUL value for '" # LMUL # "'. " #
37
+ "Expected at least " # !add(Index, 1) # " elements, but got " #
38
+ !size(Values) # ".";
39
+
40
+ int c = Values[Index];
25
41
}
26
42
27
- defvar SMX60VLEN = 256;
28
- defvar SMX60DLEN = !div(SMX60VLEN, 2);
43
+ // Returns BaseValue for LMUL values before startLMUL, Value for startLMUL,
44
+ // then doubles Value for each subsequent LMUL
45
+ // Example: ConstValueUntilLMULThenDoubleBase<"M1", 2, 4, mx>.c yields, for each mx:
46
+ // MF8->2, MF4->2, MF2->2, M1->4, M2->8, M4->16, M8->32
47
+ // This is useful for modeling scheduling parameters that scale with LMUL.
48
+ class ConstValueUntilLMULThenDoubleBase<string startLMUL, int BaseValue, int Value, string currentLMUL> {
49
+ assert !le(BaseValue, Value), "BaseValue must be le to Value";
50
+ defvar startPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], startLMUL>.c;
51
+ defvar currentPos = GetLMULValue<[0, 1, 2, 3, 4, 5, 6], currentLMUL>.c;
29
52
30
- class Get1248Latency<string mx> {
53
+ // Calculate the difference in positions
54
+ defvar posDiff = !sub(currentPos, startPos);
55
+
56
+ // Calculate Value * (2^posDiff) using shift left
31
57
int c = !cond(
32
- !eq(mx, "M2") : 2,
33
- !eq(mx, "M4") : 4,
34
- !eq(mx, "M8") : 8,
35
- true: 1
58
+ !lt(posDiff, 0) : BaseValue,
59
+ !eq(posDiff, 0) : Value,
60
+ true: !mul(Value, !shl(1, posDiff))
36
61
);
37
62
}
38
63
39
- // Used for: logical opsz, shifts, sign ext, merge/move, FP sign/recip/convert, mask ops, slides
40
- class Get4816Latency<string mx> {
41
- int c = !cond(
42
- !eq(mx, "M4") : 8,
43
- !eq(mx, "M8") : 16,
44
- true: 4
45
- );
64
+ // Same as ConstValueUntilLMULThenDoubleBase, but with BaseValue equal to Value
65
+ class ConstValueUntilLMULThenDouble<string startLMUL, int Value, string currentLMUL> {
66
+ int c = ConstValueUntilLMULThenDoubleBase<startLMUL, Value, Value, currentLMUL>.c;
67
+ }
68
+
69
+ // Returns MF8->1, MF4->1, MF2->2, M1->4, M2->8, M4->16, M8->32
70
+ class ConstOneUntilMF4ThenDouble<string mx> {
71
+ int c = ConstValueUntilLMULThenDouble<"MF4", 1, mx>.c;
72
+ }
73
+
74
+ // Returns MF8->1, MF4->1, MF2->1, M1->2, M2->4, M4->8, M8->16
75
+ class ConstOneUntilMF2ThenDouble<string mx> {
76
+ int c = ConstValueUntilLMULThenDouble<"MF2", 1, mx>.c;
77
+ }
78
+
79
+ // Returns MF8->1, MF4->1, MF2->1, M1->1, M2->2, M4->4, M8->8
80
+ class ConstOneUntilM1ThenDouble<string mx> {
81
+ int c = ConstValueUntilLMULThenDouble<"M1", 1, mx>.c;
46
82
}
47
83
84
+ //===----------------------------------------------------------------------===//
85
+ // Latency helper classes
86
+
48
87
// Used for: arithmetic (add/sub/min/max), saturating/averaging, FP add/sub/min/max
49
- class Get458Latency<string mx> {
50
- int c = !cond(
51
- !eq(mx, "M4") : 5,
52
- !eq(mx, "M8") : 8,
53
- true: 4
54
- );
88
+ class Get4458Latency<string mx> {
89
+ int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/4, /*M4=*/5, /*M8=*/8], mx>.c;
55
90
}
56
91
57
- // Widening scaling pattern (4,4,4,4,5,8,8): plateaus at higher LMULs
58
- // Used for: widening operations
92
+ // Used for: widening operations (no M8)
59
93
class Get4588Latency<string mx> {
60
- int c = !cond(
61
- !eq(mx, "M2") : 5,
62
- !eq(mx, "M4") : 8,
63
- !eq(mx, "M8") : 8, // M8 not supported for most widening, fallback
64
- true: 4
65
- );
94
+ int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/5, /*M4=*/8], mx>.c;
66
95
}
67
96
68
97
// Used for: mask-producing comparisons, carry ops with mask, FP comparisons
69
98
class Get461018Latency<string mx> {
70
- int c = !cond(
71
- !eq(mx, "M2") : 6,
72
- !eq(mx, "M4") : 10,
73
- !eq(mx, "M8") : 18,
74
- true: 4
75
- );
99
+ int c = GetLMULValue<[/*MF8=*/4, /*MF4=*/4, /*MF2=*/4, /*M1=*/4, /*M2=*/6, /*M4=*/10, /*M8=*/18], mx>.c;
76
100
}
77
101
78
- // Used for: e64 multiply pattern, complex ops
79
- class Get781632Latency<string mx> {
80
- int c = !cond(
81
- !eq(mx, "M2") : 8,
82
- !eq(mx, "M4") : 16,
83
- !eq(mx, "M8") : 32,
84
- true: 7
85
- );
102
+ //===----------------------------------------------------------------------===//
103
+
104
+ class SMX60IsWorstCaseMX<string mx, list<string> MxList> {
105
+ string LLMUL = LargestLMUL<MxList>.r;
106
+ bit c = !eq(mx, LLMUL);
86
107
}
87
108
109
+ class SMX60IsWorstCaseMXSEW<string mx, int sew, list<string> MxList, bit isF = 0> {
110
+ string LLMUL = LargestLMUL<MxList>.r;
111
+ int SSEW = SmallestSEW<mx, isF>.r;
112
+ bit c = !and(!eq(mx, LLMUL), !eq(sew, SSEW));
113
+ }
114
+
115
+ defvar SMX60VLEN = 256;
116
+ defvar SMX60DLEN = !div(SMX60VLEN, 2);
117
+
88
118
def SpacemitX60Model : SchedMachineModel {
89
119
let IssueWidth = 2; // dual-issue
90
120
let MicroOpBufferSize = 0; // in-order
@@ -383,12 +413,13 @@ foreach LMul = [1, 2, 4, 8] in {
383
413
foreach mx = SchedMxList in {
384
414
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxList>.c;
385
415
386
- let Latency = Get458Latency <mx>.c, ReleaseAtCycles = [4] in {
416
+ let Latency = Get4458Latency <mx>.c, ReleaseAtCycles = [4] in {
387
417
defm "" : LMULWriteResMX<"WriteVIMinMaxV", [SMX60_VIEU], mx, IsWorstCase>;
388
418
defm "" : LMULWriteResMX<"WriteVIMinMaxX", [SMX60_VIEU], mx, IsWorstCase>;
389
419
}
390
420
391
- let Latency = Get4816Latency<mx>.c, ReleaseAtCycles = [4] in {
421
+ defvar VIALULat = ConstValueUntilLMULThenDouble<"M2", 4, mx>.c;
422
+ let Latency = VIALULat, ReleaseAtCycles = [4] in {
392
423
// Pattern of vadd, vsub, vrsub: 4/4/5/8
393
424
// Pattern of vand, vor, vxor: 4/4/8/16
394
425
// They are grouped together, so we used the worst case 4/4/8/16
@@ -425,7 +456,7 @@ foreach mx = SchedMxList in {
425
456
// Pattern of vmacc, vmadd, vmul, vmulh, etc.: e8/e16 = 4/4/5/8, e32 = 5/5/5/8,
426
457
// e64 = 7/8/16/32. We use the worst-case until we can split the SEW.
427
458
// TODO: change WriteVIMulV, etc to be defined with LMULSEWSchedWrites
428
- let Latency = Get781632Latency< mx>.c, ReleaseAtCycles = [7] in {
459
+ let Latency = ConstValueUntilLMULThenDoubleBase<"M2", 7, 8, mx>.c, ReleaseAtCycles = [7] in {
429
460
defm "" : LMULWriteResMX<"WriteVIMulV", [SMX60_VIEU], mx, IsWorstCase>;
430
461
defm "" : LMULWriteResMX<"WriteVIMulX", [SMX60_VIEU], mx, IsWorstCase>;
431
462
defm "" : LMULWriteResMX<"WriteVIMulAddV", [SMX60_VIEU], mx, IsWorstCase>;
@@ -461,15 +492,8 @@ foreach mx = SchedMxList in {
461
492
foreach sew = SchedSEWSet<mx>.val in {
462
493
defvar IsWorstCase = SMX60IsWorstCaseMXSEW<mx, sew, SchedMxList>.c;
463
494
464
- // Slightly reduced for fractional LMULs
465
- defvar Multiplier = !cond(
466
- !eq(mx, "MF8") : 12,
467
- !eq(mx, "MF4") : 12,
468
- !eq(mx, "MF2") : 12,
469
- true: 24
470
- );
471
-
472
- let Latency = !mul(Get1248Latency<mx>.c, Multiplier), ReleaseAtCycles = [12] in {
495
+ defvar VIDivLat = ConstValueUntilLMULThenDouble<"MF2", 12, mx>.c;
496
+ let Latency = VIDivLat, ReleaseAtCycles = [12] in {
473
497
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivV", [SMX60_VIEU], mx, sew, IsWorstCase>;
474
498
defm "" : LMULSEWWriteResMXSEW<"WriteVIDivX", [SMX60_VIEU], mx, sew, IsWorstCase>;
475
499
}
@@ -480,14 +504,8 @@ foreach mx = SchedMxList in {
480
504
foreach mx = SchedMxListW in {
481
505
defvar IsWorstCase = SMX60IsWorstCaseMX<mx, SchedMxListW>.c;
482
506
483
- // Slightly increased for integer LMULs
484
- defvar Multiplier = !cond(
485
- !eq(mx, "M2") : 2,
486
- !eq(mx, "M4") : 2,
487
- true: 1
488
- );
489
-
490
- let Latency = !mul(Get4816Latency<mx>.c, Multiplier), ReleaseAtCycles = [4] in {
507
+ defvar VNarrowingLat = ConstValueUntilLMULThenDouble<"M1", 4, mx>.c;
508
+ let Latency = VNarrowingLat, ReleaseAtCycles = [4] in {
491
509
defm "" : LMULWriteResMX<"WriteVNShiftV", [SMX60_VIEU], mx, IsWorstCase>;
492
510
defm "" : LMULWriteResMX<"WriteVNShiftX", [SMX60_VIEU], mx, IsWorstCase>;
493
511
defm "" : LMULWriteResMX<"WriteVNShiftI", [SMX60_VIEU], mx, IsWorstCase>;
0 commit comments