Skip to content

Commit 6f86e6b

Browse files
committed
[ARM][MVE] Add target flag for narrowing insts
Add a flag, 'RetainsPreviousHalfElement', for operations that operate on top/bottom halves of their input and only write to half of their destination, leaving the other half to retain its previous value. Differential Revision: https://reviews.llvm.org/D76608
1 parent 733edf9 commit 6f86e6b

File tree

4 files changed

+111
-1
lines changed

4 files changed

+111
-1
lines changed

llvm/lib/Target/ARM/ARMInstrFormats.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -408,6 +408,7 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
408408
bit thumbArithFlagSetting = 0;
409409

410410
bit validForTailPredication = 0;
411+
bit retainsPreviousHalfElement = 0;
411412

412413
// If this is a pseudo instruction, mark it isCodeGenOnly.
413414
let isCodeGenOnly = !eq(!cast<string>(f), "Pseudo");
@@ -421,6 +422,7 @@ class InstTemplate<AddrMode am, int sz, IndexMode im,
421422
let TSFlags{18-15} = D.Value;
422423
let TSFlags{19} = thumbArithFlagSetting;
423424
let TSFlags{20} = validForTailPredication;
425+
let TSFlags{21} = retainsPreviousHalfElement;
424426

425427
let Constraints = cstr;
426428
let Itinerary = itin;

llvm/lib/Target/ARM/ARMInstrMVE.td

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2688,6 +2688,7 @@ class MVE_VxSHRN<string iname, string suffix, bit bit_12, bit bit_28,
26882688
let Inst{4} = 0b0;
26892689
let Inst{0} = 0b1;
26902690
let validForTailPredication = 1;
2691+
let retainsPreviousHalfElement = 1;
26912692
}
26922693

26932694
def MVE_VRSHRNi16bh : MVE_VxSHRN<"vrshrnb", "i16", 0b0, 0b1, shr_imm8> {
@@ -2730,6 +2731,7 @@ class MVE_VxQRSHRUN<string iname, string suffix, bit bit_28, bit bit_12,
27302731
let Inst{4} = 0b0;
27312732
let Inst{0} = 0b0;
27322733
let validForTailPredication = 1;
2734+
let retainsPreviousHalfElement = 1;
27332735
}
27342736

27352737
def MVE_VQRSHRUNs16bh : MVE_VxQRSHRUN<
@@ -2779,6 +2781,7 @@ class MVE_VxQRSHRN<string iname, string suffix, bit bit_0, bit bit_12,
27792781
let Inst{4} = 0b0;
27802782
let Inst{0} = bit_0;
27812783
let validForTailPredication = 1;
2784+
let retainsPreviousHalfElement = 1;
27822785
}
27832786

27842787
multiclass MVE_VxQRSHRN_types<string iname, bit bit_0, bit bit_12> {
@@ -4492,6 +4495,7 @@ class MVE_VxMOVxN<string iname, string suffix, bit bit_28, bit bit_17,
44924495
let Inst{7} = !if(!eq(bit_17, 0), 1, 0);
44934496
let Inst{0} = 0b1;
44944497
let validForTailPredication = 1;
4498+
let retainsPreviousHalfElement = 1;
44954499
}
44964500

44974501
multiclass MVE_VxMOVxN_halves<string iname, string suffix,
@@ -4589,6 +4593,7 @@ class MVE_VCVT_ff<string iname, string suffix, bit op, bit T,
45894593
let Inst{0} = 0b1;
45904594

45914595
let Predicates = [HasMVEFloat];
4596+
let retainsPreviousHalfElement = 1;
45924597
}
45934598

45944599
multiclass MVE_VCVT_f2h_m<string iname, int half> {

llvm/lib/Target/ARM/MCTargetDesc/ARMBaseInfo.h

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -396,6 +396,10 @@ namespace ARMII {
396396
// Whether an instruction can be included in an MVE tail-predicated loop.
397397
ValidForTailPredication = 1 << 20,
398398

399+
// Whether an instruction writes to the top/bottom half of a vector element
400+
// and leaves the other half untouched.
401+
RetainsPreviousHalfElement = 1 << 21,
402+
399403
//===------------------------------------------------------------------===//
400404
// Code domain.
401405
DomainShift = 15,

llvm/unittests/Target/ARM/MachineInstrTest.cpp

Lines changed: 100 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -10,11 +10,110 @@
1010

1111
using namespace llvm;
1212

13+
// Verifies that the ARMII::RetainsPreviousHalfElement TSFlag is set on exactly
// the MVE-domain opcodes enumerated below and on no other MVE-domain opcode.
// The flag marks instructions that write only the top/bottom half of each
// destination element and leave the other half with its previous value.
TEST(MachineInstructionRetainsPreviousHalfElement, IsCorrect) {
  using namespace ARM;

  // Hand-maintained expectation: returns true for every opcode that is
  // expected to carry the RetainsPreviousHalfElement flag, false otherwise.
  auto RetainsPreviousHalfElement = [](unsigned Opcode) {
    switch (Opcode) {
    default:
      break;
    case MVE_VMOVNi16bh:
    case MVE_VMOVNi16th:
    case MVE_VMOVNi32bh:
    case MVE_VMOVNi32th:
    case MVE_VQMOVNs16bh:
    case MVE_VQMOVNs16th:
    case MVE_VQMOVNs32bh:
    case MVE_VQMOVNs32th:
    case MVE_VQMOVNu16bh:
    case MVE_VQMOVNu16th:
    case MVE_VQMOVNu32bh:
    case MVE_VQMOVNu32th:
    case MVE_VQMOVUNs16bh:
    case MVE_VQMOVUNs16th:
    case MVE_VQMOVUNs32bh:
    case MVE_VQMOVUNs32th:
    case MVE_VQRSHRNbhs16:
    case MVE_VQRSHRNbhs32:
    case MVE_VQRSHRNbhu16:
    case MVE_VQRSHRNbhu32:
    case MVE_VQRSHRNths16:
    case MVE_VQRSHRNths32:
    case MVE_VQRSHRNthu16:
    case MVE_VQRSHRNthu32:
    case MVE_VQRSHRUNs16bh:
    case MVE_VQRSHRUNs16th:
    case MVE_VQRSHRUNs32bh:
    case MVE_VQRSHRUNs32th:
    case MVE_VQSHRNbhs16:
    case MVE_VQSHRNbhs32:
    case MVE_VQSHRNbhu16:
    case MVE_VQSHRNbhu32:
    case MVE_VQSHRNths16:
    case MVE_VQSHRNths32:
    case MVE_VQSHRNthu16:
    case MVE_VQSHRNthu32:
    case MVE_VQSHRUNs16bh:
    case MVE_VQSHRUNs16th:
    case MVE_VQSHRUNs32bh:
    case MVE_VQSHRUNs32th:
    case MVE_VRSHRNi16bh:
    case MVE_VRSHRNi16th:
    case MVE_VRSHRNi32bh:
    case MVE_VRSHRNi32th:
    case MVE_VSHRNi16bh:
    case MVE_VSHRNi16th:
    case MVE_VSHRNi32bh:
    case MVE_VSHRNi32th:
    case MVE_VCVTf16f32bh:
    case MVE_VCVTf16f32th:
    case MVE_VCVTf32f16bh:
    case MVE_VCVTf32f16th:
      return true;
    }
    return false;
  };

  LLVMInitializeARMTargetInfo();
  LLVMInitializeARMTarget();
  LLVMInitializeARMTargetMC();

  auto TT(Triple::normalize("thumbv8.1m.main-arm-none-eabi"));
  std::string Error;
  const Target *T = TargetRegistry::lookupTarget(TT, Error);
  if (!T) {
    // NOTE: when the target cannot be looked up, the error is printed and the
    // test returns without recording a failure.
    dbgs() << Error;
    return;
  }

  TargetOptions Options;
  auto TM = std::unique_ptr<LLVMTargetMachine>(
      static_cast<LLVMTargetMachine *>(
          T->createTargetMachine(TT, "generic", "", Options, None, None,
                                 CodeGenOpt::Default)));
  ARMSubtarget ST(TM->getTargetTriple(), std::string(TM->getTargetCPU()),
                  std::string(TM->getTargetFeatureString()),
                  *static_cast<const ARMBaseTargetMachine *>(TM.get()), false);
  const ARMBaseInstrInfo *TII = ST.getInstrInfo();
  auto MII = TM->getMCInstrInfo();

  for (unsigned i = 0; i < ARM::INSTRUCTION_LIST_END; ++i) {
    const MCInstrDesc &Desc = TII->get(i);

    // Only instructions in the MVE domain are checked; everything else is
    // skipped regardless of its flag bits.
    uint64_t Flags = Desc.TSFlags;
    if ((Flags & ARMII::DomainMask) != ARMII::DomainMVE)
      continue;

    // Compare the flag from the instruction description against the
    // expectation table above; name the offending opcode on mismatch.
    bool Valid = (Flags & ARMII::RetainsPreviousHalfElement) != 0;
    ASSERT_EQ(RetainsPreviousHalfElement(i), Valid)
        << MII->getName(i)
        << ": mismatched expectation for RetainsPreviousHalfElement\n";
  }
}
13113
// Test for instructions that aren't immediately obviously valid within a
14114
// tail-predicated loop. This should be marked up in their tablegen
15115
// descriptions. Currently we, conservatively, disallow:
16116
// - cross beat carries.
17-
// - narrowing of results.
18117
// - complex operations.
19118
// - horizontal operations.
20119
// - byte swapping.

0 commit comments

Comments
 (0)