@@ -46,6 +46,8 @@ class LoongArch final : public TargetInfo {
46
46
private:
47
47
void tlsdescToIe (uint8_t *loc, const Relocation &rel, uint64_t val) const ;
48
48
void tlsdescToLe (uint8_t *loc, const Relocation &rel, uint64_t val) const ;
49
+ bool tryGotToPCRel (uint8_t *loc, const Relocation &rHi20,
50
+ const Relocation &rLo12, uint64_t secAddr) const ;
49
51
};
50
52
} // end anonymous namespace
51
53
@@ -1155,6 +1157,78 @@ void LoongArch::tlsdescToLe(uint8_t *loc, const Relocation &rel,
1155
1157
}
1156
1158
}
1157
1159
1160
+ // Try GOT indirection to PC relative optimization.
1161
+ // From:
1162
+ // * pcalau12i $a0, %got_pc_hi20(sym_got)
1163
+ // * ld.w/d $a0, $a0, %got_pc_lo12(sym_got)
1164
+ // To:
1165
+ // * pcalau12i $a0, %pc_hi20(sym)
1166
+ // * addi.w/d $a0, $a0, %pc_lo12(sym)
1167
+ //
1168
+ // Note: Although the optimization has been performed, the GOT entries still
1169
+ // exist, similarly to AArch64. Eliminating the entries will increase code
1170
+ // complexity.
1171
+ bool LoongArch::tryGotToPCRel (uint8_t *loc, const Relocation &rHi20,
1172
+ const Relocation &rLo12, uint64_t secAddr) const {
1173
+ // Check if the relocations apply to consecutive instructions.
1174
+ if (rHi20.offset + 4 != rLo12.offset )
1175
+ return false ;
1176
+
1177
+ // Check if the relocations reference the same symbol and skip undefined,
1178
+ // preemptible and STT_GNU_IFUNC symbols.
1179
+ if (!rHi20.sym || rHi20.sym != rLo12.sym || !rHi20.sym ->isDefined () ||
1180
+ rHi20.sym ->isPreemptible || rHi20.sym ->isGnuIFunc ())
1181
+ return false ;
1182
+
1183
+ // GOT references to absolute symbols can't be relaxed to use PCALAU12I/ADDI
1184
+ // in position-independent code because these instructions produce a relative
1185
+ // address.
1186
+ if ((ctx.arg .isPic && !cast<Defined>(*rHi20.sym ).section ))
1187
+ return false ;
1188
+
1189
+ // Check if the addends of the both relocations are zero.
1190
+ if (rHi20.addend != 0 || rLo12.addend != 0 )
1191
+ return false ;
1192
+
1193
+ const uint32_t currInsn = read32le (loc);
1194
+ const uint32_t nextInsn = read32le (loc + 4 );
1195
+ const uint32_t ldOpcode = ctx.arg .is64 ? LD_D : LD_W;
1196
+ // Check if the first instruction is PCALAU12I and the second instruction is
1197
+ // LD.
1198
+ if ((currInsn & 0xfe000000 ) != PCALAU12I ||
1199
+ (nextInsn & 0xffc00000 ) != ldOpcode)
1200
+ return false ;
1201
+
1202
+ // Check if use the same register.
1203
+ if (getD5 (currInsn) != getJ5 (nextInsn) || getJ5 (nextInsn) != getD5 (nextInsn))
1204
+ return false ;
1205
+
1206
+ Symbol &sym = *rHi20.sym ;
1207
+ uint64_t symLocal = sym.getVA (ctx);
1208
+ const int64_t displace = symLocal - getLoongArchPage (secAddr + rHi20.offset );
1209
+ // Check if the symbol address is in
1210
+ // [(PC & ~0xfff) - 2GiB - 0x800, (PC & ~0xfff) + 2GiB - 0x800).
1211
+ const int64_t underflow = -0x80000000LL - 0x800 ;
1212
+ const int64_t overflow = 0x80000000LL - 0x800 ;
1213
+ if (!(displace >= underflow && displace < overflow))
1214
+ return false ;
1215
+
1216
+ Relocation newRHi20 = {RE_LOONGARCH_PAGE_PC, R_LARCH_PCALA_HI20, rHi20.offset ,
1217
+ rHi20.addend , &sym};
1218
+ Relocation newRLo12 = {R_ABS, R_LARCH_PCALA_LO12, rLo12.offset , rLo12.addend ,
1219
+ &sym};
1220
+ uint64_t pageDelta =
1221
+ getLoongArchPageDelta (symLocal, secAddr + rHi20.offset , rHi20.type );
1222
+ // pcalau12i $a0, %pc_hi20
1223
+ write32le (loc, insn (PCALAU12I, getD5 (currInsn), 0 , 0 ));
1224
+ relocate (loc, newRHi20, pageDelta);
1225
+ // addi.w/d $a0, $a0, %pc_lo12
1226
+ write32le (loc + 4 , insn (ctx.arg .is64 ? ADDI_D : ADDI_W, getD5 (nextInsn),
1227
+ getJ5 (nextInsn), 0 ));
1228
+ relocate (loc + 4 , newRLo12, SignExtend64 (symLocal, 64 ));
1229
+ return true ;
1230
+ }
1231
+
1158
1232
// During TLSDESC GD_TO_IE, the converted code sequence always includes an
1159
1233
// instruction related to the Lo12 relocation (ld.[wd]). To obtain correct val
1160
1234
// in `getRelocTargetVA`, expr of this instruction should be adjusted to
@@ -1172,6 +1246,30 @@ RelExpr LoongArch::adjustTlsExpr(RelType type, RelExpr expr) const {
1172
1246
return expr;
1173
1247
}
1174
1248
1249
+ static bool pairForGotRels (ArrayRef<Relocation> relocs) {
1250
+ // Check if R_LARCH_GOT_PC_HI20 and R_LARCH_GOT_PC_LO12 always appear in
1251
+ // pairs.
1252
+ size_t i = 0 ;
1253
+ const size_t size = relocs.size ();
1254
+ for (; i != size; ++i) {
1255
+ if (relocs[i].type == R_LARCH_GOT_PC_HI20) {
1256
+ if (i + 1 < size && relocs[i + 1 ].type == R_LARCH_GOT_PC_LO12) {
1257
+ ++i;
1258
+ continue ;
1259
+ }
1260
+ if (relaxable (relocs, i) && i + 2 < size &&
1261
+ relocs[i + 2 ].type == R_LARCH_GOT_PC_LO12) {
1262
+ i += 2 ;
1263
+ continue ;
1264
+ }
1265
+ break ;
1266
+ } else if (relocs[i].type == R_LARCH_GOT_PC_LO12) {
1267
+ break ;
1268
+ }
1269
+ }
1270
+ return i == size;
1271
+ }
1272
+
1175
1273
void LoongArch::relocateAlloc (InputSectionBase &sec, uint8_t *buf) const {
1176
1274
const unsigned bits = ctx.arg .is64 ? 64 : 32 ;
1177
1275
uint64_t secAddr = sec.getOutputSection ()->addr ;
@@ -1181,6 +1279,7 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
1181
1279
secAddr += ehIn->getParent ()->outSecOff ;
1182
1280
bool isExtreme = false , isRelax = false ;
1183
1281
const MutableArrayRef<Relocation> relocs = sec.relocs ();
1282
+ const bool isPairForGotRels = pairForGotRels (relocs);
1184
1283
for (size_t i = 0 , size = relocs.size (); i != size; ++i) {
1185
1284
Relocation &rel = relocs[i];
1186
1285
uint8_t *loc = buf + rel.offset ;
@@ -1264,6 +1363,24 @@ void LoongArch::relocateAlloc(InputSectionBase &sec, uint8_t *buf) const {
1264
1363
tlsdescToLe (loc, rel, val);
1265
1364
}
1266
1365
continue ;
1366
+ case RE_LOONGARCH_GOT_PAGE_PC:
1367
+ // In LoongArch, we try GOT indirection to PC relative optimization in
1368
+ // normal or medium code model, whether or not with R_LARCH_RELAX
1369
+ // relocation. Moreover, if the original code sequence can be relaxed to a
1370
+ // single instruction `pcaddi`, the first instruction will be removed and
1371
+ // it will not reach here.
1372
+ if (isPairForGotRels && rel.type == R_LARCH_GOT_PC_HI20) {
1373
+ bool isRelax = relaxable (relocs, i);
1374
+ const Relocation lo12Rel = isRelax ? relocs[i + 2 ] : relocs[i + 1 ];
1375
+ if (lo12Rel.type == R_LARCH_GOT_PC_LO12 &&
1376
+ tryGotToPCRel (loc, rel, lo12Rel, secAddr)) {
1377
+ // isRelax: skip relocations R_LARCH_RELAX, R_LARCH_GOT_PC_LO12
1378
+ // !isRelax: skip relocation R_LARCH_GOT_PC_LO12
1379
+ i += isRelax ? 2 : 1 ;
1380
+ continue ;
1381
+ }
1382
+ }
1383
+ break ;
1267
1384
default :
1268
1385
break ;
1269
1386
}
0 commit comments