Skip to content

Commit dcc71f2

Browse files
authored
[LLD][COFF] Add support for ARM64X same-address thunks (#151255)
Fixes support for MSVC CRT thread-local constructors on hybrid ARM64X targets. `-arm64xsameaddress` is an undocumented option that ensures the specified function has the same address in both native and EC views of hybrid images. To achieve this, the linker emits additional thunks and replaces the symbols of those functions with the thunk symbol (the same thunk is used in both views). The thunk code jumps to the native function (similar to range extension thunks), but additional ARM64X relocations are emitted to replace the target with the EC function in the EC view. MSVC appears to generate thunks even for non-hybrid ARM64EC images. As a side effect, the native symbol is pulled in. Since this is used in the CRT for thread-local constructors, it results in the image containing unnecessary native code. Because these thunks do not appear to be useful in that context, we limit this behavior to actual hybrid targets. This may change if compatibility requires it. The tricky part is that thunks should be skipped unless the symbol is live in both views, and symbol replacement must be reflected in weak aliases. This requires thunk generation to happen before resolving weak aliases but after the GC pass. To enable this, the `markLive` call was moved earlier, and the final weak alias resolution was postponed until afterward. This in turn requires more code to be aware of weak aliases, which previously could assume they were already resolved.
1 parent 8c9863e commit dcc71f2

File tree

12 files changed

+278
-50
lines changed

12 files changed

+278
-50
lines changed

lld/COFF/Chunks.cpp

Lines changed: 15 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -875,6 +875,19 @@ void RangeExtensionThunkARM64::writeTo(uint8_t *buf) const {
875875
applyArm64Imm(buf + 4, target->getRVA() & 0xfff, 0);
876876
}
877877

878+
// Sets the values of the ARM64X relocations covering the first two
// instructions (adrp/add) of this thunk so that they reference hybridTarget.
//
// The replacement instructions are obtained by rendering a scratch range
// extension thunk for hybridTarget at this thunk's RVA and reading back its
// first two 32-bit words.
void SameAddressThunkARM64EC::setDynamicRelocs(COFFLinkerContext &ctx) const {
  // Render the thunk as it looks when it jumps to the hybrid target.
  uint8_t scratch[sizeof(arm64Thunk)];
  RangeExtensionThunkARM64 hybridThunk(ARM64EC, hybridTarget);
  hybridThunk.setRVA(rva);
  hybridThunk.writeTo(scratch);

  // The adrp instruction comes first, immediately followed by the add.
  const auto *insns = reinterpret_cast<const ulittle32_t *>(scratch);
  const uint32_t adrpInsn = insns[0];
  const uint32_t addInsn = insns[1];

  // One relocation per instruction: the adrp sits at the start of this chunk
  // and the add one instruction word later.
  ctx.dynamicRelocs->set(this, adrpInsn);
  ctx.dynamicRelocs->set(Arm64XRelocVal(this, sizeof(uint32_t)), addInsn);
}
890+
878891
LocalImportChunk::LocalImportChunk(COFFLinkerContext &c, Defined *s)
879892
: sym(s), ctx(c) {
880893
setAlignment(ctx.config.wordsize);
@@ -1258,7 +1271,8 @@ void DynamicRelocsChunk::finalize() {
12581271
}
12591272

12601273
// Set the reloc value. The reloc entry must be allocated beforehand.
1261-
void DynamicRelocsChunk::set(uint32_t rva, Arm64XRelocVal value) {
1274+
void DynamicRelocsChunk::set(Arm64XRelocVal offset, Arm64XRelocVal value) {
1275+
uint32_t rva = offset.get();
12621276
auto entry =
12631277
llvm::find_if(arm64xRelocs, [rva](const Arm64XDynamicRelocEntry &e) {
12641278
return e.offset.get() == rva;

lld/COFF/Chunks.h

Lines changed: 28 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -193,6 +193,8 @@ class NonSectionChunk : public Chunk {
193193
// allowed ranges. Return the additional space required for the extension.
194194
virtual uint32_t extendRanges() { return 0; };
195195

196+
// Returns the entry thunk associated with this chunk. The default
// implementation has none and returns nullptr; subclasses override it.
virtual Defined *getEntryThunk() const { return nullptr; };
197+
196198
static bool classof(const Chunk *c) { return c->kind() >= OtherKind; }
197199

198200
protected:
@@ -633,7 +635,7 @@ class ImportThunkChunkARM64EC : public ImportThunkChunk {
633635
bool verifyRanges() override;
634636
uint32_t extendRanges() override;
635637

636-
Defined *exitThunk;
638+
Defined *exitThunk = nullptr;
637639
Defined *sym = nullptr;
638640
bool extended = false;
639641

@@ -675,6 +677,26 @@ class RangeExtensionThunkARM64 : public NonSectionCodeChunk {
675677
MachineTypes machine;
676678
};
677679

680+
// A chunk used to guarantee the same address for a function in both views of
681+
// a hybrid image. Similar to RangeExtensionThunkARM64 chunks, it calls the
682+
// target symbol using a BR instruction. It also contains an entry thunk for EC
683+
// compatibility and additional ARM64X relocations that swap targets between
684+
// views.
685+
class SameAddressThunkARM64EC : public RangeExtensionThunkARM64 {
686+
public:
687+
explicit SameAddressThunkARM64EC(Defined *t, Defined *hybridTarget,
688+
Defined *entryThunk)
689+
: RangeExtensionThunkARM64(ARM64EC, t), hybridTarget(hybridTarget),
690+
entryThunk(entryThunk) {}
691+
692+
Defined *getEntryThunk() const override { return entryThunk; }
693+
void setDynamicRelocs(COFFLinkerContext &ctx) const;
694+
695+
private:
696+
Defined *hybridTarget;
697+
Defined *entryThunk;
698+
};
699+
678700
// Windows-specific.
679701
// See comments for DefinedLocalImport class.
680702
class LocalImportChunk : public NonSectionChunk {
@@ -843,13 +865,13 @@ class Arm64XRelocVal {
843865
public:
844866
Arm64XRelocVal(uint64_t value = 0) : value(value) {}
845867
Arm64XRelocVal(Defined *sym, int32_t offset = 0) : sym(sym), value(offset) {}
846-
Arm64XRelocVal(Chunk *chunk, int32_t offset = 0)
868+
Arm64XRelocVal(const Chunk *chunk, int32_t offset = 0)
847869
: chunk(chunk), value(offset) {}
848870
uint64_t get() const;
849871

850872
private:
851873
Defined *sym = nullptr;
852-
Chunk *chunk = nullptr;
874+
const Chunk *chunk = nullptr;
853875
uint64_t value;
854876
};
855877

@@ -884,7 +906,7 @@ class DynamicRelocsChunk : public NonSectionChunk {
884906
arm64xRelocs.emplace_back(type, size, offset, value);
885907
}
886908

887-
void set(uint32_t rva, Arm64XRelocVal value);
909+
void set(Arm64XRelocVal offset, Arm64XRelocVal value);
888910

889911
private:
890912
std::vector<Arm64XDynamicRelocEntry> arm64xRelocs;
@@ -940,6 +962,8 @@ inline bool Chunk::isHotPatchable() const {
940962
// Returns the entry thunk attached to this chunk, or nullptr if there is none.
// EC section chunks store the thunk in a field; non-section chunks report it
// through their virtual getEntryThunk().
inline Defined *Chunk::getEntryThunk() const {
  if (const auto *ecChunk = dyn_cast<const SectionChunkEC>(this))
    return ecChunk->entryThunk;
  const auto *nonSection = dyn_cast<const NonSectionChunk>(this);
  return nonSection ? nonSection->getEntryThunk() : nullptr;
}
945969

lld/COFF/Config.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -223,6 +223,9 @@ struct Configuration {
223223
StringRef manifestUIAccess = "'false'";
224224
StringRef manifestFile;
225225

226+
// used for /arm64xsameaddress
227+
std::vector<std::pair<Symbol *, Symbol *>> sameAddresses;
228+
226229
// used for /dwodir
227230
StringRef dwoDir;
228231

lld/COFF/Driver.cpp

Lines changed: 45 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -500,7 +500,9 @@ void LinkerDriver::parseDirectives(InputFile *file) {
500500
file->symtab.parseAlternateName(arg->getValue());
501501
break;
502502
case OPT_arm64xsameaddress:
503-
if (!file->symtab.isEC())
503+
if (file->symtab.isEC())
504+
parseSameAddress(arg->getValue());
505+
else
504506
Warn(ctx) << arg->getSpelling()
505507
<< " is not allowed in non-ARM64EC files (" << toString(file)
506508
<< ")";
@@ -2295,6 +2297,13 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
22952297
args.filtered(OPT_dependentloadflag, OPT_dependentloadflag_opt))
22962298
parseDependentLoadFlags(arg);
22972299

2300+
for (auto *arg : args.filtered(OPT_arm64xsameaddress)) {
2301+
if (ctx.hybridSymtab)
2302+
parseSameAddress(arg->getValue());
2303+
else
2304+
Warn(ctx) << arg->getSpelling() << " is allowed only on EC targets";
2305+
}
2306+
22982307
if (tar) {
22992308
llvm::TimeTraceScope timeScope("Reproducer: response file");
23002309
tar->append(
@@ -2668,12 +2677,46 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
26682677
createECExportThunks();
26692678

26702679
// Resolve remaining undefined symbols and warn about imported locals.
2680+
std::vector<Undefined *> aliases;
26712681
ctx.forEachSymtab(
2672-
[&](SymbolTable &symtab) { symtab.resolveRemainingUndefines(); });
2682+
[&](SymbolTable &symtab) { symtab.resolveRemainingUndefines(aliases); });
26732683

26742684
if (errorCount())
26752685
return;
26762686

2687+
ctx.forEachActiveSymtab([](SymbolTable &symtab) {
2688+
symtab.initializeECThunks();
2689+
symtab.initializeLoadConfig();
2690+
});
2691+
2692+
// Identify unreferenced COMDAT sections.
2693+
if (config->doGC) {
2694+
if (config->mingw) {
2695+
// markLive doesn't traverse .eh_frame, but the personality function is
2696+
// only reached that way. The proper solution would be to parse and
2697+
// traverse the .eh_frame section, like the ELF linker does.
2698+
// For now, just manually try to retain the known possible personality
2699+
// functions. This doesn't bring in more object files, but only marks
2700+
// functions that already have been included to be retained.
2701+
ctx.forEachSymtab([&](SymbolTable &symtab) {
2702+
for (const char *n : {"__gxx_personality_v0", "__gcc_personality_v0",
2703+
"rust_eh_personality"}) {
2704+
Defined *d = dyn_cast_or_null<Defined>(symtab.findUnderscore(n));
2705+
if (d && !d->isGCRoot) {
2706+
d->isGCRoot = true;
2707+
config->gcroot.push_back(d);
2708+
}
2709+
}
2710+
});
2711+
}
2712+
2713+
markLive(ctx);
2714+
}
2715+
2716+
ctx.symtab.initializeSameAddressThunks();
2717+
for (auto alias : aliases)
2718+
alias->resolveWeakAlias();
2719+
26772720
if (config->mingw) {
26782721
// Make sure the crtend.o object is the last object file. This object
26792722
// file can contain terminating section chunks that need to be placed
@@ -2765,35 +2808,6 @@ void LinkerDriver::linkerMain(ArrayRef<const char *> argsArr) {
27652808
if (auto *arg = args.getLastArg(OPT_print_symbol_order))
27662809
config->printSymbolOrder = arg->getValue();
27672810

2768-
if (ctx.symtab.isEC())
2769-
ctx.symtab.initializeECThunks();
2770-
ctx.forEachActiveSymtab(
2771-
[](SymbolTable &symtab) { symtab.initializeLoadConfig(); });
2772-
2773-
// Identify unreferenced COMDAT sections.
2774-
if (config->doGC) {
2775-
if (config->mingw) {
2776-
// markLive doesn't traverse .eh_frame, but the personality function is
2777-
// only reached that way. The proper solution would be to parse and
2778-
// traverse the .eh_frame section, like the ELF linker does.
2779-
// For now, just manually try to retain the known possible personality
2780-
// functions. This doesn't bring in more object files, but only marks
2781-
// functions that already have been included to be retained.
2782-
ctx.forEachSymtab([&](SymbolTable &symtab) {
2783-
for (const char *n : {"__gxx_personality_v0", "__gcc_personality_v0",
2784-
"rust_eh_personality"}) {
2785-
Defined *d = dyn_cast_or_null<Defined>(symtab.findUnderscore(n));
2786-
if (d && !d->isGCRoot) {
2787-
d->isGCRoot = true;
2788-
config->gcroot.push_back(d);
2789-
}
2790-
}
2791-
});
2792-
}
2793-
2794-
markLive(ctx);
2795-
}
2796-
27972811
// Needs to happen after the last call to addFile().
27982812
convertResources();
27992813

lld/COFF/Driver.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -214,6 +214,8 @@ class LinkerDriver {
214214
void parsePDBPageSize(StringRef);
215215
void parseSection(StringRef);
216216

217+
void parseSameAddress(StringRef);
218+
217219
// Parses a MS-DOS stub file
218220
void parseDosStub(StringRef path);
219221

lld/COFF/DriverUtils.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -328,6 +328,22 @@ void LinkerDriver::parseSwaprun(StringRef arg) {
328328
} while (!arg.empty());
329329
}
330330

331+
// Handles a single /arm64xsameaddress argument.
//
// arg is the function name as written in the option. The function is always
// referenced in the EC symbol table (under its ARM64EC-mangled name when one
// exists). On ARM64X targets it is additionally referenced in the native
// symbol table and the pair is recorded in config.sameAddresses for later
// thunk generation.
void LinkerDriver::parseSameAddress(StringRef arg) {
  // The mangled name is produced as a temporary string, while the symbol
  // table keeps referring to the name it was given. Intern the mangled name
  // so that it outlives this function.
  StringRef ecName = arg;
  if (auto mangledName = getArm64ECMangledFunctionName(arg))
    ecName = ctx.saver.save(*mangledName);
  Symbol *sym = ctx.symtab.addUndefined(ecName);

  // MSVC appears to generate thunks even for non-hybrid ARM64EC images.
  // As a side effect, the native symbol is pulled in. Since this is used
  // in the CRT for thread-local constructors, it results in the image
  // containing unnecessary native code. As these thunks don't appear to
  // be useful, we limit this behavior to actual hybrid targets. This may
  // change if compatibility becomes necessary.
  if (ctx.config.machine != ARM64X)
    return;

  // The native view uses the plain, unmangled name.
  Symbol *nativeSym = ctx.hybridSymtab->addUndefined(arg);
  ctx.config.sameAddresses.emplace_back(sym, nativeSym);
}
346+
331347
// An RAII temporary file class that automatically removes a temporary file.
332348
namespace {
333349
class TemporaryFile {

lld/COFF/MarkLive.cpp

Lines changed: 4 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -49,7 +49,10 @@ void markLive(COFFLinkerContext &ctx) {
4949
addSym(file->impchkThunk->exitThunk);
5050
};
5151

52-
addSym = [&](Symbol *b) {
52+
addSym = [&](Symbol *s) {
53+
Defined *b = s->getDefined();
54+
if (!b)
55+
return;
5356
if (auto *sym = dyn_cast<DefinedRegular>(b)) {
5457
enqueue(sym->getChunk());
5558
} else if (auto *sym = dyn_cast<DefinedImportData>(b)) {

lld/COFF/Options.td

Lines changed: 3 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -31,6 +31,9 @@ multiclass B_priv<string name> {
3131
def align : P<"align", "Section alignment">;
3232
def aligncomm : P<"aligncomm", "Set common symbol alignment">;
3333
def alternatename : P<"alternatename", "Define weak alias">;
34+
def arm64xsameaddress
35+
: P<"arm64xsameaddress", "Generate a thunk for the symbol with the same "
36+
"address in both native and EC views on ARM64X.">;
3437
def base : P<"base", "Base address of the program">;
3538
def color_diagnostics: Flag<["--"], "color-diagnostics">,
3639
HelpText<"Alias for --color-diagnostics=always">;
@@ -373,4 +376,3 @@ def tlbid : P_priv<"tlbid">;
373376
def tlbout : P_priv<"tlbout">;
374377
def verbose_all : P_priv<"verbose">;
375378
def guardsym : P_priv<"guardsym">;
376-
def arm64xsameaddress : P_priv<"arm64xsameaddress">;

lld/COFF/SymbolTable.cpp

Lines changed: 37 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -452,7 +452,7 @@ void SymbolTable::reportUnresolvable() {
452452
reportProblemSymbols(undefs, /*localImports=*/nullptr, true);
453453
}
454454

455-
void SymbolTable::resolveRemainingUndefines() {
455+
void SymbolTable::resolveRemainingUndefines(std::vector<Undefined *> &aliases) {
456456
llvm::TimeTraceScope timeScope("Resolve remaining undefined symbols");
457457
SmallPtrSet<Symbol *, 8> undefs;
458458
DenseMap<Symbol *, Symbol *> localImports;
@@ -468,8 +468,10 @@ void SymbolTable::resolveRemainingUndefines() {
468468
StringRef name = undef->getName();
469469

470470
// A weak alias may have been resolved, so check for that.
471-
if (undef->resolveWeakAlias())
471+
if (undef->getWeakAlias()) {
472+
aliases.push_back(undef);
472473
continue;
474+
}
473475

474476
// If we can resolve a symbol by removing __imp_ prefix, do that.
475477
// This odd rule is for compatibility with MSVC linker.
@@ -620,10 +622,10 @@ void SymbolTable::initializeECThunks() {
620622
return;
621623

622624
for (auto it : entryThunks) {
623-
auto *to = dyn_cast<Defined>(it.second);
625+
Defined *to = it.second->getDefined();
624626
if (!to)
625627
continue;
626-
auto *from = dyn_cast<DefinedRegular>(it.first);
628+
auto *from = dyn_cast_or_null<DefinedRegular>(it.first->getDefined());
627629
// We need to be able to add padding to the function and fill it with an
628630
// offset to its entry thunks. To ensure that padding the function is
629631
// feasible, functions are required to be COMDAT symbols with no offset.
@@ -642,7 +644,8 @@ void SymbolTable::initializeECThunks() {
642644
Symbol *sym = exitThunks.lookup(file->thunkSym);
643645
if (!sym)
644646
sym = exitThunks.lookup(file->impECSym);
645-
file->impchkThunk->exitThunk = dyn_cast_or_null<Defined>(sym);
647+
if (sym)
648+
file->impchkThunk->exitThunk = sym->getDefined();
646649
}
647650

648651
// On ARM64EC, the __imp_ symbol references the auxiliary IAT, while the
@@ -659,6 +662,35 @@ void SymbolTable::initializeECThunks() {
659662
});
660663
}
661664

665+
void SymbolTable::initializeSameAddressThunks() {
666+
for (auto iter : ctx.config.sameAddresses) {
667+
auto sym = dyn_cast_or_null<DefinedRegular>(iter.first->getDefined());
668+
if (!sym || !sym->isLive())
669+
continue;
670+
auto nativeSym =
671+
dyn_cast_or_null<DefinedRegular>(iter.second->getDefined());
672+
if (!nativeSym || !nativeSym->isLive())
673+
continue;
674+
Defined *entryThunk = sym->getChunk()->getEntryThunk();
675+
if (!entryThunk)
676+
continue;
677+
678+
// Replace symbols with symbols referencing the thunk. Store the original
679+
// symbol as equivalent DefinedSynthetic instances for use in the thunk
680+
// itself.
681+
auto symClone = make<DefinedSynthetic>(sym->getName(), sym->getChunk(),
682+
sym->getValue());
683+
auto nativeSymClone = make<DefinedSynthetic>(
684+
nativeSym->getName(), nativeSym->getChunk(), nativeSym->getValue());
685+
SameAddressThunkARM64EC *thunk =
686+
make<SameAddressThunkARM64EC>(nativeSymClone, symClone, entryThunk);
687+
sameAddressThunks.push_back(thunk);
688+
689+
replaceSymbol<DefinedSynthetic>(sym, sym->getName(), thunk);
690+
replaceSymbol<DefinedSynthetic>(nativeSym, nativeSym->getName(), thunk);
691+
}
692+
}
693+
662694
Symbol *SymbolTable::addUndefined(StringRef name, InputFile *f,
663695
bool overrideLazy) {
664696
auto [s, wasInserted] = insert(name, f);

0 commit comments

Comments
 (0)