Skip to content

Commit 4593e41

Browse files
committed
AMDGPU: Teach toolchain to link rocm device libs
Currently the library is linked separately, but that approach cannot implement fast math flags correctly. Each module should get the version of the library appropriate for its combination of fast math and related flags, with the attributes propagated into its functions and internalized. HIP already maintains the list of libraries, but this is not used for OpenCL. Unfortunately, HIP uses a separate --hip-device-lib argument, despite both languages using the same bitcode library. Eventually these two searches need to be merged. An additional problem is that there are 3 different locations in which the libraries are installed, depending on which build is used. This also needs to be consolidated (or at least the search logic needs to deal with this unnecessary complexity).
1 parent f5be71b commit 4593e41

34 files changed

+630
-14
lines changed

clang/include/clang/Basic/DiagnosticDriverKinds.td

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -55,6 +55,14 @@ def err_drv_no_cuda_installation : Error<
5555
def err_drv_no_cuda_libdevice : Error<
5656
"cannot find libdevice for %0. Provide path to different CUDA installation "
5757
"via --cuda-path, or pass -nocudalib to build without linking with libdevice.">;
58+
59+
// Reported by the ROCm toolchain when no valid ROCm installation was detected
// and -nogpulib was not passed.
def err_drv_no_rocm_installation : Error<
60+
"cannot find ROCm installation. Provide its path via --rocm-path, or pass "
61+
"-nogpulib.">;
62+
// Reported when the detected installation has no device library registered
// for the requested GPU; %0 is the GPU name taken from the command line.
def err_drv_no_rocm_device_lib : Error<
63+
"cannot find device library for %0. Provide path to different ROCm installation "
64+
"via --rocm-path, or pass -nogpulib to build without linking default libraries.">;
65+
5866
def err_drv_cuda_version_unsupported : Error<
5967
"GPU arch %0 is supported by CUDA versions between %1 and %2 (inclusive), "
6068
"but installation at %3 is %4. Use --cuda-path to specify a different CUDA "

clang/include/clang/Driver/Options.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -608,6 +608,8 @@ def : Flag<["-"], "fno-cuda-rdc">, Alias<fno_gpu_rdc>;
608608
def fcuda_short_ptr : Flag<["-"], "fcuda-short-ptr">, Flags<[CC1Option]>,
609609
HelpText<"Use 32-bit pointers for accessing const/local/shared address spaces.">;
610610
def fno_cuda_short_ptr : Flag<["-"], "fno-cuda-short-ptr">;
611+
// --rocm-path=<dir>: explicit ROCm installation directory. When this option is
// present it is the only location the ROCm installation detector probes.
def rocm_path_EQ : Joined<["--"], "rocm-path=">, Group<i_Group>,
612+
HelpText<"ROCm installation path">;
611613
def hip_device_lib_path_EQ : Joined<["--"], "hip-device-lib-path=">, Group<Link_Group>,
612614
HelpText<"HIP device library path">;
613615
def hip_device_lib_EQ : Joined<["--"], "hip-device-lib=">, Group<Link_Group>,

clang/lib/Driver/Driver.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4857,6 +4857,8 @@ const ToolChain &Driver::getToolChain(const ArgList &Args,
48574857
TC = std::make_unique<toolchains::Solaris>(*this, Target, Args);
48584858
break;
48594859
case llvm::Triple::AMDHSA:
4860+
TC = std::make_unique<toolchains::ROCMToolChain>(*this, Target, Args);
4861+
break;
48604862
case llvm::Triple::AMDPAL:
48614863
case llvm::Triple::Mesa3D:
48624864
TC = std::make_unique<toolchains::AMDGPUToolChain>(*this, Target, Args);

clang/lib/Driver/ToolChains/AMDGPU.cpp

Lines changed: 250 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,14 +12,171 @@
1212
#include "clang/Driver/Compilation.h"
1313
#include "clang/Driver/DriverDiagnostic.h"
1414
#include "llvm/Option/ArgList.h"
15-
#include "llvm/Support/TargetParser.h"
15+
#include "llvm/Support/Path.h"
16+
#include "llvm/Support/VirtualFileSystem.h"
1617

1718
using namespace clang::driver;
1819
using namespace clang::driver::tools;
1920
using namespace clang::driver::toolchains;
2021
using namespace clang;
2122
using namespace llvm::opt;
2223

24+
/// Locates a ROCm installation and records the device bitcode libraries found
/// in its lib directory.
///
/// If --rocm-path= is given, only that directory is probed. Otherwise the
/// prefix derived from the compiler's install directory is tried first (with
/// strict checking), followed by <sysroot>/opt/rocm. The first candidate that
/// passes every check becomes the installation and sets IsValid.
///
/// \param D          Driver supplying the install directory, sysroot and VFS.
/// \param HostTriple Triple whose arch name identifies the optional
///                   bin/<arch> layout.
/// \param Args       Driver arguments (--rocm-path=, -nogpulib).
RocmInstallationDetector::RocmInstallationDetector(
    const Driver &D, const llvm::Triple &HostTriple,
    const llvm::opt::ArgList &Args)
    : D(D) {
  struct Candidate {
    std::string Path;
    bool StrictChecking;

    Candidate(std::string Path, bool StrictChecking = false)
        : Path(std::move(Path)), StrictChecking(StrictChecking) {}
  };

  SmallVector<Candidate, 4> Candidates;

  if (Args.hasArg(clang::driver::options::OPT_rocm_path_EQ)) {
    Candidates.emplace_back(
        Args.getLastArgValue(clang::driver::options::OPT_rocm_path_EQ).str());
  } else {
    // Try to find relative to the compiler binary.
    // NOTE(review): assumes getInstalledDir() is the directory that holds the
    // clang executable (e.g. <prefix>/bin) - confirm against Driver.
    const char *InstallDir = D.getInstalledDir();

    // Check both a normal Unix prefix position of the clang binary, as well as
    // the Windows-esque layout the ROCm packages use with the host architecture
    // subdirectory of bin.
    //
    // Compare only the last path component: comparing the whole path against
    // "bin" or the arch name can never match an absolute install directory.
    StringRef BinDir = InstallDir;
    if (llvm::sys::path::filename(BinDir) == HostTriple.getArchName())
      BinDir = llvm::sys::path::parent_path(BinDir);

    if (llvm::sys::path::filename(BinDir) == "bin") {
      Candidates.emplace_back(llvm::sys::path::parent_path(BinDir).str(),
                              /*StrictChecking=*/true);
    }

    Candidates.emplace_back(D.SysRoot + "/opt/rocm");
  }

  bool NoBuiltinLibs = Args.hasArg(options::OPT_nogpulib);

  for (const auto &Cand : Candidates) {
    InstallPath = Cand.Path;
    if (InstallPath.empty() || !D.getVFS().exists(InstallPath))
      continue;

    // FIXME: The install path situation is a real mess.

    // For a cmake install, these are placed directly in
    // ${INSTALL_PREFIX}/lib

    // In the separate OpenCL builds, the bitcode libraries are placed in
    // ${OPENCL_ROOT}/lib/x86_64/bitcode/*

    // For the rocm installed packages, these are placed at
    // /opt/rocm/opencl/lib/x86_64/bitcode

    // An additional copy is installed, in scattered locations between
    // /opt/rocm/hcc/rocdl/oclc
    // /opt/rocm/hcc/rocdl/ockl
    // /opt/rocm/hcc/rocdl/lib
    //
    // Yet another complete set is installed to
    // /opt/rocm/hcc/rocdl/lib

    // For now just recognize the opencl package layout.

    // BinPath = InstallPath + "/bin";

    // Start from empty paths for every candidate; path::append only ever
    // extends its first argument, so leftovers from a rejected candidate
    // would otherwise be prefixed onto this one.
    IncludePath.clear();
    LibDevicePath.clear();
    llvm::sys::path::append(IncludePath, InstallPath, "include");
    llvm::sys::path::append(LibDevicePath, InstallPath, "lib");

    auto &FS = D.getVFS();

    // We don't need the include path for OpenCL, since clang already ships with
    // the default header.

    bool CheckLibDevice = (!NoBuiltinLibs || Cand.StrictChecking);
    if (CheckLibDevice && !FS.exists(LibDevicePath))
      continue;

    const StringRef Suffix(".amdgcn.bc");

    // Enumerate through the same VFS that answered the exists() queries above
    // so that both agree on what is present.
    // NOTE(review): library paths recorded while scanning a candidate that is
    // later rejected are not reset before the next candidate is scanned.
    std::error_code EC;
    for (llvm::vfs::directory_iterator LI = FS.dir_begin(LibDevicePath, EC), LE;
         !EC && LI != LE; LI.increment(EC)) {
      StringRef FilePath = LI->path();
      StringRef FileName = llvm::sys::path::filename(FilePath);
      if (!FileName.endswith(Suffix))
        continue;

      StringRef BaseName = FileName.drop_back(Suffix.size());

      if (BaseName == "ocml") {
        OCML = FilePath;
      } else if (BaseName == "ockl") {
        OCKL = FilePath;
      } else if (BaseName == "opencl") {
        OpenCL = FilePath;
      } else if (BaseName == "hip") {
        HIP = FilePath;
      } else if (BaseName == "oclc_finite_only_off") {
        FiniteOnly.Off = FilePath;
      } else if (BaseName == "oclc_finite_only_on") {
        FiniteOnly.On = FilePath;
      } else if (BaseName == "oclc_daz_opt_on") {
        DenormalsAreZero.On = FilePath;
      } else if (BaseName == "oclc_daz_opt_off") {
        DenormalsAreZero.Off = FilePath;
      } else if (BaseName == "oclc_correctly_rounded_sqrt_on") {
        CorrectlyRoundedSqrt.On = FilePath;
      } else if (BaseName == "oclc_correctly_rounded_sqrt_off") {
        CorrectlyRoundedSqrt.Off = FilePath;
      } else if (BaseName == "oclc_unsafe_math_on") {
        UnsafeMath.On = FilePath;
      } else if (BaseName == "oclc_unsafe_math_off") {
        UnsafeMath.Off = FilePath;
      } else if (BaseName == "oclc_wavefrontsize64_on") {
        WavefrontSize64.On = FilePath;
      } else if (BaseName == "oclc_wavefrontsize64_off") {
        WavefrontSize64.Off = FilePath;
      } else {
        // Process all bitcode filenames that look like
        // oclc_isa_version_XXX.amdgcn.bc
        const StringRef DeviceLibPrefix = "oclc_isa_version_";
        if (!BaseName.startswith(DeviceLibPrefix))
          continue;

        StringRef IsaVersionNumber =
            BaseName.drop_front(DeviceLibPrefix.size());

        // Build the key in owned storage; a Twine kept in a local variable
        // would refer to temporaries that are already destroyed.
        SmallString<16> GfxName("gfx");
        GfxName += IsaVersionNumber;
        LibDeviceMap.insert(std::make_pair(GfxName.str(), FilePath.str()));
      }
    }

    if (!NoBuiltinLibs) {
      // Check that the required non-target libraries are all available.
      if (!allGenericLibsValid())
        continue;

      // Check that we have found at least one libdevice that we can link in if
      // -nogpulib hasn't been specified.
      if (LibDeviceMap.empty())
        continue;
    }

    IsValid = true;
    break;
  }
}
174+
175+
/// Writes the detected installation path to \p OS; emits nothing when no valid
/// installation was found.
void RocmInstallationDetector::print(raw_ostream &OS) const {
  if (!isValid())
    return;

  OS << "Found ROCm installation: " << InstallPath << '\n';
}
179+
23180
void amdgpu::Linker::ConstructJob(Compilation &C, const JobAction &JA,
24181
const InputInfo &Output,
25182
const InputInfoList &Inputs,
@@ -142,6 +299,12 @@ llvm::DenormalMode AMDGPUToolChain::getDefaultDenormalModeForType(
142299
llvm::DenormalMode::getIEEE();
143300
}
144301

302+
/// ROCM Toolchain
303+
ROCMToolChain::ROCMToolChain(const Driver &D, const llvm::Triple &Triple,
304+
const ArgList &Args)
305+
: AMDGPUToolChain(D, Triple, Args),
306+
RocmInstallation(D, Triple, Args) { }
307+
145308
void AMDGPUToolChain::addClangTargetOptions(
146309
const llvm::opt::ArgList &DriverArgs,
147310
llvm::opt::ArgStringList &CC1Args,
@@ -155,3 +318,89 @@ void AMDGPUToolChain::addClangTargetOptions(
155318
CC1Args.push_back("-fapply-global-visibility-to-externs");
156319
}
157320
}
321+
322+
void ROCMToolChain::addClangTargetOptions(
323+
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
324+
Action::OffloadKind DeviceOffloadingKind) const {
325+
AMDGPUToolChain::addClangTargetOptions(DriverArgs, CC1Args,
326+
DeviceOffloadingKind);
327+
328+
if (DriverArgs.hasArg(options::OPT_nogpulib))
329+
return;
330+
331+
if (!RocmInstallation.isValid()) {
332+
getDriver().Diag(diag::err_drv_no_rocm_installation);
333+
return;
334+
}
335+
336+
// Get the device name and canonicalize it
337+
const StringRef GpuArch = DriverArgs.getLastArgValue(options::OPT_mcpu_EQ);
338+
auto Kind = llvm::AMDGPU::parseArchAMDGCN(GpuArch);
339+
const StringRef CanonArch = llvm::AMDGPU::getArchNameAMDGCN(Kind);
340+
std::string LibDeviceFile = RocmInstallation.getLibDeviceFile(CanonArch);
341+
if (LibDeviceFile.empty()) {
342+
getDriver().Diag(diag::err_drv_no_rocm_device_lib) << GpuArch;
343+
return;
344+
}
345+
346+
const unsigned ArchAttr = llvm::AMDGPU::getArchAttrAMDGCN(Kind);
347+
static bool HasWave32 = (ArchAttr & llvm::AMDGPU::FEATURE_WAVE32);
348+
349+
bool Wave64 = !HasWave32 || DriverArgs.hasFlag(
350+
options::OPT_mwavefrontsize64, options::OPT_mno_wavefrontsize64, false);
351+
352+
// TODO: There are way too many flags that change this. Do we need to check
353+
// them all?
354+
bool DAZ = DriverArgs.hasArg(options::OPT_cl_denorms_are_zero) ||
355+
getDefaultDenormsAreZeroForTarget(Kind);
356+
bool FiniteOnly = DriverArgs.hasArg(options::OPT_cl_finite_math_only);
357+
358+
bool UnsafeMathOpt =
359+
DriverArgs.hasArg(options::OPT_cl_unsafe_math_optimizations);
360+
bool FastRelaxedMath = DriverArgs.hasArg(options::OPT_cl_fast_relaxed_math);
361+
bool CorrectSqrt =
362+
DriverArgs.hasArg(options::OPT_cl_fp32_correctly_rounded_divide_sqrt);
363+
364+
// Add the OpenCL specific bitcode library.
365+
CC1Args.push_back("-mlink-builtin-bitcode");
366+
CC1Args.push_back(DriverArgs.MakeArgString(RocmInstallation.getOpenCLPath()));
367+
368+
// Add the generic set of libraries.
369+
RocmInstallation.addCommonBitcodeLibCC1Args(
370+
DriverArgs, CC1Args, LibDeviceFile, Wave64, DAZ, FiniteOnly,
371+
UnsafeMathOpt, FastRelaxedMath, CorrectSqrt);
372+
}
373+
374+
void RocmInstallationDetector::addCommonBitcodeLibCC1Args(
375+
const llvm::opt::ArgList &DriverArgs, llvm::opt::ArgStringList &CC1Args,
376+
StringRef LibDeviceFile, bool Wave64, bool DAZ, bool FiniteOnly,
377+
bool UnsafeMathOpt, bool FastRelaxedMath, bool CorrectSqrt) const {
378+
static const char LinkBitcodeFlag[] = "-mlink-builtin-bitcode";
379+
380+
CC1Args.push_back(LinkBitcodeFlag);
381+
CC1Args.push_back(DriverArgs.MakeArgString(getOCMLPath()));
382+
383+
CC1Args.push_back(LinkBitcodeFlag);
384+
CC1Args.push_back(DriverArgs.MakeArgString(getOCKLPath()));
385+
386+
CC1Args.push_back(LinkBitcodeFlag);
387+
CC1Args.push_back(DriverArgs.MakeArgString(getDenormalsAreZeroPath(DAZ)));
388+
389+
CC1Args.push_back(LinkBitcodeFlag);
390+
CC1Args.push_back(DriverArgs.MakeArgString(
391+
getUnsafeMathPath(UnsafeMathOpt || FastRelaxedMath)));
392+
393+
CC1Args.push_back(LinkBitcodeFlag);
394+
CC1Args.push_back(DriverArgs.MakeArgString(
395+
getFiniteOnlyPath(FiniteOnly || FastRelaxedMath)));
396+
397+
CC1Args.push_back(LinkBitcodeFlag);
398+
CC1Args.push_back(
399+
DriverArgs.MakeArgString(getCorrectlyRoundedSqrtPath(CorrectSqrt)));
400+
401+
CC1Args.push_back(LinkBitcodeFlag);
402+
CC1Args.push_back(DriverArgs.MakeArgString(getWavefrontSize64Path(Wave64)));
403+
404+
CC1Args.push_back(LinkBitcodeFlag);
405+
CC1Args.push_back(DriverArgs.MakeArgString(LibDeviceFile));
406+
}

0 commit comments

Comments
 (0)