-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[CUDA] Use --image3 to construct fat binary #151760
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-clang @llvm/pr-subscribers-clang-driver Author: Artem Belevich (Artem-B) ChangesCUDA-12.9 has removed fatbinary tool's --image3 has been supported since cuda-9, so we do not need CUDA SDK version checks. Full diff: https://github.com/llvm/llvm-project/pull/151760.diff 3 Files Affected:
diff --git a/clang/lib/Driver/ToolChains/Cuda.cpp b/clang/lib/Driver/ToolChains/Cuda.cpp
index 1f0b478c02b25..fdfcea852b4f2 100644
--- a/clang/lib/Driver/ToolChains/Cuda.cpp
+++ b/clang/lib/Driver/ToolChains/Cuda.cpp
@@ -549,22 +549,16 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
auto *A = II.getAction();
assert(A->getInputs().size() == 1 &&
"Device offload action is expected to have a single input");
- const char *gpu_arch_str = A->getOffloadingArch();
- assert(gpu_arch_str &&
+ StringRef GpuArch = A->getOffloadingArch();
+ assert(!GpuArch.empty() &&
"Device action expected to have associated a GPU architecture!");
- OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);
- if (II.getType() == types::TY_PP_Asm &&
- !shouldIncludePTX(Args, gpu_arch_str))
+ if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, GpuArch))
continue;
- // We need to pass an Arch of the form "sm_XX" for cubin files and
- // "compute_XX" for ptx.
- const char *Arch = (II.getType() == types::TY_PP_Asm)
- ? OffloadArchToVirtualArchString(gpu_arch)
- : gpu_arch_str;
- CmdArgs.push_back(
- Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
- ",file=" + getToolChain().getInputFilename(II)));
+ StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf";
+ CmdArgs.push_back(Args.MakeArgString(
+ "--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(3) +
+ ",file=" + getToolChain().getInputFilename(II)));
}
for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))
diff --git a/clang/test/Driver/cuda-arch-translation.cu b/clang/test/Driver/cuda-arch-translation.cu
index e4f83740a92eb..b4a521df2e6f5 100644
--- a/clang/test/Driver/cuda-arch-translation.cu
+++ b/clang/test/Driver/cuda-arch-translation.cu
@@ -68,19 +68,19 @@
// HIP: clang-offload-bundler
-// SM20:--image=profile=sm_20{{.*}}
-// SM21:--image=profile=sm_21{{.*}}
-// SM30:--image=profile=sm_30{{.*}}
-// SM32:--image=profile=sm_32{{.*}}
-// SM35:--image=profile=sm_35{{.*}}
-// SM37:--image=profile=sm_37{{.*}}
-// SM50:--image=profile=sm_50{{.*}}
-// SM52:--image=profile=sm_52{{.*}}
-// SM53:--image=profile=sm_53{{.*}}
-// SM60:--image=profile=sm_60{{.*}}
-// SM61:--image=profile=sm_61{{.*}}
-// SM62:--image=profile=sm_62{{.*}}
-// SM70:--image=profile=sm_70{{.*}}
+// SM20:--image3=kind=elf,sm=20{{.*}}
+// SM21:--image3=kind=elf,sm=21{{.*}}
+// SM30:--image3=kind=elf,sm=30{{.*}}
+// SM32:--image3=kind=elf,sm=32{{.*}}
+// SM35:--image3=kind=elf,sm=35{{.*}}
+// SM37:--image3=kind=elf,sm=37{{.*}}
+// SM50:--image3=kind=elf,sm=50{{.*}}
+// SM52:--image3=kind=elf,sm=52{{.*}}
+// SM53:--image3=kind=elf,sm=53{{.*}}
+// SM60:--image3=kind=elf,sm=60{{.*}}
+// SM61:--image3=kind=elf,sm=61{{.*}}
+// SM62:--image3=kind=elf,sm=62{{.*}}
+// SM70:--image3=kind=elf,sm=70{{.*}}
// GFX600:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx600
// GFX601:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx601
// GFX602:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx602
diff --git a/clang/test/Driver/cuda-options.cu b/clang/test/Driver/cuda-options.cu
index db6536ca9e03b..fc8e83a2bb279 100644
--- a/clang/test/Driver/cuda-options.cu
+++ b/clang/test/Driver/cuda-options.cu
@@ -243,10 +243,10 @@
// INCLUDES-DEVICE:fatbinary
// INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
-// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
-// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
-// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
-// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
+// INCLUDES-DEVICE-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE]]"
+// INCLUDES-DEVICE-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE]]"
+// INCLUDES-DEVICE2-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE2]]"
+// INCLUDES-DEVICE2-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE2]]"
// Match host-side preprocessor job with -save-temps.
// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
@@ -288,9 +288,9 @@
// FATBIN-COMMON:fatbinary
// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
-// FATBIN-COMMON: "--image=profile=sm_52,file=
-// PTX-SM52: "--image=profile=compute_52,file=
-// NOPTX-SM52-NOT: "--image=profile=compute_52,file=
-// FATBIN-COMMON: "--image=profile=sm_60,file=
-// PTX-SM60: "--image=profile=compute_60,file=
-// NOPTX-SM60-NOT: "--image=profile=compute_60,file=
+// FATBIN-COMMON: "--image3=kind=elf,sm=52,file=
+// PTX-SM52: "--image3=kind=ptx,sm=52,file=
+// NOPTX-SM52-NOT: "--image3=kind=ptx,sm=52,file=
+// FATBIN-COMMON: "--image3=kind=elf,sm=60,file=
+// PTX-SM60: "--image3=kind=ptx,sm=60,file=
+// NOPTX-SM60-NOT: "--image3=kind=ptx,sm=60,file=
|
CUDA-12.9 has removed fatbinary tool's `--image` argument we've been using till now. --image3 has been supported since cuda-9, so we do not need CUDA SDK version checks.
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/59/builds/21980 Here is the relevant piece of the build log for the reference
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/46/builds/21078 Here is the relevant piece of the build log for the reference
|
CUDA-12.9 has removed fatbinary tool's
--image
argument we've been using till now.--image3 has been supported since cuda-9, so we do not need CUDA SDK version checks.