Skip to content

Commit 098ea94

Browse files
committed
[CUDA] Use --image3 to construct fat binary
CUDA 12.9 removed the `--image` argument of the `fatbinary` tool, which we had been using until now. `--image3` has been supported since CUDA 9, so no CUDA SDK version checks are needed.
1 parent 149d4b5 commit 098ea94

File tree

3 files changed

+30
-36
lines changed

3 files changed

+30
-36
lines changed

clang/lib/Driver/ToolChains/Cuda.cpp

Lines changed: 7 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -549,22 +549,16 @@ void NVPTX::FatBinary::ConstructJob(Compilation &C, const JobAction &JA,
549549
auto *A = II.getAction();
550550
assert(A->getInputs().size() == 1 &&
551551
"Device offload action is expected to have a single input");
552-
const char *gpu_arch_str = A->getOffloadingArch();
553-
assert(gpu_arch_str &&
552+
StringRef GpuArch = A->getOffloadingArch();
553+
assert(!GpuArch.empty() &&
554554
"Device action expected to have associated a GPU architecture!");
555-
OffloadArch gpu_arch = StringToOffloadArch(gpu_arch_str);
556555

557-
if (II.getType() == types::TY_PP_Asm &&
558-
!shouldIncludePTX(Args, gpu_arch_str))
556+
if (II.getType() == types::TY_PP_Asm && !shouldIncludePTX(Args, GpuArch))
559557
continue;
560-
// We need to pass an Arch of the form "sm_XX" for cubin files and
561-
// "compute_XX" for ptx.
562-
const char *Arch = (II.getType() == types::TY_PP_Asm)
563-
? OffloadArchToVirtualArchString(gpu_arch)
564-
: gpu_arch_str;
565-
CmdArgs.push_back(
566-
Args.MakeArgString(llvm::Twine("--image=profile=") + Arch +
567-
",file=" + getToolChain().getInputFilename(II)));
558+
StringRef Kind = (II.getType() == types::TY_PP_Asm) ? "ptx" : "elf";
559+
CmdArgs.push_back(Args.MakeArgString(
560+
"--image3=kind=" + Kind + ",sm=" + GpuArch.drop_front(3) +
561+
",file=" + getToolChain().getInputFilename(II)));
568562
}
569563

570564
for (const auto &A : Args.getAllArgValues(options::OPT_Xcuda_fatbinary))

clang/test/Driver/cuda-arch-translation.cu

Lines changed: 13 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -68,19 +68,19 @@
6868

6969
// HIP: clang-offload-bundler
7070

71-
// SM20:--image=profile=sm_20{{.*}}
72-
// SM21:--image=profile=sm_21{{.*}}
73-
// SM30:--image=profile=sm_30{{.*}}
74-
// SM32:--image=profile=sm_32{{.*}}
75-
// SM35:--image=profile=sm_35{{.*}}
76-
// SM37:--image=profile=sm_37{{.*}}
77-
// SM50:--image=profile=sm_50{{.*}}
78-
// SM52:--image=profile=sm_52{{.*}}
79-
// SM53:--image=profile=sm_53{{.*}}
80-
// SM60:--image=profile=sm_60{{.*}}
81-
// SM61:--image=profile=sm_61{{.*}}
82-
// SM62:--image=profile=sm_62{{.*}}
83-
// SM70:--image=profile=sm_70{{.*}}
71+
// SM20:--image3=kind=elf,sm=20{{.*}}
72+
// SM21:--image3=kind=elf,sm=21{{.*}}
73+
// SM30:--image3=kind=elf,sm=30{{.*}}
74+
// SM32:--image3=kind=elf,sm=32{{.*}}
75+
// SM35:--image3=kind=elf,sm=35{{.*}}
76+
// SM37:--image3=kind=elf,sm=37{{.*}}
77+
// SM50:--image3=kind=elf,sm=50{{.*}}
78+
// SM52:--image3=kind=elf,sm=52{{.*}}
79+
// SM53:--image3=kind=elf,sm=53{{.*}}
80+
// SM60:--image3=kind=elf,sm=60{{.*}}
81+
// SM61:--image3=kind=elf,sm=61{{.*}}
82+
// SM62:--image3=kind=elf,sm=62{{.*}}
83+
// SM70:--image3=kind=elf,sm=70{{.*}}
8484
// GFX600:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx600
8585
// GFX601:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx601
8686
// GFX602:-targets=host-x86_64-unknown-linux-gnu,hipv4-amdgcn-amd-amdhsa--gfx602

clang/test/Driver/cuda-options.cu

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -243,10 +243,10 @@
243243

244244
// INCLUDES-DEVICE:fatbinary
245245
// INCLUDES-DEVICE-DAG: "--create" "[[FATBINARY:[^"]*]]"
246-
// INCLUDES-DEVICE-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE]]"
247-
// INCLUDES-DEVICE-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE]]"
248-
// INCLUDES-DEVICE2-DAG: "--image=profile=sm_{{[0-9]+}},file=[[CUBINFILE2]]"
249-
// INCLUDES-DEVICE2-DAG: "--image=profile=compute_{{[0-9]+}},file=[[PTXFILE2]]"
246+
// INCLUDES-DEVICE-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE]]"
247+
// INCLUDES-DEVICE-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE]]"
248+
// INCLUDES-DEVICE2-DAG: "--image3=kind=elf,sm={{[0-9]+}},file=[[CUBINFILE2]]"
249+
// INCLUDES-DEVICE2-DAG: "--image3=kind=ptx,sm={{[0-9]+}},file=[[PTXFILE2]]"
250250

251251
// Match host-side preprocessor job with -save-temps.
252252
// HOST-SAVE: "-cc1" "-triple" "x86_64-unknown-linux-gnu"
@@ -288,9 +288,9 @@
288288

289289
// FATBIN-COMMON:fatbinary
290290
// FATBIN-COMMON: "--create" "[[FATBINARY:[^"]*]]"
291-
// FATBIN-COMMON: "--image=profile=sm_52,file=
292-
// PTX-SM52: "--image=profile=compute_52,file=
293-
// NOPTX-SM52-NOT: "--image=profile=compute_52,file=
294-
// FATBIN-COMMON: "--image=profile=sm_60,file=
295-
// PTX-SM60: "--image=profile=compute_60,file=
296-
// NOPTX-SM60-NOT: "--image=profile=compute_60,file=
291+
// FATBIN-COMMON: "--image3=kind=elf,sm=52,file=
292+
// PTX-SM52: "--image3=kind=ptx,sm=52,file=
293+
// NOPTX-SM52-NOT: "--image3=kind=ptx,sm=52,file=
294+
// FATBIN-COMMON: "--image3=kind=elf,sm=60,file=
295+
// PTX-SM60: "--image3=kind=ptx,sm=60,file=
296+
// NOPTX-SM60-NOT: "--image3=kind=ptx,sm=60,file=

0 commit comments

Comments
 (0)