Skip to content

Commit d8660fa

Browse files
author
Justin Lebar
committed
[NVPTX] Implement __nvvm_atom_add_gen_d builtin.
Summary: This just seems to have been an oversight. We already supported the f64 atomic add with an explicit scope (e.g. "cta"), but not the scopeless version. Reviewers: tra Subscribers: jholewinski, sanjoy, cfe-commits, llvm-commits, hiraditya Differential Revision: https://reviews.llvm.org/D39638 git-svn-id: https://llvm.org/svn/llvm-project/llvm/trunk@317623 91177308-0d34-0410-b5e6-96231b3b80d8
1 parent 595a448 commit d8660fa

File tree

4 files changed

+39
-1
lines changed

4 files changed

+39
-1
lines changed

include/llvm/IR/IntrinsicsNVVM.td

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -683,10 +683,15 @@ let TargetPrefix = "nvvm" in {
683683
Intrinsic<[llvm_i64_ty], [llvm_double_ty], [IntrNoMem]>;
684684

685685

686-
// Atomic not available as an llvm intrinsic.
686+
// Atomics not available as llvm intrinsics.
687687
def int_nvvm_atomic_load_add_f32 : Intrinsic<[llvm_float_ty],
688688
[LLVMAnyPointerType<llvm_float_ty>, llvm_float_ty],
689689
[IntrArgMemOnly, NoCapture<0>]>;
690+
// Atomic add of f64 requires sm_60.
691+
def int_nvvm_atomic_load_add_f64 : Intrinsic<[llvm_double_ty],
692+
[LLVMAnyPointerType<llvm_double_ty>, llvm_double_ty],
693+
[IntrArgMemOnly, NoCapture<0>]>;
694+
690695
def int_nvvm_atomic_load_inc_32 : Intrinsic<[llvm_i32_ty],
691696
[LLVMAnyPointerType<llvm_i32_ty>, llvm_i32_ty],
692697
[IntrArgMemOnly, NoCapture<0>]>;

lib/Target/NVPTX/NVPTXISelLowering.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3449,6 +3449,7 @@ bool NVPTXTargetLowering::getTgtMemIntrinsic(
34493449
}
34503450

34513451
case Intrinsic::nvvm_atomic_load_add_f32:
3452+
case Intrinsic::nvvm_atomic_load_add_f64:
34523453
case Intrinsic::nvvm_atomic_load_inc_32:
34533454
case Intrinsic::nvvm_atomic_load_dec_32:
34543455

lib/Target/NVPTX/NVPTXIntrinsics.td

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1095,6 +1095,12 @@ def atomic_load_add_f32_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
10951095
(int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
10961096
def atomic_load_add_f32_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
10971097
(int_nvvm_atomic_load_add_f32 node:$a, node:$b)>;
1098+
def atomic_load_add_f64_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),
1099+
(int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1100+
def atomic_load_add_f64_s: ATOMIC_SHARED_CHK<(ops node:$a, node:$b),
1101+
(int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
1102+
def atomic_load_add_f64_gen: ATOMIC_GENERIC_CHK<(ops node:$a, node:$b),
1103+
(int_nvvm_atomic_load_add_f64 node:$a, node:$b)>;
10981104

10991105
defm INT_PTX_ATOM_ADD_G_32 : F_ATOMIC_2<Int32Regs, ".global", ".u32", ".add",
11001106
atomic_load_add_32_g, i32imm, imm, hasAtomRedG32>;
@@ -1121,6 +1127,13 @@ defm INT_PTX_ATOM_ADD_S_F32 : F_ATOMIC_2<Float32Regs, ".shared", ".f32", ".add",
11211127
defm INT_PTX_ATOM_ADD_GEN_F32 : F_ATOMIC_2<Float32Regs, "", ".f32", ".add",
11221128
atomic_load_add_f32_gen, f32imm, fpimm, hasAtomAddF32>;
11231129

1130+
defm INT_PTX_ATOM_ADD_G_F64 : F_ATOMIC_2<Float64Regs, ".global", ".f64", ".add",
1131+
atomic_load_add_f64_g, f64imm, fpimm, hasAtomAddF64>;
1132+
defm INT_PTX_ATOM_ADD_S_F64 : F_ATOMIC_2<Float64Regs, ".shared", ".f64", ".add",
1133+
atomic_load_add_f64_s, f64imm, fpimm, hasAtomAddF64>;
1134+
defm INT_PTX_ATOM_ADD_GEN_F64 : F_ATOMIC_2<Float64Regs, "", ".f64", ".add",
1135+
atomic_load_add_f64_gen, f64imm, fpimm, hasAtomAddF64>;
1136+
11241137
// atom_sub
11251138

11261139
def atomic_load_sub_32_g: ATOMIC_GLOBAL_CHK<(ops node:$a, node:$b),

test/CodeGen/NVPTX/atomics-sm60.ll

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
; RUN: llc < %s -march=nvptx -mcpu=sm_60 | FileCheck %s
2+
; RUN: llc < %s -march=nvptx64 -mcpu=sm_60 | FileCheck %s
3+
4+
; CHECK-LABEL: .func test(
5+
define void @test(double* %dp0, double addrspace(1)* %dp1, double addrspace(3)* %dp3, double %d) {
6+
; CHECK: atom.add.f64
7+
%r1 = call double @llvm.nvvm.atomic.load.add.f64.p0f64(double* %dp0, double %d)
8+
; CHECK: atom.global.add.f64
9+
%r2 = call double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* %dp1, double %d)
10+
; CHECK: atom.shared.add.f64
11+
%ret = call double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* %dp3, double %d)
12+
ret void
13+
}
14+
15+
declare double @llvm.nvvm.atomic.load.add.f64.p0f64(double* nocapture, double) #1
16+
declare double @llvm.nvvm.atomic.load.add.f64.p1f64(double addrspace(1)* nocapture, double) #1
17+
declare double @llvm.nvvm.atomic.load.add.f64.p3f64(double addrspace(3)* nocapture, double) #1
18+
19+
attributes #1 = { argmemonly nounwind }

0 commit comments

Comments
 (0)