[flang][cuda] Generate cuf.allocate for descriptor with CUDA components #152041

clementval · 2025-08-04T22:05:07Z

Descriptors for derived types with CUDA components are allocated in managed memory. For an allocate statement on such an object, the lowering was calling the standard runtime where it should instead generate a cuf.allocate operation.

llvmbot · 2025-08-04T22:05:40Z

@llvm/pr-subscribers-flang-fir-hlfir

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

Descriptors for derived types with CUDA components are allocated in managed memory. For an allocate statement on such an object, the lowering was calling the standard runtime where it should instead generate a cuf.allocate operation.

Full diff: https://github.com/llvm/llvm-project/pull/152041.diff

5 Files Affected:

(modified) flang/include/flang/Semantics/tools.h (+2)
(modified) flang/lib/Lower/Allocatable.cpp (+5-3)
(modified) flang/lib/Lower/ConvertVariable.cpp (+4-2)
(modified) flang/lib/Semantics/tools.cpp (+15)
(modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+13)

diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 317b9357b4c1f..966a30f7081fd 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &sym);
+
 inline bool IsCUDAShared(const Symbol &sym) {
   if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
     if (details->cudaDataAttr() &&
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 15cd9770b35ba..92ac050c41ae2 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -466,7 +466,9 @@ class AllocateStmtHelper {
 
   void genSimpleAllocation(const Allocation &alloc,
                            const fir::MutableBoxValue &box) {
-    bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+    bool isCudaAllocate =
+        Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
+        Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
     bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
     bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
                             !alloc.type.IsPolymorphic() &&
@@ -475,7 +477,7 @@ class AllocateStmtHelper {
     unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
 
     if (inlineAllocation &&
-        ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
+        ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
       // Pointers must use PointerAllocate so that their deallocations
       // can be validated.
       genInlinedAllocation(alloc, box);
@@ -494,7 +496,7 @@ class AllocateStmtHelper {
     genSetDeferredLengthParameters(alloc, box);
     genAllocateObjectBounds(alloc, box);
     mlir::Value stat;
-    if (!isCudaSymbol) {
+    if (!isCudaAllocate) {
       stat = genRuntimeAllocate(builder, loc, box, errorManager);
       setPinnedToFalse();
     } else {
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 647bd0d079985..b71f6519c39d9 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
         baseTy = boxTy.getEleTy();
       baseTy = fir::unwrapRefType(baseTy);
 
-      if (mlir::isa<fir::SequenceType>(baseTy))
-        TODO(loc, "array of derived-type with device component");
+      if (mlir::isa<fir::SequenceType>(baseTy) &&
+          (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+           fir::isPointerType(fir::getBase(exv).getType())))
+        return; // Allocator index need to be set after allocation.
 
       auto recTy =
           mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 5a5b02e1ac3ce..913bf08cd0d99 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &symbol) {
+  if (const auto *details{symbol.GetUltimate()
+              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+    const Fortran::semantics::DeclTypeSpec *type{details->type()};
+    const Fortran::semantics::DerivedTypeSpec *derived{
+        type ? type->AsDerived() : nullptr};
+    if (derived) {
+      if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 UltimateComponentIterator::const_iterator
 FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
   UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 36e768bd7d92c..2cf8c7d336812 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -6,6 +6,10 @@ module globals
   real, device, allocatable :: a_device(:)
   real, managed, allocatable :: a_managed(:)
   real, pinned, allocatable :: a_pinned(:)
+  type :: t1
+    integer :: a
+    real, dimension(:), allocatable, device :: b
+  end type
 end module
 
 ! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
@@ -222,3 +226,12 @@ end
 ! CHECK: %[[FALSE:.*]] = arith.constant false
 ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
 ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
+
+subroutine cuda_component()
+  use globals
+  type(t1), pointer, dimension(:) :: d
+  allocate(d(10))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcuda_component()
+! CHECK: cuf.allocate

llvmbot · 2025-08-04T22:05:41Z

@llvm/pr-subscribers-flang-semantics

Author: Valentin Clement (バレンタインクレメン) (clementval)

Changes

Descriptors for derived types with CUDA components are allocated in managed memory. For an allocate statement on such an object, the lowering was calling the standard runtime where it should instead generate a cuf.allocate operation.

Full diff: https://github.com/llvm/llvm-project/pull/152041.diff

5 Files Affected:

(modified) flang/include/flang/Semantics/tools.h (+2)
(modified) flang/lib/Lower/Allocatable.cpp (+5-3)
(modified) flang/lib/Lower/ConvertVariable.cpp (+4-2)
(modified) flang/lib/Semantics/tools.cpp (+15)
(modified) flang/test/Lower/CUDA/cuda-allocatable.cuf (+13)

diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 317b9357b4c1f..966a30f7081fd 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &sym);
+
 inline bool IsCUDAShared(const Symbol &sym) {
   if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
     if (details->cudaDataAttr() &&
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 15cd9770b35ba..92ac050c41ae2 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -466,7 +466,9 @@ class AllocateStmtHelper {
 
   void genSimpleAllocation(const Allocation &alloc,
                            const fir::MutableBoxValue &box) {
-    bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+    bool isCudaAllocate =
+        Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
+        Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
     bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
     bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
                             !alloc.type.IsPolymorphic() &&
@@ -475,7 +477,7 @@ class AllocateStmtHelper {
     unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
 
     if (inlineAllocation &&
-        ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
+        ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
       // Pointers must use PointerAllocate so that their deallocations
       // can be validated.
       genInlinedAllocation(alloc, box);
@@ -494,7 +496,7 @@ class AllocateStmtHelper {
     genSetDeferredLengthParameters(alloc, box);
     genAllocateObjectBounds(alloc, box);
     mlir::Value stat;
-    if (!isCudaSymbol) {
+    if (!isCudaAllocate) {
       stat = genRuntimeAllocate(builder, loc, box, errorManager);
       setPinnedToFalse();
     } else {
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 647bd0d079985..b71f6519c39d9 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
         baseTy = boxTy.getEleTy();
       baseTy = fir::unwrapRefType(baseTy);
 
-      if (mlir::isa<fir::SequenceType>(baseTy))
-        TODO(loc, "array of derived-type with device component");
+      if (mlir::isa<fir::SequenceType>(baseTy) &&
+          (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+           fir::isPointerType(fir::getBase(exv).getType())))
+        return; // Allocator index need to be set after allocation.
 
       auto recTy =
           mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 5a5b02e1ac3ce..913bf08cd0d99 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
   return false;
 }
 
+bool HasCUDAComponent(const Symbol &symbol) {
+  if (const auto *details{symbol.GetUltimate()
+              .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+    const Fortran::semantics::DeclTypeSpec *type{details->type()};
+    const Fortran::semantics::DerivedTypeSpec *derived{
+        type ? type->AsDerived() : nullptr};
+    if (derived) {
+      if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
+        return true;
+      }
+    }
+  }
+  return false;
+}
+
 UltimateComponentIterator::const_iterator
 FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
   UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 36e768bd7d92c..2cf8c7d336812 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -6,6 +6,10 @@ module globals
   real, device, allocatable :: a_device(:)
   real, managed, allocatable :: a_managed(:)
   real, pinned, allocatable :: a_pinned(:)
+  type :: t1
+    integer :: a
+    real, dimension(:), allocatable, device :: b
+  end type
 end module
 
 ! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
@@ -222,3 +226,12 @@ end
 ! CHECK: %[[FALSE:.*]] = arith.constant false
 ! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
 ! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
+
+subroutine cuda_component()
+  use globals
+  type(t1), pointer, dimension(:) :: d
+  allocate(d(10))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcuda_component()
+! CHECK: cuf.allocate

razvanlupusoru

Thank you!

[flang][cuda] Generate cuf.allocate for descriptor with CUDA components

889c1c8

clementval requested review from razvanlupusoru and vzakhari August 4, 2025 22:05

llvmbot added flang Flang issues not falling into any other category flang:fir-hlfir flang:semantics labels Aug 4, 2025

razvanlupusoru approved these changes Aug 4, 2025

View reviewed changes

vzakhari approved these changes Aug 4, 2025

View reviewed changes

clementval merged commit 9b195dc into llvm:main Aug 4, 2025
13 checks passed

clementval deleted the cuf_allocate_components branch August 4, 2025 23:51

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[flang][cuda] Generate cuf.allocate for descriptor with CUDA components #152041

[flang][cuda] Generate cuf.allocate for descriptor with CUDA components #152041

clementval commented Aug 4, 2025

Uh oh!

llvmbot commented Aug 4, 2025

Uh oh!

llvmbot commented Aug 4, 2025

Uh oh!

razvanlupusoru left a comment

Uh oh!

Uh oh!

Uh oh!

[flang][cuda] Generate cuf.allocate for descriptor with CUDA components #152041

[flang][cuda] Generate cuf.allocate for descriptor with CUDA components #152041

Conversation

clementval commented Aug 4, 2025

Uh oh!

llvmbot commented Aug 4, 2025

Uh oh!

llvmbot commented Aug 4, 2025

Uh oh!

razvanlupusoru left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

Uh oh!