-
Notifications
You must be signed in to change notification settings - Fork 14.7k
[flang][cuda] Generate cuf.allocate for descriptor with CUDA components #152041
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
@llvm/pr-subscribers-flang-fir-hlfir Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: The descriptors for derived types with CUDA components are allocated in managed memory. The lowering was calling the standard runtime on the allocate statement where it should be a `cuf.allocate` operation. Full diff: https://github.com/llvm/llvm-project/pull/152041.diff 5 Files Affected:
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 317b9357b4c1f..966a30f7081fd 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
return false;
}
+bool HasCUDAComponent(const Symbol &sym);
+
inline bool IsCUDAShared(const Symbol &sym) {
if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
if (details->cudaDataAttr() &&
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 15cd9770b35ba..92ac050c41ae2 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -466,7 +466,9 @@ class AllocateStmtHelper {
void genSimpleAllocation(const Allocation &alloc,
const fir::MutableBoxValue &box) {
- bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+ bool isCudaAllocate =
+ Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
+ Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
!alloc.type.IsPolymorphic() &&
@@ -475,7 +477,7 @@ class AllocateStmtHelper {
unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
if (inlineAllocation &&
- ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
+ ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
// Pointers must use PointerAllocate so that their deallocations
// can be validated.
genInlinedAllocation(alloc, box);
@@ -494,7 +496,7 @@ class AllocateStmtHelper {
genSetDeferredLengthParameters(alloc, box);
genAllocateObjectBounds(alloc, box);
mlir::Value stat;
- if (!isCudaSymbol) {
+ if (!isCudaAllocate) {
stat = genRuntimeAllocate(builder, loc, box, errorManager);
setPinnedToFalse();
} else {
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 647bd0d079985..b71f6519c39d9 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
baseTy = boxTy.getEleTy();
baseTy = fir::unwrapRefType(baseTy);
- if (mlir::isa<fir::SequenceType>(baseTy))
- TODO(loc, "array of derived-type with device component");
+ if (mlir::isa<fir::SequenceType>(baseTy) &&
+ (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+ fir::isPointerType(fir::getBase(exv).getType())))
+ return; // Allocator index need to be set after allocation.
auto recTy =
mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 5a5b02e1ac3ce..913bf08cd0d99 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
return false;
}
+bool HasCUDAComponent(const Symbol &symbol) {
+ if (const auto *details{symbol.GetUltimate()
+ .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+ const Fortran::semantics::DeclTypeSpec *type{details->type()};
+ const Fortran::semantics::DerivedTypeSpec *derived{
+ type ? type->AsDerived() : nullptr};
+ if (derived) {
+ if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
UltimateComponentIterator::const_iterator
FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 36e768bd7d92c..2cf8c7d336812 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -6,6 +6,10 @@ module globals
real, device, allocatable :: a_device(:)
real, managed, allocatable :: a_managed(:)
real, pinned, allocatable :: a_pinned(:)
+ type :: t1
+ integer :: a
+ real, dimension(:), allocatable, device :: b
+ end type
end module
! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
@@ -222,3 +226,12 @@ end
! CHECK: %[[FALSE:.*]] = arith.constant false
! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
+
+subroutine cuda_component()
+ use globals
+ type(t1), pointer, dimension(:) :: d
+ allocate(d(10))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcuda_component()
+! CHECK: cuf.allocate
|
@llvm/pr-subscribers-flang-semantics Author: Valentin Clement (バレンタイン クレメン) (clementval) Changes: The descriptors for derived types with CUDA components are allocated in managed memory. The lowering was calling the standard runtime on the allocate statement where it should be a `cuf.allocate` operation. Full diff: https://github.com/llvm/llvm-project/pull/152041.diff 5 Files Affected:
diff --git a/flang/include/flang/Semantics/tools.h b/flang/include/flang/Semantics/tools.h
index 317b9357b4c1f..966a30f7081fd 100644
--- a/flang/include/flang/Semantics/tools.h
+++ b/flang/include/flang/Semantics/tools.h
@@ -223,6 +223,8 @@ inline bool HasCUDAAttr(const Symbol &sym) {
return false;
}
+bool HasCUDAComponent(const Symbol &sym);
+
inline bool IsCUDAShared(const Symbol &sym) {
if (const auto *details{sym.GetUltimate().detailsIf<ObjectEntityDetails>()}) {
if (details->cudaDataAttr() &&
diff --git a/flang/lib/Lower/Allocatable.cpp b/flang/lib/Lower/Allocatable.cpp
index 15cd9770b35ba..92ac050c41ae2 100644
--- a/flang/lib/Lower/Allocatable.cpp
+++ b/flang/lib/Lower/Allocatable.cpp
@@ -466,7 +466,9 @@ class AllocateStmtHelper {
void genSimpleAllocation(const Allocation &alloc,
const fir::MutableBoxValue &box) {
- bool isCudaSymbol = Fortran::semantics::HasCUDAAttr(alloc.getSymbol());
+ bool isCudaAllocate =
+ Fortran::semantics::HasCUDAAttr(alloc.getSymbol()) ||
+ Fortran::semantics::HasCUDAComponent(alloc.getSymbol());
bool isCudaDeviceContext = cuf::isCUDADeviceContext(builder.getRegion());
bool inlineAllocation = !box.isDerived() && !errorManager.hasStatSpec() &&
!alloc.type.IsPolymorphic() &&
@@ -475,7 +477,7 @@ class AllocateStmtHelper {
unsigned allocatorIdx = Fortran::lower::getAllocatorIdx(alloc.getSymbol());
if (inlineAllocation &&
- ((isCudaSymbol && isCudaDeviceContext) || !isCudaSymbol)) {
+ ((isCudaAllocate && isCudaDeviceContext) || !isCudaAllocate)) {
// Pointers must use PointerAllocate so that their deallocations
// can be validated.
genInlinedAllocation(alloc, box);
@@ -494,7 +496,7 @@ class AllocateStmtHelper {
genSetDeferredLengthParameters(alloc, box);
genAllocateObjectBounds(alloc, box);
mlir::Value stat;
- if (!isCudaSymbol) {
+ if (!isCudaAllocate) {
stat = genRuntimeAllocate(builder, loc, box, errorManager);
setPinnedToFalse();
} else {
diff --git a/flang/lib/Lower/ConvertVariable.cpp b/flang/lib/Lower/ConvertVariable.cpp
index 647bd0d079985..b71f6519c39d9 100644
--- a/flang/lib/Lower/ConvertVariable.cpp
+++ b/flang/lib/Lower/ConvertVariable.cpp
@@ -814,8 +814,10 @@ initializeDeviceComponentAllocator(Fortran::lower::AbstractConverter &converter,
baseTy = boxTy.getEleTy();
baseTy = fir::unwrapRefType(baseTy);
- if (mlir::isa<fir::SequenceType>(baseTy))
- TODO(loc, "array of derived-type with device component");
+ if (mlir::isa<fir::SequenceType>(baseTy) &&
+ (fir::isAllocatableType(fir::getBase(exv).getType()) ||
+ fir::isPointerType(fir::getBase(exv).getType())))
+ return; // Allocator index need to be set after allocation.
auto recTy =
mlir::dyn_cast<fir::RecordType>(fir::unwrapSequenceType(baseTy));
diff --git a/flang/lib/Semantics/tools.cpp b/flang/lib/Semantics/tools.cpp
index 5a5b02e1ac3ce..913bf08cd0d99 100644
--- a/flang/lib/Semantics/tools.cpp
+++ b/flang/lib/Semantics/tools.cpp
@@ -1094,6 +1094,21 @@ bool IsDeviceAllocatable(const Symbol &symbol) {
return false;
}
+bool HasCUDAComponent(const Symbol &symbol) {
+ if (const auto *details{symbol.GetUltimate()
+ .detailsIf<Fortran::semantics::ObjectEntityDetails>()}) {
+ const Fortran::semantics::DeclTypeSpec *type{details->type()};
+ const Fortran::semantics::DerivedTypeSpec *derived{
+ type ? type->AsDerived() : nullptr};
+ if (derived) {
+ if (FindCUDADeviceAllocatableUltimateComponent(*derived)) {
+ return true;
+ }
+ }
+ }
+ return false;
+}
+
UltimateComponentIterator::const_iterator
FindCUDADeviceAllocatableUltimateComponent(const DerivedTypeSpec &derived) {
UltimateComponentIterator ultimates{derived};
diff --git a/flang/test/Lower/CUDA/cuda-allocatable.cuf b/flang/test/Lower/CUDA/cuda-allocatable.cuf
index 36e768bd7d92c..2cf8c7d336812 100644
--- a/flang/test/Lower/CUDA/cuda-allocatable.cuf
+++ b/flang/test/Lower/CUDA/cuda-allocatable.cuf
@@ -6,6 +6,10 @@ module globals
real, device, allocatable :: a_device(:)
real, managed, allocatable :: a_managed(:)
real, pinned, allocatable :: a_pinned(:)
+ type :: t1
+ integer :: a
+ real, dimension(:), allocatable, device :: b
+ end type
end module
! CHECK-LABEL: fir.global @_QMglobalsEa_device {data_attr = #cuf.cuda<device>} : !fir.box<!fir.heap<!fir.array<?xf32>>>
@@ -222,3 +226,12 @@ end
! CHECK: %[[FALSE:.*]] = arith.constant false
! CHECK: %[[FLASE_CONV:.*]] = fir.convert %[[FALSE]] : (i1) -> !fir.logical<4>
! CHECK: fir.store %[[FLASE_CONV]] to %[[PLOG_DECL]]#0 : !fir.ref<!fir.logical<4>>
+
+subroutine cuda_component()
+ use globals
+ type(t1), pointer, dimension(:) :: d
+ allocate(d(10))
+end subroutine
+
+! CHECK-LABEL: func.func @_QPcuda_component()
+! CHECK: cuf.allocate
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thank you!
The descriptors for derived types with CUDA components are allocated in managed memory. The lowering was calling the standard runtime on the allocate statement where it should be a `cuf.allocate` operation.