From 78c958db9cb04697696a1d00fd5df7cf6a1e0ea9 Mon Sep 17 00:00:00 2001 From: snarkmaster Date: Sun, 3 Aug 2025 21:22:32 -0700 Subject: [PATCH] Elide suspension points via [[clang::coro_destroyed_on_suspend]] See `AttrDocs.td` for the user-facing docs. The immediate motivation was that I noticed that short-circuiting coroutines failed to optimize well. My initial demo program is here: https://godbolt.org/z/oMdq8zfov ``` #include #include // `optional_wrapper` exists since `get_return_object()` can't return // `std::optional` directly. C++ coroutines have a fundamental timing mismatch // between when the return object is created and when the value is available: // // 1) Early (coroutine startup): `get_return_object()` is called and must return // something immediately. // 2) Later (when `co_return` executes): `return_value(T)` is called with the // actual value. // 3) Issue: If `get_return_object()` returns the storage, it's empty when // returned, and writing to it later cannot affect the already-returned copy. template struct optional_wrapper { std::optional storage_; std::optional*& pointer_; optional_wrapper(std::optional*& p) : pointer_(p) { pointer_ = &storage_; } operator std::optional() { return std::move(*storage_); } ~optional_wrapper() {} }; // Make `std::optional` a coroutine namespace std { template struct coroutine_traits, Args...> { struct promise_type { optional* storagePtr_ = nullptr; promise_type() = default; ::optional_wrapper get_return_object() { return ::optional_wrapper(storagePtr_); } suspend_never initial_suspend() const noexcept { return {}; } suspend_never final_suspend() const noexcept { return {}; } void return_value(T value) { *storagePtr_ = std::move(value); } void unhandled_exception() { // Leave storage_ empty to represent error } }; }; } // namespace std // Non-coroutine baseline -- compare to the `simple_coro` variants below std::optional simple_func(const std::optional& r) { if (r.has_value()) { return r.value() + 1; } return std::nullopt; // empty optional (error) } // Without `co_await`, this optimizes just like `simple_func`. // Unfortunately, this doesn't short-circuit when `r` is empty. std::optional wrong_simple_coro(const std::optional& r) { co_return r.value() + 2; } // `#if 0` adds a short-circuiting `co_await` that may suspend to // immediately exit the parent coroutine. Unfortunately, this // triggers a coro frame allocation, and pessimizes `simple_coro`. template struct optional_awaitable { std::optional opt_; bool await_ready() const noexcept { return opt_.has_value(); } T await_resume() { return opt_.value(); } template [[clang::coro_destroyed_on_suspend]] void await_suspend( std::coroutine_handle handle) { handle.destroy(); } }; template optional_awaitable operator co_await(std::optional opt) { return {std::move(opt)}; } // Matches the logic of `simple_func`. Before this patch, adding a // "suspend-to-exit" await point forces an allocation. std::optional simple_coro(const std::optional& r) { co_return co_await std::move(r) + 4; } int main() { return simple_coro(std::optional{8}).value() + wrong_simple_coro(std::optional{16}).value() + simple_func(std::optional{32}).value(); } ``` --- clang/docs/ReleaseNotes.rst | 4 + clang/include/clang/Basic/Attr.td | 8 + clang/include/clang/Basic/AttrDocs.td | 60 +++++++ clang/lib/CodeGen/CGCoroutine.cpp | 63 ++++++-- clang/lib/CodeGen/CodeGenFunction.h | 3 +- .../coro-destroyed-on-suspend.cpp | 152 ++++++++++++++++++ ...a-attribute-supported-attributes-list.test | 1 + 7 files changed, 274 insertions(+), 17 deletions(-) create mode 100644 clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 9231f2cae9bfa..3ebf64bfbbe11 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -116,6 +116,10 @@ Removed Compiler Flags Attribute Changes in Clang -------------------------- +- Introduced a new attribute ``[[clang::coro_destroyed_on_suspend]]`` on coroutine ``await_suspend`` + methods to indicate that the coroutine will be destroyed during suspension, enabling optimizations + that skip the suspend point. + Improvements to Clang's diagnostics ----------------------------------- - Added a separate diagnostic group ``-Wfunction-effect-redeclarations``, for the more pedantic diff --git a/clang/include/clang/Basic/Attr.td b/clang/include/clang/Basic/Attr.td index 63fa6aa508827..f0e12ed3b9be3 100644 --- a/clang/include/clang/Basic/Attr.td +++ b/clang/include/clang/Basic/Attr.td @@ -1352,6 +1352,14 @@ def CoroAwaitElidableArgument : InheritableAttr { let SimpleHandler = 1; } +def CoroDestroyedOnSuspend: InheritableAttr { + let Spellings = [Clang<"coro_destroyed_on_suspend">]; + let Subjects = SubjectList<[CXXMethod]>; + let LangOpts = [CPlusPlus]; + let Documentation = [CoroDestroyedOnSuspendDoc]; + let SimpleHandler = 1; +} + // OSObject-based attributes. def OSConsumed : InheritableParamAttr { let Spellings = [Clang<"os_consumed">]; diff --git a/clang/include/clang/Basic/AttrDocs.td b/clang/include/clang/Basic/AttrDocs.td index 326cf0bc842db..7745e08de44cc 100644 --- a/clang/include/clang/Basic/AttrDocs.td +++ b/clang/include/clang/Basic/AttrDocs.td @@ -9244,6 +9244,66 @@ Example: }]; } +def CoroDestroyedOnSuspendDoc : Documentation { + let Category = DocCatDecl; + let Content = [{ +The ``[[clang::coro_destroyed_on_suspend]]`` attribute may be applied to a C++ +``void await_suspend(handle)`` method belonging to a coroutine awaiter object. + +**Warning:** The attribute must only be used when ``await_suspend`` is +guaranteed to call ``handle.destroy()`` before completing. Violating this +invariant **will** cause undefined behavior. Be sure to consider exceptions. + +This attribute improves the optimization of "short-circuiting" coroutines. It +is useful when every suspend point in the coroutine is either (i) trivial +(`std::suspend_never`), or (ii) a `co_await` that can be translated into simple +control flow as: + +.. code-block:: c++ + + if (awaiter.await_ready()) { + val = awaiter.await_resume(); + } else { + awaiter.await_suspend(); + return /* value representing the "execution short-circuited" outcome */ + } + +Adding the attribute permits the compiler to elide the `@llvm.coro.suspend` +intrinsic and go directly to the cleanup/destruction path, avoiding unnecessary +suspend/resume overhead. If all suspension points can be simplified via (i) or +(ii), then the coroutine should be able to be optimized nearly like a plain +function via the `isNoSuspendCoroutine` optimization. The typical wins are: + - no heap allocation + - smaller code size & better optimization + +For example, + +.. code-block:: c++ + + template + struct optional_awaitable { + std::optional opt_; + + bool await_ready() const noexcept { + return opt_.has_value(); + } + + T await_resume() { + return std::move(opt_).value(); + } + + [[clang::coro_destroyed_on_suspend]] + template + void await_suspend(std::coroutine_handle handle) { + // DANGER: If any exit path from a `coro_destroyed_on_suspend`-marked + // `await_suspend` neglects to call `destroy()`, that will cause UB. + handle.destroy(); + } + }; + +}]; +} + def CountedByDocs : Documentation { let Category = DocCatField; let Content = [{ diff --git a/clang/lib/CodeGen/CGCoroutine.cpp b/clang/lib/CodeGen/CGCoroutine.cpp index 827385f9c1a1f..e72c0e28a442e 100644 --- a/clang/lib/CodeGen/CGCoroutine.cpp +++ b/clang/lib/CodeGen/CGCoroutine.cpp @@ -247,8 +247,9 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co auto *NullPtr = llvm::ConstantPointerNull::get(CGF.CGM.Int8PtrTy); auto *SaveCall = Builder.CreateCall(CoroSave, {NullPtr}); + bool SuspendWillDestroyCoro = false; auto SuspendWrapper = CodeGenFunction(CGF.CGM).generateAwaitSuspendWrapper( - CGF.CurFn->getName(), Prefix, S); + CGF.CurFn->getName(), Prefix, S, SuspendWillDestroyCoro); CGF.CurCoro.InSuspendBlock = true; @@ -317,17 +318,24 @@ static LValueOrRValue emitSuspendExpression(CodeGenFunction &CGF, CGCoroData &Co } } - // Emit the suspend point. - const bool IsFinalSuspend = (Kind == AwaitKind::Final); - llvm::Function *CoroSuspend = - CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend); - auto *SuspendResult = Builder.CreateCall( - CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)}); - - // Create a switch capturing three possible continuations. - auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2); - Switch->addCase(Builder.getInt8(0), ReadyBlock); - Switch->addCase(Builder.getInt8(1), CleanupBlock); + // If suspend will destroy coroutine, skip the suspend intrinsic and go + // straight to cleanup + if (SuspendWillDestroyCoro) { + // Skip the suspend point and go directly to cleanup + CGF.EmitBranch(CleanupBlock); + } else { + // Emit the suspend point. + const bool IsFinalSuspend = (Kind == AwaitKind::Final); + llvm::Function *CoroSuspend = + CGF.CGM.getIntrinsic(llvm::Intrinsic::coro_suspend); + auto *SuspendResult = Builder.CreateCall( + CoroSuspend, {SaveCall, Builder.getInt1(IsFinalSuspend)}); + + // Create a switch capturing three possible continuations. + auto *Switch = Builder.CreateSwitch(SuspendResult, Coro.SuspendBB, 2); + Switch->addCase(Builder.getInt8(0), ReadyBlock); + Switch->addCase(Builder.getInt8(1), CleanupBlock); + } // Emit cleanup for this suspend point. CGF.EmitBlock(CleanupBlock); @@ -411,10 +419,9 @@ static QualType getCoroutineSuspendExprReturnType(const ASTContext &Ctx, } #endif -llvm::Function * -CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName, - Twine const &SuspendPointName, - CoroutineSuspendExpr const &S) { +llvm::Function *CodeGenFunction::generateAwaitSuspendWrapper( + Twine const &CoroName, Twine const &SuspendPointName, + CoroutineSuspendExpr const &S, bool &SuspendWillDestroyCoro) { std::string FuncName = (CoroName + ".__await_suspend_wrapper__" + SuspendPointName).str(); @@ -445,6 +452,30 @@ CodeGenFunction::generateAwaitSuspendWrapper(Twine const &CoroName, Fn->setMustProgress(); Fn->addFnAttr(llvm::Attribute::AttrKind::AlwaysInline); + // Is the underlying `await_suspend()` marked `coro_destroyed_on_suspend`? + SuspendWillDestroyCoro = false; + // Only check for the attribute if the suspend expression returns void. When + // a handle is returned, the outermost method call is `.address()`, and + // there's no point in doing more work to obtain the inner `await_suspend`. + if (S.getSuspendReturnType() == CoroutineSuspendExpr::SuspendReturnType::SuspendVoid) { + if (auto *CE = dyn_cast(S.getSuspendExpr()->IgnoreImplicit())) { + if (auto *ME = dyn_cast(CE->getCallee())) { + if (auto *MethodDecl = dyn_cast(ME->getMemberDecl())) { + if (MethodDecl->getNameAsString() == "await_suspend") { + for (const auto *Attr : MethodDecl->attrs()) { + if (Attr->getKind() == attr::Kind::CoroDestroyedOnSuspend) { + SuspendWillDestroyCoro = true; + break; + } + } + } else { + assert(false && "Unexpected method for [[clang::coro_destroyed_on_suspend]]"); + } + } + } + } + } + StartFunction(GlobalDecl(), ReturnTy, Fn, FI, args); // FIXME: add TBAA metadata to the loads diff --git a/clang/lib/CodeGen/CodeGenFunction.h b/clang/lib/CodeGen/CodeGenFunction.h index 6c32c98cec011..ecb5d8e2fcf8c 100644 --- a/clang/lib/CodeGen/CodeGenFunction.h +++ b/clang/lib/CodeGen/CodeGenFunction.h @@ -383,7 +383,8 @@ class CodeGenFunction : public CodeGenTypeCache { // where type is one of (void, i1, ptr) llvm::Function *generateAwaitSuspendWrapper(Twine const &CoroName, Twine const &SuspendPointName, - CoroutineSuspendExpr const &S); + CoroutineSuspendExpr const &S, + bool &SuspendWillDestroyCoro); /// CurGD - The GlobalDecl for the current function being compiled. GlobalDecl CurGD; diff --git a/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp b/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp new file mode 100644 index 0000000000000..99505c317e24e --- /dev/null +++ b/clang/test/CodeGenCoroutines/coro-destroyed-on-suspend.cpp @@ -0,0 +1,152 @@ +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \ +// RUN: -disable-llvm-passes | FileCheck %s --check-prefix=CHECK-INITIAL +// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm -o - %s \ +// RUN: -O2 | FileCheck %s --check-prefix=CHECK-OPTIMIZED + +#include "Inputs/coroutine.h" + +// Awaitable with `coro_destroyed_on_suspend` attribute +struct DestroyingAwaitable { + bool await_ready() { return false; } + [[clang::coro_destroyed_on_suspend]] + void await_suspend(std::coroutine_handle<> h) { + h.destroy(); + } + void await_resume() {} +}; + +// Awaitable without `coro_destroyed_on_suspend` (normal behavior) +struct NormalAwaitable { + bool await_ready() { return false; } + void await_suspend(std::coroutine_handle<> h) {} + void await_resume() {} +}; + +// Coroutine type with `std::suspend_never` for initial/final suspend +struct Task { + struct promise_type { + Task get_return_object() { return {}; } + std::suspend_never initial_suspend() { return {}; } + std::suspend_never final_suspend() noexcept { return {}; } + void return_void() {} + void unhandled_exception() {} + }; +}; + +// Single co_await with coro_destroyed_on_suspend. +// Should result in no allocation after optimization. +Task test_single_destroying_await() { + co_await DestroyingAwaitable{}; +} + +// CHECK-INITIAL-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv +// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc +// CHECK-INITIAL: call{{.*}} @llvm.coro.begin + +// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z28test_single_destroying_awaitv +// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc +// CHECK-OPTIMIZED-NOT: call{{.*}} malloc +// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm + +// Test multiple `co_await`s, all with `coro_destroyed_on_suspend`. +// This should also result in no allocation after optimization. +Task test_multiple_destroying_awaits(bool condition) { + co_await DestroyingAwaitable{}; + co_await DestroyingAwaitable{}; + if (condition) { + co_await DestroyingAwaitable{}; + } +} + +// CHECK-INITIAL-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb +// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc +// CHECK-INITIAL: call{{.*}} @llvm.coro.begin + +// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z31test_multiple_destroying_awaitsb +// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc +// CHECK-OPTIMIZED-NOT: call{{.*}} malloc +// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm + +// Mixed awaits - some with `coro_destroyed_on_suspend`, some without. +// We should still see allocation because not all awaits destroy the coroutine. +Task test_mixed_awaits() { + co_await NormalAwaitable{}; // Must precede "destroy" to be reachable + co_await DestroyingAwaitable{}; +} + +// CHECK-INITIAL-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv +// CHECK-INITIAL: call{{.*}} @llvm.coro.alloc +// CHECK-INITIAL: call{{.*}} @llvm.coro.begin + +// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z17test_mixed_awaitsv +// CHECK-OPTIMIZED: call{{.*}} @_Znwm + + +// Check the attribute detection affects control flow. With +// `coro_destroyed_on_suspend`, the suspend point should be skipped and control +// should go directly to cleanup instead of the normal suspend logic +Task test_attribute_detection() { + co_await DestroyingAwaitable{}; + // Unreachable in OPTIMIZED, so those builds don't see an allocation. + co_await NormalAwaitable{}; +} + +// Check that we skip the normal suspend intrinsic and go directly to cleanup. +// This is the key behavioral difference the attribute enables. +// +// CHECK-INITIAL-LABEL: define{{.*}} void @_Z24test_attribute_detectionv +// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__await +// CHECK-NEXT: br label %await.cleanup +// CHECK-INITIAL-NOT: call{{.*}} @llvm.coro.suspend +// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__await +// CHECK-INITIAL: call{{.*}} @llvm.coro.suspend +// CHECK-INITIAL: call{{.*}} @_Z24test_attribute_detectionv.__await_suspend_wrapper__final + +// Since `co_await DestroyingAwaitable{}` gets converted into an unconditional +// branch, the `co_await NormalAwaitable{}` is unreachable in optimized builds. +// +// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc +// CHECK-OPTIMIZED-NOT: call{{.*}} malloc +// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm + +// Template awaitable with `coro_destroyed_on_suspend` attribute +template +struct TemplateDestroyingAwaitable { + bool await_ready() { return false; } + + [[clang::coro_destroyed_on_suspend]] + void await_suspend(std::coroutine_handle<> h) { + h.destroy(); + } + + void await_resume() {} +}; + +Task test_template_destroying_await() { + co_await TemplateDestroyingAwaitable{}; +} + +// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z30test_template_destroying_awaitv +// CHECK-OPTIMIZED-NOT: call{{.*}} @llvm.coro.alloc +// CHECK-OPTIMIZED-NOT: call{{.*}} malloc +// CHECK-OPTIMIZED-NOT: call{{.*}} @_Znwm + +struct InvalidDestroyingAwaitable { + bool await_ready() { return false; } + + // The attribute is ignored, since the function doesn't return `void`! + [[clang::coro_destroyed_on_suspend]] + bool await_suspend(std::coroutine_handle<> h) { + h.destroy(); + return true; + } + + void await_resume() {} +}; + +Task test_invalid_destroying_await() { + co_await InvalidDestroyingAwaitable{}; +} + +// CHECK-OPTIMIZED-LABEL: define{{.*}} void @_Z29test_invalid_destroying_awaitv +// CHECK-OPTIMIZED: call{{.*}} @_Znwm diff --git a/clang/test/Misc/pragma-attribute-supported-attributes-list.test b/clang/test/Misc/pragma-attribute-supported-attributes-list.test index 05693538252aa..a02b97869adc9 100644 --- a/clang/test/Misc/pragma-attribute-supported-attributes-list.test +++ b/clang/test/Misc/pragma-attribute-supported-attributes-list.test @@ -62,6 +62,7 @@ // CHECK-NEXT: Convergent (SubjectMatchRule_function) // CHECK-NEXT: CoroAwaitElidable (SubjectMatchRule_record) // CHECK-NEXT: CoroAwaitElidableArgument (SubjectMatchRule_variable_is_parameter) +// CHECK-NEXT: CoroDestroyedOnSuspend (SubjectMatchRule_function_is_member) // CHECK-NEXT: CoroDisableLifetimeBound (SubjectMatchRule_function) // CHECK-NEXT: CoroLifetimeBound (SubjectMatchRule_record) // CHECK-NEXT: CoroOnlyDestroyWhenComplete (SubjectMatchRule_record)