From 33cebb2a734b2bb7c82f329dfa40c8f013b9e428 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sat, 2 Aug 2025 22:33:04 +1000 Subject: [PATCH 1/2] coverage: Extract HIR-related helper code out of the main module --- .../src/coverage/hir_info.rs | 125 +++++++++++++++ .../src/coverage/mappings.rs | 2 +- .../rustc_mir_transform/src/coverage/mod.rs | 145 ++---------------- .../rustc_mir_transform/src/coverage/spans.rs | 3 +- 4 files changed, 139 insertions(+), 136 deletions(-) create mode 100644 compiler/rustc_mir_transform/src/coverage/hir_info.rs diff --git a/compiler/rustc_mir_transform/src/coverage/hir_info.rs b/compiler/rustc_mir_transform/src/coverage/hir_info.rs new file mode 100644 index 0000000000000..7d720159295bc --- /dev/null +++ b/compiler/rustc_mir_transform/src/coverage/hir_info.rs @@ -0,0 +1,125 @@ +use rustc_hir as hir; +use rustc_hir::intravisit::{Visitor, walk_expr}; +use rustc_middle::hir::nested_filter; +use rustc_middle::ty::TyCtxt; +use rustc_span::Span; +use rustc_span::def_id::LocalDefId; + +/// Function information extracted from HIR by the coverage instrumentor. +#[derive(Debug)] +pub(crate) struct ExtractedHirInfo { + pub(crate) function_source_hash: u64, + pub(crate) is_async_fn: bool, + /// The span of the function's signature, if available. + /// Must have the same context and filename as the body span. + pub(crate) fn_sig_span: Option, + pub(crate) body_span: Span, + /// "Holes" are regions within the function body (or its expansions) that + /// should not be included in coverage spans for this function + /// (e.g. closures and nested items). + pub(crate) hole_spans: Vec, +} + +pub(crate) fn extract_hir_info<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> ExtractedHirInfo { + // FIXME(#79625): Consider improving MIR to provide the information needed, to avoid going back + // to HIR for it. + + // HACK: For synthetic MIR bodies (async closures), use the def id of the HIR body. + if tcx.is_synthetic_mir(def_id) { + return extract_hir_info(tcx, tcx.local_parent(def_id)); + } + + let hir_node = tcx.hir_node_by_def_id(def_id); + let fn_body_id = hir_node.body_id().expect("HIR node is a function with body"); + let hir_body = tcx.hir_body(fn_body_id); + + let maybe_fn_sig = hir_node.fn_sig(); + let is_async_fn = maybe_fn_sig.is_some_and(|fn_sig| fn_sig.header.is_async()); + + let mut body_span = hir_body.value.span; + + use hir::{Closure, Expr, ExprKind, Node}; + // Unexpand a closure's body span back to the context of its declaration. + // This helps with closure bodies that consist of just a single bang-macro, + // and also with closure bodies produced by async desugaring. + if let Node::Expr(&Expr { kind: ExprKind::Closure(&Closure { fn_decl_span, .. }), .. }) = + hir_node + { + body_span = body_span.find_ancestor_in_same_ctxt(fn_decl_span).unwrap_or(body_span); + } + + // The actual signature span is only used if it has the same context and + // filename as the body, and precedes the body. + let fn_sig_span = maybe_fn_sig.map(|fn_sig| fn_sig.span).filter(|&fn_sig_span| { + let source_map = tcx.sess.source_map(); + let file_idx = |span: Span| source_map.lookup_source_file_idx(span.lo()); + + fn_sig_span.eq_ctxt(body_span) + && fn_sig_span.hi() <= body_span.lo() + && file_idx(fn_sig_span) == file_idx(body_span) + }); + + let function_source_hash = hash_mir_source(tcx, hir_body); + + let hole_spans = extract_hole_spans_from_hir(tcx, hir_body); + + ExtractedHirInfo { function_source_hash, is_async_fn, fn_sig_span, body_span, hole_spans } +} + +fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx hir::Body<'tcx>) -> u64 { + // FIXME(cjgillot) Stop hashing HIR manually here. + let owner = hir_body.id().hir_id.owner; + tcx.hir_owner_nodes(owner).opt_hash_including_bodies.unwrap().to_smaller_hash().as_u64() +} + +fn extract_hole_spans_from_hir<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &hir::Body<'tcx>) -> Vec { + struct HolesVisitor<'tcx> { + tcx: TyCtxt<'tcx>, + hole_spans: Vec, + } + + impl<'tcx> Visitor<'tcx> for HolesVisitor<'tcx> { + /// We have special handling for nested items, but we still want to + /// traverse into nested bodies of things that are not considered items, + /// such as "anon consts" (e.g. array lengths). + type NestedFilter = nested_filter::OnlyBodies; + + fn maybe_tcx(&mut self) -> TyCtxt<'tcx> { + self.tcx + } + + /// We override `visit_nested_item` instead of `visit_item` because we + /// only need the item's span, not the item itself. + fn visit_nested_item(&mut self, id: hir::ItemId) -> Self::Result { + let span = self.tcx.def_span(id.owner_id.def_id); + self.visit_hole_span(span); + // Having visited this item, we don't care about its children, + // so don't call `walk_item`. + } + + // We override `visit_expr` instead of the more specific expression + // visitors, so that we have direct access to the expression span. + fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) { + match expr.kind { + hir::ExprKind::Closure(_) | hir::ExprKind::ConstBlock(_) => { + self.visit_hole_span(expr.span); + // Having visited this expression, we don't care about its + // children, so don't call `walk_expr`. + } + + // For other expressions, recursively visit as normal. + _ => walk_expr(self, expr), + } + } + } + impl HolesVisitor<'_> { + fn visit_hole_span(&mut self, hole_span: Span) { + self.hole_spans.push(hole_span); + } + } + + let mut visitor = HolesVisitor { tcx, hole_spans: vec![] }; + + visitor.visit_body(hir_body); + visitor.hole_spans +} diff --git a/compiler/rustc_mir_transform/src/coverage/mappings.rs b/compiler/rustc_mir_transform/src/coverage/mappings.rs index b4b4d0416fb99..244849358a17f 100644 --- a/compiler/rustc_mir_transform/src/coverage/mappings.rs +++ b/compiler/rustc_mir_transform/src/coverage/mappings.rs @@ -9,8 +9,8 @@ use rustc_middle::mir::{self, BasicBlock, StatementKind}; use rustc_middle::ty::TyCtxt; use rustc_span::Span; -use crate::coverage::ExtractedHirInfo; use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph, START_BCB}; +use crate::coverage::hir_info::ExtractedHirInfo; use crate::coverage::spans::extract_refined_covspans; use crate::coverage::unexpand::unexpand_into_body_span; use crate::errors::MCDCExceedsTestVectorLimit; diff --git a/compiler/rustc_mir_transform/src/coverage/mod.rs b/compiler/rustc_mir_transform/src/coverage/mod.rs index f253d1662cac8..955fb6cbad78a 100644 --- a/compiler/rustc_mir_transform/src/coverage/mod.rs +++ b/compiler/rustc_mir_transform/src/coverage/mod.rs @@ -1,28 +1,24 @@ -mod counters; -mod graph; -mod mappings; -pub(super) mod query; -mod spans; -#[cfg(test)] -mod tests; -mod unexpand; - -use rustc_hir as hir; -use rustc_hir::intravisit::{Visitor, walk_expr}; -use rustc_middle::hir::nested_filter; use rustc_middle::mir::coverage::{ CoverageKind, DecisionInfo, FunctionCoverageInfo, Mapping, MappingKind, }; use rustc_middle::mir::{self, BasicBlock, Statement, StatementKind, TerminatorKind}; use rustc_middle::ty::TyCtxt; -use rustc_span::Span; -use rustc_span::def_id::LocalDefId; use tracing::{debug, debug_span, trace}; use crate::coverage::counters::BcbCountersData; use crate::coverage::graph::CoverageGraph; use crate::coverage::mappings::ExtractedMappings; +mod counters; +mod graph; +mod hir_info; +mod mappings; +pub(super) mod query; +mod spans; +#[cfg(test)] +mod tests; +mod unexpand; + /// Inserts `StatementKind::Coverage` statements that either instrument the binary with injected /// counters, via intrinsic `llvm.instrprof.increment`, and/or inject metadata used during codegen /// to construct the coverage map. @@ -69,7 +65,7 @@ fn instrument_function_for_coverage<'tcx>(tcx: TyCtxt<'tcx>, mir_body: &mut mir: let def_id = mir_body.source.def_id(); let _span = debug_span!("instrument_function_for_coverage", ?def_id).entered(); - let hir_info = extract_hir_info(tcx, def_id.expect_local()); + let hir_info = hir_info::extract_hir_info(tcx, def_id.expect_local()); // Build the coverage graph, which is a simplified view of the MIR control-flow // graph that ignores some details not relevant to coverage instrumentation. @@ -262,122 +258,3 @@ fn inject_statement(mir_body: &mut mir::Body<'_>, counter_kind: CoverageKind, bb let statement = Statement::new(source_info, StatementKind::Coverage(counter_kind)); data.statements.insert(0, statement); } - -/// Function information extracted from HIR by the coverage instrumentor. -#[derive(Debug)] -struct ExtractedHirInfo { - function_source_hash: u64, - is_async_fn: bool, - /// The span of the function's signature, if available. - /// Must have the same context and filename as the body span. - fn_sig_span: Option, - body_span: Span, - /// "Holes" are regions within the function body (or its expansions) that - /// should not be included in coverage spans for this function - /// (e.g. closures and nested items). - hole_spans: Vec, -} - -fn extract_hir_info<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> ExtractedHirInfo { - // FIXME(#79625): Consider improving MIR to provide the information needed, to avoid going back - // to HIR for it. - - // HACK: For synthetic MIR bodies (async closures), use the def id of the HIR body. - if tcx.is_synthetic_mir(def_id) { - return extract_hir_info(tcx, tcx.local_parent(def_id)); - } - - let hir_node = tcx.hir_node_by_def_id(def_id); - let fn_body_id = hir_node.body_id().expect("HIR node is a function with body"); - let hir_body = tcx.hir_body(fn_body_id); - - let maybe_fn_sig = hir_node.fn_sig(); - let is_async_fn = maybe_fn_sig.is_some_and(|fn_sig| fn_sig.header.is_async()); - - let mut body_span = hir_body.value.span; - - use hir::{Closure, Expr, ExprKind, Node}; - // Unexpand a closure's body span back to the context of its declaration. - // This helps with closure bodies that consist of just a single bang-macro, - // and also with closure bodies produced by async desugaring. - if let Node::Expr(&Expr { kind: ExprKind::Closure(&Closure { fn_decl_span, .. }), .. }) = - hir_node - { - body_span = body_span.find_ancestor_in_same_ctxt(fn_decl_span).unwrap_or(body_span); - } - - // The actual signature span is only used if it has the same context and - // filename as the body, and precedes the body. - let fn_sig_span = maybe_fn_sig.map(|fn_sig| fn_sig.span).filter(|&fn_sig_span| { - let source_map = tcx.sess.source_map(); - let file_idx = |span: Span| source_map.lookup_source_file_idx(span.lo()); - - fn_sig_span.eq_ctxt(body_span) - && fn_sig_span.hi() <= body_span.lo() - && file_idx(fn_sig_span) == file_idx(body_span) - }); - - let function_source_hash = hash_mir_source(tcx, hir_body); - - let hole_spans = extract_hole_spans_from_hir(tcx, hir_body); - - ExtractedHirInfo { function_source_hash, is_async_fn, fn_sig_span, body_span, hole_spans } -} - -fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx hir::Body<'tcx>) -> u64 { - // FIXME(cjgillot) Stop hashing HIR manually here. - let owner = hir_body.id().hir_id.owner; - tcx.hir_owner_nodes(owner).opt_hash_including_bodies.unwrap().to_smaller_hash().as_u64() -} - -fn extract_hole_spans_from_hir<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &hir::Body<'tcx>) -> Vec { - struct HolesVisitor<'tcx> { - tcx: TyCtxt<'tcx>, - hole_spans: Vec, - } - - impl<'tcx> Visitor<'tcx> for HolesVisitor<'tcx> { - /// We have special handling for nested items, but we still want to - /// traverse into nested bodies of things that are not considered items, - /// such as "anon consts" (e.g. array lengths). - type NestedFilter = nested_filter::OnlyBodies; - - fn maybe_tcx(&mut self) -> TyCtxt<'tcx> { - self.tcx - } - - /// We override `visit_nested_item` instead of `visit_item` because we - /// only need the item's span, not the item itself. - fn visit_nested_item(&mut self, id: hir::ItemId) -> Self::Result { - let span = self.tcx.def_span(id.owner_id.def_id); - self.visit_hole_span(span); - // Having visited this item, we don't care about its children, - // so don't call `walk_item`. - } - - // We override `visit_expr` instead of the more specific expression - // visitors, so that we have direct access to the expression span. - fn visit_expr(&mut self, expr: &'tcx hir::Expr<'tcx>) { - match expr.kind { - hir::ExprKind::Closure(_) | hir::ExprKind::ConstBlock(_) => { - self.visit_hole_span(expr.span); - // Having visited this expression, we don't care about its - // children, so don't call `walk_expr`. - } - - // For other expressions, recursively visit as normal. - _ => walk_expr(self, expr), - } - } - } - impl HolesVisitor<'_> { - fn visit_hole_span(&mut self, hole_span: Span) { - self.hole_spans.push(hole_span); - } - } - - let mut visitor = HolesVisitor { tcx, hole_spans: vec![] }; - - visitor.visit_body(hir_body); - visitor.hole_spans -} diff --git a/compiler/rustc_mir_transform/src/coverage/spans.rs b/compiler/rustc_mir_transform/src/coverage/spans.rs index ddeae093df5b7..8f35409814fc5 100644 --- a/compiler/rustc_mir_transform/src/coverage/spans.rs +++ b/compiler/rustc_mir_transform/src/coverage/spans.rs @@ -6,8 +6,9 @@ use rustc_span::{BytePos, DesugaringKind, ExpnKind, MacroKind, Span}; use tracing::instrument; use crate::coverage::graph::{BasicCoverageBlock, CoverageGraph}; +use crate::coverage::hir_info::ExtractedHirInfo; use crate::coverage::spans::from_mir::{Hole, RawSpanFromMir, SpanFromMir}; -use crate::coverage::{ExtractedHirInfo, mappings, unexpand}; +use crate::coverage::{mappings, unexpand}; mod from_mir; From b8739895f2896495e85101d82bdf881ff9a922d9 Mon Sep 17 00:00:00 2001 From: Zalathar Date: Sat, 2 Aug 2025 22:39:54 +1000 Subject: [PATCH 2/2] coverage: Remove obsolete comment about hashing HIR This code does not hash HIR manually (and has not done so for some time); it merely obtains a hash returned as part of `hir_owner_nodes`. --- compiler/rustc_mir_transform/src/coverage/hir_info.rs | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/compiler/rustc_mir_transform/src/coverage/hir_info.rs b/compiler/rustc_mir_transform/src/coverage/hir_info.rs index 7d720159295bc..28fdc52b06cb9 100644 --- a/compiler/rustc_mir_transform/src/coverage/hir_info.rs +++ b/compiler/rustc_mir_transform/src/coverage/hir_info.rs @@ -67,9 +67,12 @@ pub(crate) fn extract_hir_info<'tcx>(tcx: TyCtxt<'tcx>, def_id: LocalDefId) -> E } fn hash_mir_source<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &'tcx hir::Body<'tcx>) -> u64 { - // FIXME(cjgillot) Stop hashing HIR manually here. let owner = hir_body.id().hir_id.owner; - tcx.hir_owner_nodes(owner).opt_hash_including_bodies.unwrap().to_smaller_hash().as_u64() + tcx.hir_owner_nodes(owner) + .opt_hash_including_bodies + .expect("hash should be present when coverage instrumentation is enabled") + .to_smaller_hash() + .as_u64() } fn extract_hole_spans_from_hir<'tcx>(tcx: TyCtxt<'tcx>, hir_body: &hir::Body<'tcx>) -> Vec {