Skip to content

Commit d800196

Browse files
committed
Add -Zannotate-moves for profiler visibility of move/copy operations
This implements a new unstable compiler flag `-Zannotate-moves` that makes move and copy operations visible in profilers by creating synthetic debug information. This is achieved with zero runtime cost by manipulating debug info scopes to make moves/copies appear as inlined calls to `compiler_move<T, SIZE>` and `compiler_copy<T, SIZE>` marker functions in profiling tools. This allows developers to identify expensive move/copy operations in their code using standard profiling tools, without requiring specialized tooling or runtime instrumentation.

The implementation works at codegen time. When processing MIR operands (`Operand::Move` and `Operand::Copy`), the codegen creates an `OperandRef` with an optional `move_annotation` field containing an `Instance` of the appropriate profiling marker function. When storing the operand, `store_with_annotation()` wraps the store operation in a synthetic debug scope that makes it appear inlined from the marker.

Two marker functions (`compiler_move` and `compiler_copy`) are defined in `library/core/src/profiling.rs`. These are never actually called - they exist solely as debug info anchors.

Operations are only annotated if the type:
- Meets the size threshold (default: 65 bytes, configurable via `-Zannotate-moves=SIZE`)
- Has an in-memory backend representation (`BackendRepr::Memory`); scalars use registers, not memcpy

This has a very small impact on object file size. With the default limit it's well under 0.1%, and even with a very small limit of 8 bytes it's still ~1.5%. This could be enabled by default.
1 parent 27050c0 commit d800196

File tree

28 files changed

+889
-48
lines changed

28 files changed

+889
-48
lines changed

compiler/rustc_codegen_gcc/src/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1069,7 +1069,7 @@ impl<'a, 'gcc, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'gcc, 'tcx> {
10691069
OperandValue::Ref(place.val)
10701070
};
10711071

1072-
OperandRef { val, layout: place.layout }
1072+
OperandRef { val, layout: place.layout, move_annotation: None }
10731073
}
10741074

10751075
fn write_operand_repeatedly(

compiler/rustc_codegen_gcc/src/debuginfo.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -19,7 +19,7 @@ use crate::context::CodegenCx;
1919
pub(super) const UNKNOWN_LINE_NUMBER: u32 = 0;
2020
pub(super) const UNKNOWN_COLUMN_NUMBER: u32 = 0;
2121

22-
impl<'a, 'gcc, 'tcx> DebugInfoBuilderMethods for Builder<'a, 'gcc, 'tcx> {
22+
impl<'a, 'gcc, 'tcx> DebugInfoBuilderMethods<'tcx> for Builder<'a, 'gcc, 'tcx> {
2323
// FIXME(eddyb) find a common convention for all of the debuginfo-related
2424
// names (choose between `dbg`, `debug`, `debuginfo`, `debug_info` etc.).
2525
fn dbg_var_addr(

compiler/rustc_codegen_llvm/src/abi.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -247,7 +247,7 @@ impl<'ll, 'tcx> ArgAbiExt<'ll, 'tcx> for ArgAbi<'tcx, Ty<'tcx>> {
247247
);
248248
bx.lifetime_end(llscratch, scratch_size);
249249
}
250-
_ => {
250+
PassMode::Pair(..) | PassMode::Direct { .. } => {
251251
OperandRef::from_immediate_or_packed_pair(bx, val, self.layout).val.store(bx, dst);
252252
}
253253
}

compiler/rustc_codegen_llvm/src/builder.rs

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -752,7 +752,7 @@ impl<'a, 'll, 'tcx> BuilderMethods<'a, 'tcx> for Builder<'a, 'll, 'tcx> {
752752
OperandValue::Ref(place.val)
753753
};
754754

755-
OperandRef { val, layout: place.layout }
755+
OperandRef { val, layout: place.layout, move_annotation: None }
756756
}
757757

758758
fn write_operand_repeatedly(

compiler/rustc_codegen_llvm/src/debuginfo/mod.rs

Lines changed: 52 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -146,7 +146,7 @@ impl<'ll> Builder<'_, 'll, '_> {
146146
}
147147
}
148148

149-
impl<'ll> DebugInfoBuilderMethods for Builder<'_, 'll, '_> {
149+
impl<'ll, 'tcx> DebugInfoBuilderMethods<'tcx> for Builder<'_, 'll, 'tcx> {
150150
// FIXME(eddyb) find a common convention for all of the debuginfo-related
151151
// names (choose between `dbg`, `debug`, `debuginfo`, `debug_info` etc.).
152152
fn dbg_var_addr(
@@ -284,6 +284,57 @@ impl<'ll> DebugInfoBuilderMethods for Builder<'_, 'll, '_> {
284284
llvm::set_value_name(value, name.as_bytes());
285285
}
286286
}
287+
288+
/// Annotate move/copy operations with debug info for profiling.
289+
///
290+
/// This creates a temporary debug scope that makes the move/copy appear as an inlined call to
291+
/// `compiler_move<T, SIZE>()` or `compiler_copy<T, SIZE>()`. The provided closure is executed
292+
/// with this temporary debug location active.
293+
///
294+
/// The `instance` parameter should be the monomorphized instance of the `compiler_move` or
295+
/// `compiler_copy` function with the actual type and size.
296+
fn with_move_annotation<R>(
297+
&mut self,
298+
instance: ty::Instance<'tcx>,
299+
f: impl FnOnce(&mut Self) -> R,
300+
) -> R {
301+
// Save the current debug location
302+
let saved_loc = self.get_dbg_loc();
303+
304+
// Create a DIScope for the compiler_move/compiler_copy function
305+
// We use the function's FnAbi for debug info generation
306+
let fn_abi = self
307+
.cx()
308+
.tcx
309+
.fn_abi_of_instance(
310+
self.cx().typing_env().as_query_input((instance, ty::List::empty())),
311+
)
312+
.unwrap();
313+
314+
let di_scope = self.cx().dbg_scope_fn(instance, fn_abi, None);
315+
316+
// Create an inlined debug location:
317+
// - scope: the compiler_move/compiler_copy function
318+
// - inlined_at: the current location (where the move/copy actually occurs)
319+
// - span: use the function's definition span
320+
let fn_span = self.cx().tcx.def_span(instance.def_id());
321+
let inlined_loc = self.cx().dbg_loc(di_scope, saved_loc, fn_span);
322+
323+
// Set the temporary debug location
324+
self.set_dbg_loc(inlined_loc);
325+
326+
// Execute the closure (which will generate the memcpy)
327+
let result = f(self);
328+
329+
// Restore the original debug location
330+
if let Some(loc) = saved_loc {
331+
self.set_dbg_loc(loc);
332+
} else {
333+
self.clear_dbg_loc();
334+
}
335+
336+
result
337+
}
287338
}
288339

289340
/// A source code location used to generate debug information.

compiler/rustc_codegen_ssa/src/mir/block.rs

Lines changed: 8 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -557,9 +557,11 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
557557
let op = match self.locals[mir::RETURN_PLACE] {
558558
LocalRef::Operand(op) => op,
559559
LocalRef::PendingOperand => bug!("use of return before def"),
560-
LocalRef::Place(cg_place) => {
561-
OperandRef { val: Ref(cg_place.val), layout: cg_place.layout }
562-
}
560+
LocalRef::Place(cg_place) => OperandRef {
561+
val: Ref(cg_place.val),
562+
layout: cg_place.layout,
563+
move_annotation: None,
564+
},
563565
LocalRef::UnsizedPlace(_) => bug!("return type must be sized"),
564566
};
565567
let llslot = match op.val {
@@ -1145,7 +1147,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
11451147
| (&mir::Operand::Constant(_), Ref(PlaceValue { llextra: None, .. })) => {
11461148
let tmp = PlaceRef::alloca(bx, op.layout);
11471149
bx.lifetime_start(tmp.val.llval, tmp.layout.size);
1148-
op.val.store(bx, tmp);
1150+
op.store_with_annotation(bx, tmp);
11491151
op.val = Ref(tmp.val);
11501152
lifetime_ends_after_call.push((tmp.val.llval, tmp.layout.size));
11511153
}
@@ -1553,13 +1555,13 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
15531555
};
15541556
let scratch = PlaceValue::alloca(bx, arg.layout.size, required_align);
15551557
bx.lifetime_start(scratch.llval, arg.layout.size);
1556-
op.val.store(bx, scratch.with_type(arg.layout));
1558+
op.store_with_annotation(bx, scratch.with_type(arg.layout));
15571559
lifetime_ends_after_call.push((scratch.llval, arg.layout.size));
15581560
(scratch.llval, scratch.align, true)
15591561
}
15601562
PassMode::Cast { .. } => {
15611563
let scratch = PlaceRef::alloca(bx, arg.layout);
1562-
op.val.store(bx, scratch);
1564+
op.store_with_annotation(bx, scratch);
15631565
(scratch.val.llval, scratch.val.align, true)
15641566
}
15651567
_ => (op.immediate_or_packed_pair(bx), arg.layout.align.abi, false),

compiler/rustc_codegen_ssa/src/mir/mod.rs

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -480,6 +480,7 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
480480
return local(OperandRef {
481481
val: OperandValue::Pair(a, b),
482482
layout: arg.layout,
483+
move_annotation: None,
483484
});
484485
}
485486
_ => {}
@@ -552,6 +553,7 @@ fn arg_local_refs<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>>(
552553
fx.caller_location = Some(OperandRef {
553554
val: OperandValue::Immediate(bx.get_param(llarg_idx)),
554555
layout: arg.layout,
556+
move_annotation: None,
555557
});
556558
}
557559

compiler/rustc_codegen_ssa/src/mir/operand.rs

Lines changed: 111 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,13 @@ use rustc_abi as abi;
55
use rustc_abi::{
66
Align, BackendRepr, FIRST_VARIANT, FieldIdx, Primitive, Size, TagEncoding, VariantIdx, Variants,
77
};
8+
use rustc_hir::LangItem;
89
use rustc_middle::mir::interpret::{Pointer, Scalar, alloc_range};
910
use rustc_middle::mir::{self, ConstValue};
10-
use rustc_middle::ty::Ty;
1111
use rustc_middle::ty::layout::{LayoutOf, TyAndLayout};
12+
use rustc_middle::ty::{self, Ty};
1213
use rustc_middle::{bug, span_bug};
13-
use rustc_session::config::OptLevel;
14+
use rustc_session::config::{AnnotateMoves, DebugInfo, OptLevel};
1415
use tracing::{debug, instrument};
1516

1617
use super::place::{PlaceRef, PlaceValue};
@@ -131,6 +132,10 @@ pub struct OperandRef<'tcx, V> {
131132

132133
/// The layout of value, based on its Rust type.
133134
pub layout: TyAndLayout<'tcx>,
135+
136+
/// Annotation for profiler visibility of move/copy operations.
137+
/// When set, the store operation should appear as an inlined call to this function.
138+
pub move_annotation: Option<ty::Instance<'tcx>>,
134139
}
135140

136141
impl<V: CodegenObject> fmt::Debug for OperandRef<'_, V> {
@@ -142,7 +147,7 @@ impl<V: CodegenObject> fmt::Debug for OperandRef<'_, V> {
142147
impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
143148
pub fn zero_sized(layout: TyAndLayout<'tcx>) -> OperandRef<'tcx, V> {
144149
assert!(layout.is_zst());
145-
OperandRef { val: OperandValue::ZeroSized, layout }
150+
OperandRef { val: OperandValue::ZeroSized, layout, move_annotation: None }
146151
}
147152

148153
pub(crate) fn from_const<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
@@ -180,7 +185,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
180185
}
181186
};
182187

183-
OperandRef { val, layout }
188+
OperandRef { val, layout, move_annotation: None }
184189
}
185190

186191
fn from_const_alloc<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
@@ -214,7 +219,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
214219
let size = s.size(bx);
215220
assert_eq!(size, layout.size, "abi::Scalar size does not match layout size");
216221
let val = read_scalar(offset, size, s, bx.immediate_backend_type(layout));
217-
OperandRef { val: OperandValue::Immediate(val), layout }
222+
OperandRef { val: OperandValue::Immediate(val), layout, move_annotation: None }
218223
}
219224
BackendRepr::ScalarPair(
220225
a @ abi::Scalar::Initialized { .. },
@@ -235,7 +240,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
235240
b,
236241
bx.scalar_pair_element_backend_type(layout, 1, true),
237242
);
238-
OperandRef { val: OperandValue::Pair(a_val, b_val), layout }
243+
OperandRef { val: OperandValue::Pair(a_val, b_val), layout, move_annotation: None }
239244
}
240245
_ if layout.is_zst() => OperandRef::zero_sized(layout),
241246
_ => {
@@ -285,6 +290,22 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
285290
self.val.deref(layout.align.abi).with_type(layout)
286291
}
287292

293+
/// Store this operand into a place, applying move/copy annotation if present.
294+
///
295+
/// This is the preferred method for storing operands, as it automatically
296+
/// applies profiler annotations for tracked move/copy operations.
297+
pub fn store_with_annotation<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
298+
self,
299+
bx: &mut Bx,
300+
dest: PlaceRef<'tcx, V>,
301+
) {
302+
if let Some(instance) = self.move_annotation {
303+
bx.with_move_annotation(instance, |bx| self.val.store(bx, dest))
304+
} else {
305+
self.val.store(bx, dest)
306+
}
307+
}
308+
288309
/// If this operand is a `Pair`, we return an aggregate with the two values.
289310
/// For other cases, see `immediate`.
290311
pub fn immediate_or_packed_pair<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
@@ -320,7 +341,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
320341
} else {
321342
OperandValue::Immediate(llval)
322343
};
323-
OperandRef { val, layout }
344+
OperandRef { val, layout, move_annotation: None }
324345
}
325346

326347
pub(crate) fn extract_field<Bx: BuilderMethods<'a, 'tcx, Value = V>>(
@@ -388,7 +409,7 @@ impl<'a, 'tcx, V: CodegenObject> OperandRef<'tcx, V> {
388409
})
389410
};
390411

391-
OperandRef { val, layout: field }
412+
OperandRef { val, layout: field, move_annotation: None }
392413
}
393414

394415
/// Obtain the actual discriminant of a value.
@@ -828,10 +849,15 @@ impl<'a, 'tcx, V: CodegenObject> OperandRefBuilder<'tcx, V> {
828849
}
829850
},
830851
};
831-
OperandRef { val, layout }
852+
OperandRef { val, layout, move_annotation: None }
832853
}
833854
}
834855

856+
/// Default size limit for move/copy annotations (in bytes). 64 bytes is a common size of a cache
857+
/// line, and the assumption is that anything this size or below is very cheap to move/copy, so only
858+
/// annotate copies larger than this.
859+
const MOVE_ANNOTATION_DEFAULT_LIMIT: u64 = 65;
860+
835861
impl<'a, 'tcx, V: CodegenObject> OperandValue<V> {
836862
/// Returns an `OperandValue` that's generally UB to use in any way.
837863
///
@@ -961,7 +987,7 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
961987
abi::Variants::Single { index: vidx },
962988
);
963989
let layout = o.layout.for_variant(bx.cx(), vidx);
964-
o = OperandRef { val: o.val, layout }
990+
o = OperandRef { layout, ..o }
965991
}
966992
_ => return None,
967993
}
@@ -1014,7 +1040,16 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
10141040

10151041
match *operand {
10161042
mir::Operand::Copy(ref place) | mir::Operand::Move(ref place) => {
1017-
self.codegen_consume(bx, place.as_ref())
1043+
let kind = match operand {
1044+
mir::Operand::Move(_) => LangItem::CompilerMove,
1045+
mir::Operand::Copy(_) => LangItem::CompilerCopy,
1046+
_ => unreachable!(),
1047+
};
1048+
1049+
// Check if we should annotate this move/copy for profiling
1050+
let move_annotation = self.move_copy_annotation_instance(bx, place.as_ref(), kind);
1051+
1052+
OperandRef { move_annotation, ..self.codegen_consume(bx, place.as_ref()) }
10181053
}
10191054

10201055
mir::Operand::Constant(ref constant) => {
@@ -1030,11 +1065,76 @@ impl<'a, 'tcx, Bx: BuilderMethods<'a, 'tcx>> FunctionCx<'a, 'tcx, Bx> {
10301065
return OperandRef {
10311066
val: OperandValue::Immediate(llval),
10321067
layout: bx.layout_of(ty),
1068+
move_annotation: None,
10331069
};
10341070
}
10351071
}
10361072
self.eval_mir_constant_to_operand(bx, constant)
10371073
}
10381074
}
10391075
}
1076+
1077+
/// Creates an `Instance` for annotating a move/copy operation at codegen time.
1078+
///
1079+
/// Returns `Some(instance)` if the operation should be annotated with debug info, `None`
1080+
/// otherwise. The instance represents a monomorphized `compiler_move<T, SIZE>` or
1081+
/// `compiler_copy<T, SIZE>` function that can be used to create debug scopes.
1082+
///
1083+
/// There are a number of conditions that must be met for an annotation to be created, but aside
1084+
/// from the basics (annotation is enabled, we're generating debuginfo), the primary concern is
1085+
/// moves/copies which could result in a real `memcpy`. So we check for the size limit, but also
1086+
/// that the underlying representation of the type is in memory.
1087+
fn move_copy_annotation_instance(
1088+
&self,
1089+
bx: &Bx,
1090+
place: mir::PlaceRef<'tcx>,
1091+
kind: LangItem,
1092+
) -> Option<ty::Instance<'tcx>> {
1093+
let tcx = bx.tcx();
1094+
let sess = tcx.sess;
1095+
1096+
// Skip if we're not generating debuginfo
1097+
if sess.opts.debuginfo == DebugInfo::None {
1098+
return None;
1099+
}
1100+
1101+
// Check if annotation is enabled and get size limit (otherwise skip)
1102+
let size_limit = match sess.opts.unstable_opts.annotate_moves {
1103+
AnnotateMoves::Disabled => return None,
1104+
AnnotateMoves::Enabled(None) => MOVE_ANNOTATION_DEFAULT_LIMIT,
1105+
AnnotateMoves::Enabled(Some(limit)) => limit,
1106+
};
1107+
1108+
let ty = self.monomorphized_place_ty(place);
1109+
let layout = bx.cx().layout_of(ty);
1110+
let ty_size = layout.size.bytes();
1111+
1112+
// Only annotate if type has a memory representation and is at least the size limit (and has a
1113+
// non-zero size)
1114+
if layout.is_zst()
1115+
|| ty_size < size_limit
1116+
|| !matches!(layout.backend_repr, BackendRepr::Memory { .. })
1117+
{
1118+
return None;
1119+
}
1120+
1121+
// Look up the DefId for compiler_move or compiler_copy lang item
1122+
let def_id = tcx.lang_items().get(kind)?;
1123+
1124+
// Create generic args: compiler_move<T, SIZE> or compiler_copy<T, SIZE>
1125+
let size_const = ty::Const::from_target_usize(tcx, ty_size);
1126+
let generic_args = tcx.mk_args(&[ty.into(), size_const.into()]);
1127+
1128+
// Create the Instance
1129+
let typing_env = self.mir.typing_env(tcx);
1130+
let instance = ty::Instance::expect_resolve(
1131+
tcx,
1132+
typing_env,
1133+
def_id,
1134+
generic_args,
1135+
rustc_span::DUMMY_SP, // span only used for error messages
1136+
);
1137+
1138+
Some(instance)
1139+
}
10401140
}

0 commit comments

Comments
 (0)