diff --git a/compiler/rustc_builtin_macros/src/autodiff.rs b/compiler/rustc_builtin_macros/src/autodiff.rs index 7b5c4a159b0c5..63f97f3ae29af 100644 --- a/compiler/rustc_builtin_macros/src/autodiff.rs +++ b/compiler/rustc_builtin_macros/src/autodiff.rs @@ -25,6 +25,16 @@ mod llvm_enzyme { use crate::errors; + pub(crate) fn outer_normal_attr( + kind: &P, + id: rustc_ast::AttrId, + span: Span, + ) -> rustc_ast::Attribute { + let style = rustc_ast::AttrStyle::Outer; + let kind = rustc_ast::AttrKind::Normal(kind.clone()); + rustc_ast::Attribute { kind, id, style, span } + } + // If we have a default `()` return type or explicitley `()` return type, // then we often can skip doing some work. fn has_ret(ty: &FnRetTy) -> bool { @@ -223,20 +233,8 @@ mod llvm_enzyme { .filter(|a| **a == DiffActivity::Active || **a == DiffActivity::ActiveOnly) .count() as u32; let (d_sig, new_args, idents, errored) = gen_enzyme_decl(ecx, &sig, &x, span); - let new_decl_span = d_sig.span; let d_body = gen_enzyme_body( - ecx, - &x, - n_active, - &sig, - &d_sig, - primal, - &new_args, - span, - sig_span, - new_decl_span, - idents, - errored, + ecx, &x, n_active, &sig, &d_sig, primal, &new_args, span, sig_span, idents, errored, ); let d_ident = first_ident(&meta_item_vec[0]); @@ -268,36 +266,39 @@ mod llvm_enzyme { }; let inline_never_attr = P(ast::NormalAttr { item: inline_item, tokens: None }); let new_id = ecx.sess.psess.attr_id_generator.mk_attr_id(); - let attr: ast::Attribute = ast::Attribute { - kind: ast::AttrKind::Normal(rustc_ad_attr.clone()), - id: new_id, - style: ast::AttrStyle::Outer, - span, - }; + let attr = outer_normal_attr(&rustc_ad_attr, new_id, span); let new_id = ecx.sess.psess.attr_id_generator.mk_attr_id(); - let inline_never: ast::Attribute = ast::Attribute { - kind: ast::AttrKind::Normal(inline_never_attr), - id: new_id, - style: ast::AttrStyle::Outer, - span, - }; + let inline_never = outer_normal_attr(&inline_never_attr, new_id, span); + + // We're avoid duplicating the attributes `#[rustc_autodiff]` and `#[inline(never)]`. + fn same_attribute(attr: &ast::AttrKind, item: &ast::AttrKind) -> bool { + match (attr, item) { + (ast::AttrKind::Normal(a), ast::AttrKind::Normal(b)) => { + let a = &a.item.path; + let b = &b.item.path; + a.segments.len() == b.segments.len() + && a.segments.iter().zip(b.segments.iter()).all(|(a, b)| a.ident == b.ident) + } + _ => false, + } + } // Don't add it multiple times: let orig_annotatable: Annotatable = match item { Annotatable::Item(ref mut iitem) => { - if !iitem.attrs.iter().any(|a| a.id == attr.id) { + if !iitem.attrs.iter().any(|a| same_attribute(&a.kind, &attr.kind)) { iitem.attrs.push(attr); } - if !iitem.attrs.iter().any(|a| a.id == inline_never.id) { + if !iitem.attrs.iter().any(|a| same_attribute(&a.kind, &inline_never.kind)) { iitem.attrs.push(inline_never.clone()); } Annotatable::Item(iitem.clone()) } Annotatable::AssocItem(ref mut assoc_item, i @ Impl) => { - if !assoc_item.attrs.iter().any(|a| a.id == attr.id) { + if !assoc_item.attrs.iter().any(|a| same_attribute(&a.kind, &attr.kind)) { assoc_item.attrs.push(attr); } - if !assoc_item.attrs.iter().any(|a| a.id == inline_never.id) { + if !assoc_item.attrs.iter().any(|a| same_attribute(&a.kind, &inline_never.kind)) { assoc_item.attrs.push(inline_never.clone()); } Annotatable::AssocItem(assoc_item.clone(), i) @@ -312,13 +313,7 @@ mod llvm_enzyme { delim: rustc_ast::token::Delimiter::Parenthesis, tokens: ts, }); - let d_attr: ast::Attribute = ast::Attribute { - kind: ast::AttrKind::Normal(rustc_ad_attr.clone()), - id: new_id, - style: ast::AttrStyle::Outer, - span, - }; - + let d_attr = outer_normal_attr(&rustc_ad_attr, new_id, span); let d_annotatable = if is_impl { let assoc_item: AssocItemKind = ast::AssocItemKind::Fn(asdf); let d_fn = P(ast::AssocItem { @@ -359,30 +354,27 @@ mod llvm_enzyme { ty } - /// We only want this function to type-check, since we will replace the body - /// later on llvm level. Using `loop {}` does not cover all return types anymore, - /// so instead we build something that should pass. We also add a inline_asm - /// line, as one more barrier for rustc to prevent inlining of this function. - /// FIXME(ZuseZ4): We still have cases of incorrect inlining across modules, see - /// , so this isn't sufficient. - /// It also triggers an Enzyme crash if we due to a bug ever try to differentiate - /// this function (which should never happen, since it is only a placeholder). - /// Finally, we also add back_box usages of all input arguments, to prevent rustc - /// from optimizing any arguments away. - fn gen_enzyme_body( + // Will generate a body of the type: + // ``` + // { + // unsafe { + // asm!("NOP"); + // } + // ::core::hint::black_box(primal(args)); + // ::core::hint::black_box((args, ret)); + // + // } + // ``` + fn init_body_helper( ecx: &ExtCtxt<'_>, - x: &AutoDiffAttrs, - n_active: u32, - sig: &ast::FnSig, - d_sig: &ast::FnSig, + span: Span, primal: Ident, new_names: &[String], - span: Span, sig_span: Span, new_decl_span: Span, - idents: Vec, + idents: &[Ident], errored: bool, - ) -> P { + ) -> (P, P, P, P) { let blackbox_path = ecx.std_path(&[sym::hint, sym::black_box]); let noop = ast::InlineAsm { asm_macro: ast::AsmMacro::Asm, @@ -431,6 +423,51 @@ mod llvm_enzyme { } body.stmts.push(ecx.stmt_semi(black_box_remaining_args)); + (body, primal_call, black_box_primal_call, blackbox_call_expr) + } + + /// We only want this function to type-check, since we will replace the body + /// later on llvm level. Using `loop {}` does not cover all return types anymore, + /// so instead we manually build something that should pass the type checker. + /// We also add a inline_asm line, as one more barrier for rustc to prevent inlining + /// or const propagation. inline_asm will also triggers an Enzyme crash if due to another + /// bug would ever try to accidentially differentiate this placeholder function body. + /// Finally, we also add back_box usages of all input arguments, to prevent rustc + /// from optimizing any arguments away. + fn gen_enzyme_body( + ecx: &ExtCtxt<'_>, + x: &AutoDiffAttrs, + n_active: u32, + sig: &ast::FnSig, + d_sig: &ast::FnSig, + primal: Ident, + new_names: &[String], + span: Span, + sig_span: Span, + idents: Vec, + errored: bool, + ) -> P { + let new_decl_span = d_sig.span; + + // Just adding some default inline-asm and black_box usages to prevent early inlining + // and optimizations which alter the function signature. + // + // The bb_primal_call is the black_box call of the primal function. We keep it around, + // since it has the convenient property of returning the type of the primal function, + // Remember, we only care to match types here. + // No matter which return we pick, we always wrap it into a std::hint::black_box call, + // to prevent rustc from propagating it into the caller. + let (mut body, primal_call, bb_primal_call, bb_call_expr) = init_body_helper( + ecx, + span, + primal, + new_names, + sig_span, + new_decl_span, + &idents, + errored, + ); + if !has_ret(&d_sig.decl.output) { // there is no return type that we have to match, () works fine. return body; @@ -442,7 +479,7 @@ mod llvm_enzyme { if primal_ret && n_active == 0 && x.mode.is_rev() { // We only have the primal ret. - body.stmts.push(ecx.stmt_expr(black_box_primal_call)); + body.stmts.push(ecx.stmt_expr(bb_primal_call)); return body; } @@ -534,11 +571,11 @@ mod llvm_enzyme { return body; } [arg] => { - ret = ecx.expr_call(new_decl_span, blackbox_call_expr, thin_vec![arg.clone()]); + ret = ecx.expr_call(new_decl_span, bb_call_expr, thin_vec![arg.clone()]); } args => { let ret_tuple: P = ecx.expr_tuple(span, args.into()); - ret = ecx.expr_call(new_decl_span, blackbox_call_expr, thin_vec![ret_tuple]); + ret = ecx.expr_call(new_decl_span, bb_call_expr, thin_vec![ret_tuple]); } } assert!(has_ret(&d_sig.decl.output)); @@ -551,7 +588,7 @@ mod llvm_enzyme { ecx: &ExtCtxt<'_>, span: Span, primal: Ident, - idents: Vec, + idents: &[Ident], ) -> P { let has_self = idents.len() > 0 && idents[0].name == kw::SelfLower; if has_self { diff --git a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs index 71705ecb4d0f5..7cd4ee539d875 100644 --- a/compiler/rustc_codegen_llvm/src/builder/autodiff.rs +++ b/compiler/rustc_codegen_llvm/src/builder/autodiff.rs @@ -28,6 +28,113 @@ fn get_params(fnc: &Value) -> Vec<&Value> { } } +fn match_args_from_caller_to_enzyme<'ll>( + cx: &SimpleCx<'ll>, + args: &mut Vec<&'ll llvm::Value>, + inputs: &[DiffActivity], + outer_args: &[&'ll llvm::Value], +) { + debug!("matching autodiff arguments"); + // We now handle the issue that Rust level arguments not always match the llvm-ir level + // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on + // llvm-ir level. The number of activities matches the number of Rust level arguments, so we + // need to match those. + // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it + // using iterators and peek()? + let mut outer_pos: usize = 0; + let mut activity_pos = 0; + + let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap(); + let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap(); + let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap(); + let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap(); + + while activity_pos < inputs.len() { + let diff_activity = inputs[activity_pos as usize]; + // Duplicated arguments received a shadow argument, into which enzyme will write the + // gradient. + let (activity, duplicated): (&Metadata, bool) = match diff_activity { + DiffActivity::None => panic!("not a valid input activity"), + DiffActivity::Const => (enzyme_const, false), + DiffActivity::Active => (enzyme_out, false), + DiffActivity::ActiveOnly => (enzyme_out, false), + DiffActivity::Dual => (enzyme_dup, true), + DiffActivity::DualOnly => (enzyme_dupnoneed, true), + DiffActivity::Duplicated => (enzyme_dup, true), + DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true), + DiffActivity::FakeActivitySize => (enzyme_const, false), + }; + let outer_arg = outer_args[outer_pos]; + args.push(cx.get_metadata_value(activity)); + args.push(outer_arg); + if duplicated { + // We know that duplicated args by construction have a following argument, + // so this can not be out of bounds. + let next_outer_arg = outer_args[outer_pos + 1]; + let next_outer_ty = cx.val_ty(next_outer_arg); + // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since + // vectors behind references (&Vec) are already supported. Users can not pass a + // Vec by value for reverse mode, so this would only help forward mode autodiff. + let slice = { + if activity_pos + 1 >= inputs.len() { + // If there is no arg following our ptr, it also can't be a slice, + // since that would lead to a ptr, int pair. + false + } else { + let next_activity = inputs[activity_pos + 1]; + // We analyze the MIR types and add this dummy activity if we visit a slice. + next_activity == DiffActivity::FakeActivitySize + } + }; + if slice { + // A duplicated slice will have the following two outer_fn arguments: + // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call: + // (..., metadata! enzyme_dup, ptr, ptr, int1, ...). + // FIXME(ZuseZ4): We will upstream a safety check later which asserts that + // int2 >= int1, which means the shadow vector is large enough to store the gradient. + assert!(unsafe { + llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer + }); + let next_outer_arg2 = outer_args[outer_pos + 2]; + let next_outer_ty2 = cx.val_ty(next_outer_arg2); + assert!(unsafe { + llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer + }); + let next_outer_arg3 = outer_args[outer_pos + 3]; + let next_outer_ty3 = cx.val_ty(next_outer_arg3); + assert!(unsafe { + llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer + }); + args.push(next_outer_arg2); + args.push(cx.get_metadata_value(enzyme_const)); + args.push(next_outer_arg); + outer_pos += 4; + activity_pos += 2; + } else { + // A duplicated pointer will have the following two outer_fn arguments: + // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call: + // (..., metadata! enzyme_dup, ptr, ptr, ...). + if matches!(diff_activity, DiffActivity::Duplicated | DiffActivity::DuplicatedOnly) + { + assert!( + unsafe { llvm::LLVMRustGetTypeKind(next_outer_ty) } + == llvm::TypeKind::Pointer + ); + } + // In the case of Dual we don't have assumptions, e.g. f32 would be valid. + args.push(next_outer_arg); + outer_pos += 2; + activity_pos += 1; + } + } else { + // We do not differentiate with resprect to this argument. + // We already added the metadata and argument above, so just increase the counters. + outer_pos += 1; + activity_pos += 1; + } + } +} + /// When differentiating `fn_to_diff`, take a `outer_fn` and generate another /// function with expected naming and calling conventions[^1] which will be /// discovered by the enzyme LLVM pass and its body populated with the differentiated @@ -43,9 +150,6 @@ fn generate_enzyme_call<'ll>( outer_fn: &'ll Value, attrs: AutoDiffAttrs, ) { - let inputs = attrs.input_activity; - let output = attrs.ret_activity; - // We have to pick the name depending on whether we want forward or reverse mode autodiff. let mut ad_name: String = match attrs.mode { DiffMode::Forward => "__enzyme_fwddiff", @@ -132,111 +236,13 @@ fn generate_enzyme_call<'ll>( let mut args = Vec::with_capacity(num_args as usize + 1); args.push(fn_to_diff); - let enzyme_const = cx.create_metadata("enzyme_const".to_string()).unwrap(); - let enzyme_out = cx.create_metadata("enzyme_out".to_string()).unwrap(); - let enzyme_dup = cx.create_metadata("enzyme_dup".to_string()).unwrap(); - let enzyme_dupnoneed = cx.create_metadata("enzyme_dupnoneed".to_string()).unwrap(); let enzyme_primal_ret = cx.create_metadata("enzyme_primal_return".to_string()).unwrap(); - - match output { - DiffActivity::Dual => { - args.push(cx.get_metadata_value(enzyme_primal_ret)); - } - DiffActivity::Active => { - args.push(cx.get_metadata_value(enzyme_primal_ret)); - } - _ => {} + if matches!(attrs.ret_activity, DiffActivity::Dual | DiffActivity::Active) { + args.push(cx.get_metadata_value(enzyme_primal_ret)); } - debug!("matching autodiff arguments"); - // We now handle the issue that Rust level arguments not always match the llvm-ir level - // arguments. A slice, `&[f32]`, for example, is represented as a pointer and a length on - // llvm-ir level. The number of activities matches the number of Rust level arguments, so we - // need to match those. - // FIXME(ZuseZ4): This logic is a bit more complicated than it should be, can we simplify it - // using iterators and peek()? - let mut outer_pos: usize = 0; - let mut activity_pos = 0; let outer_args: Vec<&llvm::Value> = get_params(outer_fn); - while activity_pos < inputs.len() { - let diff_activity = inputs[activity_pos as usize]; - // Duplicated arguments received a shadow argument, into which enzyme will write the - // gradient. - let (activity, duplicated): (&Metadata, bool) = match diff_activity { - DiffActivity::None => panic!("not a valid input activity"), - DiffActivity::Const => (enzyme_const, false), - DiffActivity::Active => (enzyme_out, false), - DiffActivity::ActiveOnly => (enzyme_out, false), - DiffActivity::Dual => (enzyme_dup, true), - DiffActivity::DualOnly => (enzyme_dupnoneed, true), - DiffActivity::Duplicated => (enzyme_dup, true), - DiffActivity::DuplicatedOnly => (enzyme_dupnoneed, true), - DiffActivity::FakeActivitySize => (enzyme_const, false), - }; - let outer_arg = outer_args[outer_pos]; - args.push(cx.get_metadata_value(activity)); - args.push(outer_arg); - if duplicated { - // We know that duplicated args by construction have a following argument, - // so this can not be out of bounds. - let next_outer_arg = outer_args[outer_pos + 1]; - let next_outer_ty = cx.val_ty(next_outer_arg); - // FIXME(ZuseZ4): We should add support for Vec here too, but it's less urgent since - // vectors behind references (&Vec) are already supported. Users can not pass a - // Vec by value for reverse mode, so this would only help forward mode autodiff. - let slice = { - if activity_pos + 1 >= inputs.len() { - // If there is no arg following our ptr, it also can't be a slice, - // since that would lead to a ptr, int pair. - false - } else { - let next_activity = inputs[activity_pos + 1]; - // We analyze the MIR types and add this dummy activity if we visit a slice. - next_activity == DiffActivity::FakeActivitySize - } - }; - if slice { - // A duplicated slice will have the following two outer_fn arguments: - // (..., ptr1, int1, ptr2, int2, ...). We add the following llvm-ir to our __enzyme call: - // (..., metadata! enzyme_dup, ptr, ptr, int1, ...). - // FIXME(ZuseZ4): We will upstream a safety check later which asserts that - // int2 >= int1, which means the shadow vector is large enough to store the gradient. - assert!(llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Integer); - let next_outer_arg2 = outer_args[outer_pos + 2]; - let next_outer_ty2 = cx.val_ty(next_outer_arg2); - assert!(llvm::LLVMRustGetTypeKind(next_outer_ty2) == llvm::TypeKind::Pointer); - let next_outer_arg3 = outer_args[outer_pos + 3]; - let next_outer_ty3 = cx.val_ty(next_outer_arg3); - assert!(llvm::LLVMRustGetTypeKind(next_outer_ty3) == llvm::TypeKind::Integer); - args.push(next_outer_arg2); - args.push(cx.get_metadata_value(enzyme_const)); - args.push(next_outer_arg); - outer_pos += 4; - activity_pos += 2; - } else { - // A duplicated pointer will have the following two outer_fn arguments: - // (..., ptr, ptr, ...). We add the following llvm-ir to our __enzyme call: - // (..., metadata! enzyme_dup, ptr, ptr, ...). - if matches!( - diff_activity, - DiffActivity::Duplicated | DiffActivity::DuplicatedOnly - ) { - assert!( - llvm::LLVMRustGetTypeKind(next_outer_ty) == llvm::TypeKind::Pointer - ); - } - // In the case of Dual we don't have assumptions, e.g. f32 would be valid. - args.push(next_outer_arg); - outer_pos += 2; - activity_pos += 1; - } - } else { - // We do not differentiate with resprect to this argument. - // We already added the metadata and argument above, so just increase the counters. - outer_pos += 1; - activity_pos += 1; - } - } + match_args_from_caller_to_enzyme(&cx, &mut args, &attrs.input_activity, &outer_args); let call = builder.call(enzyme_ty, ad_fn, &args, None); diff --git a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs index 90c53c347689b..5a3f0aa831059 100644 --- a/compiler/rustc_codegen_ssa/src/codegen_attrs.rs +++ b/compiler/rustc_codegen_ssa/src/codegen_attrs.rs @@ -798,16 +798,10 @@ fn autodiff_attrs(tcx: TyCtxt<'_>, id: DefId) -> Option { // check for exactly one autodiff attribute on placeholder functions. // There should only be one, since we generate a new placeholder per ad macro. - // FIXME(ZuseZ4): re-enable this check. Currently we add multiple, which doesn't cause harm but - // looks strange e.g. under cargo-expand. let attr = match &attrs[..] { [] => return None, [attr] => attr, - // These two attributes are the same and unfortunately duplicated due to a previous bug. - [attr, _attr2] => attr, _ => { - //FIXME(ZuseZ4): Once we fixed our parser, we should also prohibit the two-attribute - //branch above. span_bug!(attrs[1].span(), "cg_ssa: rustc_autodiff should only exist once per source"); } }; diff --git a/tests/pretty/autodiff_forward.pp b/tests/pretty/autodiff_forward.pp index 23c3b5b34a82a..dc7a2712f4231 100644 --- a/tests/pretty/autodiff_forward.pp +++ b/tests/pretty/autodiff_forward.pp @@ -53,7 +53,7 @@ pub fn f3(x: &[f64], y: f64) -> f64 { ::core::panicking::panic("not implemented") } -#[rustc_autodiff(ForwardFirst, Dual, Const, Const,)] +#[rustc_autodiff(Forward, Dual, Const, Const,)] #[inline(never)] pub fn df3(x: &[f64], bx: &[f64], y: f64) -> f64 { unsafe { asm!("NOP", options(pure, nomem)); }; @@ -73,10 +73,6 @@ } #[rustc_autodiff] #[inline(never)] -#[rustc_autodiff] -#[inline(never)] -#[rustc_autodiff] -#[inline(never)] pub fn f5(x: &[f64], y: f64) -> f64 { ::core::panicking::panic("not implemented") } diff --git a/tests/pretty/autodiff_forward.rs b/tests/pretty/autodiff_forward.rs index 35108d0d6f111..bc5582116322b 100644 --- a/tests/pretty/autodiff_forward.rs +++ b/tests/pretty/autodiff_forward.rs @@ -19,7 +19,7 @@ pub fn f2(x: &[f64], y: f64) -> f64 { unimplemented!() } -#[autodiff(df3, ForwardFirst, Dual, Const, Const)] +#[autodiff(df3, Forward, Dual, Const, Const)] pub fn f3(x: &[f64], y: f64) -> f64 { unimplemented!() } diff --git a/tests/pretty/autodiff_reverse.pp b/tests/pretty/autodiff_reverse.pp index a98d3782c7034..b2cf0244af4c6 100644 --- a/tests/pretty/autodiff_reverse.pp +++ b/tests/pretty/autodiff_reverse.pp @@ -51,7 +51,7 @@ pub fn f3(x: &[f64], y: f64) -> f64 { ::core::panicking::panic("not implemented") } -#[rustc_autodiff(ReverseFirst, Duplicated, Const, Active,)] +#[rustc_autodiff(Reverse, Duplicated, Const, Active,)] #[inline(never)] pub fn df3(x: &[f64], dx: &mut [f64], y: f64, dret: f64) -> f64 { unsafe { asm!("NOP", options(pure, nomem)); }; diff --git a/tests/pretty/autodiff_reverse.rs b/tests/pretty/autodiff_reverse.rs index 657201caa9401..3c024272f407e 100644 --- a/tests/pretty/autodiff_reverse.rs +++ b/tests/pretty/autodiff_reverse.rs @@ -18,7 +18,7 @@ pub fn f1(x: &[f64], y: f64) -> f64 { #[autodiff(df2, Reverse)] pub fn f2() {} -#[autodiff(df3, ReverseFirst, Duplicated, Const, Active)] +#[autodiff(df3, Reverse, Duplicated, Const, Active)] pub fn f3(x: &[f64], y: f64) -> f64 { unimplemented!() }