Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions crates/admin-cli/src/instance/release/cmd.rs
Original file line number Diff line number Diff line change
Expand Up @@ -82,6 +82,7 @@ pub async fn release(
id: Some(instance_id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
})
.await?;
}
Expand Down
19 changes: 19 additions & 0 deletions crates/api-core/src/handlers/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,24 @@ async fn remove_health_override(
Ok(())
}

/// Logs cloud-side delete attribution when present on the release request.
fn log_delete_attribution(delete_attribution: Option<&rpc::DeleteAttribution>) {
let Some(attribution) = delete_attribution else {
return;
};
let Some(initiated_by) = attribution.initiated_by.as_ref() else {
return;
};

tracing::info!(
org = %initiated_by.org,
org_display_name = %initiated_by.org_display_name,
user_id = %initiated_by.user_id,
tenant_id = %initiated_by.tenant_id,
"Instance delete attribution"
);
}

/// Handles the Instance Release workflow when released from the Repair tenant.
///
/// This function implements the logic for when the RepairSystem releases an instance after
Expand Down Expand Up @@ -711,6 +729,7 @@ pub(crate) async fn release(

log_machine_id(&instance.machine_id);
log_tenant_organization_id(instance.config.tenant.tenant_organization_id.as_str());
log_delete_attribution(delete_instance.delete_attribution.as_ref());

// Only enforce PreventInstanceDeletion for a real release (instance not yet marked deleted). Repair-tenant
// follow-up calls after deletion may still need to adjust health overrides below.
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -450,6 +450,7 @@ pub async fn delete_instance(env: &TestEnv, instance_id: InstanceId, mh: &TestMa
id: Some(instance_id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect("Delete instance failed.");
Expand Down
14 changes: 14 additions & 0 deletions crates/api-core/src/tests/instance.rs
Original file line number Diff line number Diff line change
Expand Up @@ -557,6 +557,7 @@ async fn test_measurement_assigned_ready_to_waiting_for_measurements_to_ca_faile
id: Some(instance_id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect("Delete instance failed.");
Expand Down Expand Up @@ -1303,6 +1304,7 @@ async fn test_instance_deletion_before_provisioning_finishes(
id: Some(instance_id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect("Delete instance failed.");
Expand Down Expand Up @@ -1349,6 +1351,7 @@ async fn test_instance_deletion_is_idempotent(_: PgPoolOptions, options: PgConne
id: Some(tinstance.id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.unwrap_or_else(|_| panic!("Delete instance failed failed on attempt {i}."));
Expand All @@ -1366,6 +1369,7 @@ async fn test_instance_deletion_is_idempotent(_: PgPoolOptions, options: PgConne
id: Some(tinstance.id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect_err("Expect deletion to fail");
Expand Down Expand Up @@ -2132,6 +2136,7 @@ async fn test_bootingwithdiscoveryimage_delay(_: PgPoolOptions, options: PgConne
id: Some(tinstance.id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect("Delete instance failed.");
Expand Down Expand Up @@ -5408,6 +5413,7 @@ async fn test_instance_release_backward_compatibility(_: PgPoolOptions, options:
id: Some(instance_id),
issue: None, // Exactly what older clients produce
is_repair_tenant: None, // Exactly what older clients produce
delete_attribution: None,
}))
.await
.expect("Basic instance release should succeed");
Expand Down Expand Up @@ -5525,6 +5531,7 @@ async fn test_instance_release_repair_tenant(_: PgPoolOptions, options: PgConnec
id: Some(instance_id),
issue: None, // No issue reported
is_repair_tenant: Some(is_repair_tenant),
delete_attribution: None,
}))
.await
.expect("Instance release with repair tenant flag should succeed");
Expand Down Expand Up @@ -5623,6 +5630,7 @@ async fn test_instance_release_combined_enhancements(_: PgPoolOptions, options:
id: Some(instance_id),
issue: Some(issue),
is_repair_tenant: Some(true), // This is a repair tenant reporting an issue
delete_attribution: None,
}))
.await
.expect("Instance release with combined enhancements should succeed");
Expand Down Expand Up @@ -5729,6 +5737,7 @@ async fn test_instance_release_rejected_when_aggregate_health_has_prevent_instan
id: Some(instance_id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect_err(
Expand All @@ -5754,6 +5763,7 @@ async fn test_instance_release_rejected_when_aggregate_health_has_prevent_instan
id: Some(instance_id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect("release should succeed after removing PreventInstanceDeletion source");
Expand Down Expand Up @@ -5806,6 +5816,7 @@ async fn test_instance_release_auto_repair_enabled(_: PgPoolOptions, options: Pg
details: "ECC errors increasing, DIMM slot 3 needs replacement".to_string(),
}),
is_repair_tenant: None, // Regular tenant (not repair tenant)
delete_attribution: None,
}))
.await
.unwrap();
Expand Down Expand Up @@ -5918,6 +5929,7 @@ async fn test_instance_release_repair_tenant_successful_completion(
details: "CPU overheating and memory errors".to_string(),
}),
is_repair_tenant: None, // Regular tenant
delete_attribution: None,
}))
.await
.unwrap();
Expand Down Expand Up @@ -5970,6 +5982,7 @@ async fn test_instance_release_repair_tenant_successful_completion(
id: Some(instance_id),
issue: None, // No new issues - repair was successful
is_repair_tenant: Some(true), // Repair tenant
delete_attribution: None,
}))
.await
.unwrap();
Expand Down Expand Up @@ -6111,6 +6124,7 @@ async fn test_can_not_update_instance_config_after_deletion(
id: tinstance.id.into(),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.unwrap();
Expand Down
1 change: 1 addition & 0 deletions crates/api-core/src/tests/instance_config_update.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1156,6 +1156,7 @@ async fn test_update_instance_config_vpc_prefix_network_update_post_instance_del
id: Some(tinstance.id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.expect("Delete instance failed.");
Expand Down
59 changes: 38 additions & 21 deletions crates/api-core/src/tests/network_security_group.rs
Original file line number Diff line number Diff line change
Expand Up @@ -36,7 +36,6 @@ use crate::tests::common::api_fixtures::instance::{
default_os_config, default_tenant_config, interface_network_config_with_devices,
single_interface_network_config,
};
use crate::tests::common::api_fixtures::test_managed_host::TestManagedHost;
use crate::tests::common::api_fixtures::{
create_test_env, populate_network_security_groups, site_explorer,
};
Expand Down Expand Up @@ -954,6 +953,7 @@ async fn test_network_security_group_delete(
id: instance.id,
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.unwrap();
Expand Down Expand Up @@ -1761,6 +1761,7 @@ async fn test_network_security_group_get_attachments(
let good_network_security_group_id = "fd3ab096-d811-11ef-8fe9-7be4b2483448";

let vpc_id: VpcId = uuid!("2ff5ba26-da6a-11ef-9c48-5b78e547a5e7").into();
let instance_id: InstanceId = uuid!("46c555e0-da6a-11ef-b86d-db132142d068").into();

// Check attachments before doing anything else.
// There should be no objects with any attached NSG.
Expand Down Expand Up @@ -1806,23 +1807,32 @@ async fn test_network_security_group_get_attachments(
.await
.unwrap();

let test_managed_host =
TestManagedHost::from_rpc_machine(&mh.host_snapshot.clone().into(), env.api.clone());
// Create an Instance
let instance = test_managed_host
.instance_builer(&env)
.config(rpc::InstanceConfig {
tenant: Some(default_tenant_config()),
os: Some(default_os_config()),
network: Some(single_interface_network_config(segment_id)),
infiniband: None,
nvlink: None,
spxconfig: None,
network_security_group_id: Some(good_network_security_group_id.to_string()),
dpu_extension_services: None,
})
.build()
.await;
let _ = env
.api
.allocate_instance(tonic::Request::new(rpc::forge::InstanceAllocationRequest {
machine_id: mh.host_snapshot.id.into(),
config: Some(rpc::InstanceConfig {
tenant: Some(default_tenant_config()),
os: Some(default_os_config()),
network: Some(single_interface_network_config(segment_id)),
infiniband: None,
nvlink: None,
spxconfig: None,
network_security_group_id: Some(good_network_security_group_id.to_string()),
dpu_extension_services: None,
}),
instance_id: Some(instance_id),
instance_type_id: None,
metadata: Some(rpc::forge::Metadata {
name: "newinstance".to_string(),
description: "desc".to_string(),
labels: vec![],
}),
allow_unhealthy_machine: false,
}))
.await
.unwrap();

// Check attachments
let prop_status = env
Expand All @@ -1840,15 +1850,22 @@ async fn test_network_security_group_get_attachments(
attachments: vec![rpc::forge::NetworkSecurityGroupAttachments {
network_security_group_id: good_network_security_group_id.to_string(),
vpc_ids: vec![vpc_id.to_string()],
instance_ids: vec![instance.id.to_string()],
instance_ids: vec![instance_id.to_string()],
}],
};

assert_eq!(prop_status, expected_results);

// Delete the instance and wait for it to be fully gone
test_managed_host.delete_instance(&env, instance.id).await;

// Delete the instance
env.api
.release_instance(tonic::Request::new(rpc::forge::InstanceReleaseRequest {
id: Some(instance_id),
issue: None,
is_repair_tenant: None,
delete_attribution: None,
}))
.await
.unwrap();
// Delete the VPC
env.api
.delete_vpc(tonic::Request::new(rpc::forge::VpcDeletionRequest {
Expand Down
14 changes: 14 additions & 0 deletions crates/rpc/proto/forge.proto
Original file line number Diff line number Diff line change
Expand Up @@ -3323,12 +3323,26 @@ message Issue {
string details = 3; // Additional context about the issue
}

// Records who initiated an instance delete in the cloud API layer.
message DeleteInitiatedBy {
string org = 1;
string org_display_name = 2;
string user_id = 3;
string tenant_id = 4;
}

message DeleteAttribution {
DeleteInitiatedBy initiated_by = 1;
}

message InstanceReleaseRequest {
common.InstanceId id = 1;
// Optional issue information if tenant is reporting a problem
optional Issue issue = 2;
// Optional flag indicating the call is from repair tenant (default: false)
optional bool is_repair_tenant = 3;
// Optional cloud-side attribution for who initiated the delete
optional DeleteAttribution delete_attribution = 4;
}

message InstanceReleaseResult {
Expand Down
2 changes: 1 addition & 1 deletion rest-api/api/pkg/api/handler/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -4902,7 +4902,7 @@ func (dih DeleteInstanceHandler) Handle(c echo.Context) error {
}

// Prepare the delete/release request workflow object
releaseInstanceRequest := apiRequest.ToProto(instance)
releaseInstanceRequest := apiRequest.ToProto(instance, dbUser)

workflowOptions := temporalClient.StartWorkflowOptions{
ID: "instance-delete-" + instance.ID.String(),
Expand Down
48 changes: 44 additions & 4 deletions rest-api/api/pkg/api/handler/instance_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -9745,14 +9745,54 @@ func TestDeleteInstanceHandler_Handle(t *testing.T) {
assert.Nil(t, terr)
assert.Equal(t, cdbm.InstanceStatusTerminating, dinstance.Status)

if tt.verifyChildSpanner {
span := oteltrace.SpanFromContext(ec.Request().Context())
assert.True(t, span.SpanContext().IsValid())
sdDAO := cdbm.NewStatusDetailDAO(dbSession)
statusDetails, _, serr := sdDAO.GetAll(context.Background(), nil, cdbm.StatusDetailFilterInput{EntityIDs: []string{tt.args.reqInstance}}, cdbp.PageInput{})
require.NoError(t, serr)
require.NotEmpty(t, statusDetails)
require.NotNil(t, statusDetails[0].Message)

siteClient, scErr := tt.fields.scp.GetClientByID(dinstance.SiteID)
require.NoError(t, scErr)
mockSiteClient, ok := siteClient.(*tmocks.Client)
require.True(t, ok, "site temporal client should be a test mock")

var releaseReq *cwssaws.InstanceReleaseRequest
for i := len(mockSiteClient.Calls) - 1; i >= 0; i-- {
call := mockSiteClient.Calls[i]
if call.Method != "ExecuteWorkflow" || len(call.Arguments) <= 3 {
continue
}
wfName, ok := call.Arguments[2].(string)
if !ok || wfName != "DeleteInstanceV2" {
continue
}
req, ok := call.Arguments[3].(*cwssaws.InstanceReleaseRequest)
if !ok || req.GetId().GetValue() != tt.args.reqInstance {
continue
}
releaseReq = req
break
}
require.NotNil(t, releaseReq, "DeleteInstanceV2 workflow should have been called for this Instance")

require.NotNil(t, releaseReq.DeleteAttribution)
require.NotNil(t, releaseReq.DeleteAttribution.InitiatedBy)
assert.Equal(t, tt.args.reqOrg, releaseReq.DeleteAttribution.InitiatedBy.Org)
assert.Equal(t, tt.args.reqUser.ID.String(), releaseReq.DeleteAttribution.InitiatedBy.UserId)
assert.Equal(t, dinstance.TenantID.String(), releaseReq.DeleteAttribution.InitiatedBy.TenantId)

if tt.args.reqData != nil && tt.args.reqData.MachineHealthIssue != nil {
require.NotNil(t, releaseReq.Issue)
if tt.args.reqData.MachineHealthIssue.Details != nil {
assert.Equal(t, *tt.args.reqData.MachineHealthIssue.Details, releaseReq.Issue.Details)
}
if tt.args.reqData.MachineHealthIssue.Summary != nil {
assert.Equal(t, *tt.args.reqData.MachineHealthIssue.Summary, releaseReq.Issue.Summary)
}
}
})
}
}

func TestNewCreateInstanceHandler(t *testing.T) {
type args struct {
dbSession *cdb.Session
Expand Down
15 changes: 14 additions & 1 deletion rest-api/api/pkg/api/model/instance.go
Original file line number Diff line number Diff line change
Expand Up @@ -1604,7 +1604,7 @@ func (idr *APIInstanceDeleteRequest) Validate() error {
// cannot see. In particular, the `IsRepairTenant` capability gate
// (TargetedInstanceCreation on the Tenant config) is an authorization
// check that stays in the handler before this method runs.
func (idr *APIInstanceDeleteRequest) ToProto(instance *cdbm.Instance) *cwssaws.InstanceReleaseRequest {
func (idr *APIInstanceDeleteRequest) ToProto(instance *cdbm.Instance, user *cdbm.User) *cwssaws.InstanceReleaseRequest {
req := instance.ToReleaseRequestProto()
if idr.MachineHealthIssue != nil {
req.Issue = &cwssaws.Issue{
Expand All @@ -1620,6 +1620,19 @@ func (idr *APIInstanceDeleteRequest) ToProto(instance *cdbm.Instance) *cwssaws.I
if idr.IsRepairTenant != nil {
req.IsRepairTenant = idr.IsRepairTenant
}

// Build the delete attribution proto
initiatedBy := &cwssaws.DeleteInitiatedBy{
Org: instance.Tenant.Org,
UserId: user.ID.String(),
TenantId: instance.Tenant.ID.String(),
}
if instance.Tenant.OrgDisplayName != nil {
initiatedBy.OrgDisplayName = *instance.Tenant.OrgDisplayName
}
req.DeleteAttribution = &cwssaws.DeleteAttribution{
InitiatedBy: initiatedBy,
}
return req
}

Expand Down
Loading
Loading