diff --git a/CHANGELOG.md b/CHANGELOG.md
index 162d44107d8cc..82adfb98ffd04 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,32 @@
 # Changelog
 
+## 17.3.0
+
+### Automatic Updates
+
+17.3 introduces a new automatic update mechanism that lets system administrators control which Teleport version their
+agents are running. You can now configure the agent update schedule and desired agent version via the `autoupdate_config`
+and `autoupdate_version` resources.
+
+Updates are performed by the new `teleport-update` binary.
+This new system is package-manager-agnostic and opt-in. Existing agents won't be automatically enrolled; you can enroll
+existing 17.3+ agents by running `teleport-update enable`.
+
+`teleport-update` will become the new standard way of installing Teleport, as it always picks the appropriate Teleport
+edition (Community vs Enterprise), the cluster's desired version, and the correct Teleport variant (e.g. FIPS-compliant
+cryptography).
+
+You can find more information about the feature in [our documentation]().
+
+### Package layout changes
+
+Starting with 17.3.0, the Teleport DEB and RPM packages, notably used by the `apt`, `yum`, `dnf` and `zypper` package
+managers, will place the Teleport binaries in `/opt/teleport` instead of `/usr/local/bin`.
+
+The binaries will be symlinked to their previous location, so no change should be required in your scripts or systemd units.
+
+This change allows us to perform automatic updates without conflicting with the package manager.
+
 ## 17.2.9 (02/25/25)
 
 * Updated go-jose/v4 to v4.0.5 (addresses CVE-2025-27144). [#52467](https://github.com/gravitational/teleport/pull/52467)
diff --git a/api/client/webclient/webclient.go b/api/client/webclient/webclient.go
index 28a5be6aac659..59e9249b55d2b 100644
--- a/api/client/webclient/webclient.go
+++ b/api/client/webclient/webclient.go
@@ -47,6 +47,15 @@ import (
 	"github.com/gravitational/teleport/api/utils/keys"
 )
 
+const (
+	// AgentUpdateGroupParameter is the parameter used to specify the updater
+	// group when doing a Ping() or Find() query.
+	// The proxy server will modulate the auto_update part of the PingResponse
+	// based on the specified group, e.g. some groups might need to update
+	// before others.
+	AgentUpdateGroupParameter = "group"
+)
+
 // Config specifies information when building requests with the
 // webclient.
 type Config struct {
@@ -68,6 +77,9 @@ type Config struct {
 	Timeout time.Duration
 	// TraceProvider is used to retrieve a Tracer for creating spans
 	TraceProvider oteltrace.TracerProvider
+	// UpdateGroup is used to vary the webapi response based on the
+	// client's auto-update group.
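+	// For example, agents in an early "staging" group may be told to update
+	// before agents in a later "prod" group (the group names here are purely
+	// illustrative). See AgentUpdateGroupParameter.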
+	UpdateGroup string
 }
 
 // CheckAndSetDefaults checks and sets defaults
@@ -166,12 +178,25 @@ func Find(cfg *Config) (*PingResponse, error) {
 	}
 	defer clt.CloseIdleConnections()
 
+	return findWithClient(cfg, clt)
+}
+
+func findWithClient(cfg *Config, clt *http.Client) (*PingResponse, error) {
 	ctx, span := cfg.TraceProvider.Tracer("webclient").Start(cfg.Context, "webclient/Find")
 	defer span.End()
 
-	endpoint := fmt.Sprintf("https://%s/webapi/find", cfg.ProxyAddr)
+	endpoint := &url.URL{
+		Scheme: "https",
+		Host:   cfg.ProxyAddr,
+		Path:   "/webapi/find",
+	}
+	if cfg.UpdateGroup != "" {
+		endpoint.RawQuery = url.Values{
+			AgentUpdateGroupParameter: []string{cfg.UpdateGroup},
+		}.Encode()
+	}
 
-	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint.String(), nil)
 	if err != nil {
 		return nil, trace.Wrap(err)
 	}
@@ -202,15 +227,29 @@ func Ping(cfg *Config) (*PingResponse, error) {
 	}
 	defer clt.CloseIdleConnections()
 
+	return pingWithClient(cfg, clt)
+}
+
+func pingWithClient(cfg *Config, clt *http.Client) (*PingResponse, error) {
 	ctx, span := cfg.TraceProvider.Tracer("webclient").Start(cfg.Context, "webclient/Ping")
 	defer span.End()
 
-	endpoint := fmt.Sprintf("https://%s/webapi/ping", cfg.ProxyAddr)
+	endpoint := &url.URL{
+		Scheme: "https",
+		Host:   cfg.ProxyAddr,
+		Path:   "/webapi/ping",
+	}
+	if cfg.UpdateGroup != "" {
+		endpoint.RawQuery = url.Values{
+			AgentUpdateGroupParameter: []string{cfg.UpdateGroup},
+		}.Encode()
+	}
+
 	if cfg.ConnectorName != "" {
-		endpoint = fmt.Sprintf("%s/%s", endpoint, cfg.ConnectorName)
+		endpoint = endpoint.JoinPath(cfg.ConnectorName)
 	}
 
-	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint, nil)
+	req, err := http.NewRequestWithContext(ctx, http.MethodGet, endpoint.String(), nil)
 	if err != nil {
 		return nil, trace.Wrap(err)
 	}
@@ -246,6 +285,7 @@ func Ping(cfg *Config) (*PingResponse, error) {
 	return pr, nil
 }
 
+// GetMOTD retrieves the Message Of The Day from the web proxy.
 func GetMOTD(cfg *Config) (*MotD, error) {
 	clt, err := newWebClient(cfg)
 	if err != nil {
@@ -253,6 +293,10 @@ func GetMOTD(cfg *Config) (*MotD, error) {
 	}
 	defer clt.CloseIdleConnections()
 
+	return getMOTDWithClient(cfg, clt)
+}
+
+func getMOTDWithClient(cfg *Config, clt *http.Client) (*MotD, error) {
 	ctx, span := cfg.TraceProvider.Tracer("webclient").Start(cfg.Context, "webclient/GetMOTD")
 	defer span.End()
 
@@ -281,6 +325,60 @@ func GetMOTD(cfg *Config) (*MotD, error) {
 	return motd, nil
 }
 
+// NewReusableClient creates a reusable webproxy client. If you need to do a single call,
+// use the webclient.Ping or webclient.Find functions instead.
+func NewReusableClient(cfg *Config) (*ReusableClient, error) {
+	// No need to check and set config defaults; this happens in newWebClient.
+	client, err := newWebClient(cfg)
+	if err != nil {
+		return nil, trace.Wrap(err, "building new web client")
+	}
+
+	return &ReusableClient{
+		client: client,
+		config: cfg,
+	}, nil
+}
+
+// ReusableClient is a webproxy client that allows the caller to make multiple calls
+// without having to build a new HTTP client each time.
+// Before retiring the client, you must make sure no calls are still in-flight, then call
+// ReusableClient.CloseIdleConnections().
+type ReusableClient struct {
+	client *http.Client
+	config *Config
+}
+
+// Find fetches discovery data by connecting to the given web proxy address.
+// It is designed to fetch the proxy's public addresses without unnecessary overhead.
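+//
+// A minimal reuse sketch (the address and group name below are illustrative
+// placeholders, not values mandated by this package):
+//
+//	clt, err := webclient.NewReusableClient(&webclient.Config{
+//		Context:     ctx,
+//		ProxyAddr:   "proxy.example.com:443",
+//		UpdateGroup: "staging",
+//	})
+//	if err != nil {
+//		return trace.Wrap(err)
+//	}
+//	defer clt.CloseIdleConnections()
+//
+//	resp, err := clt.Find()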
+func (c *ReusableClient) Find() (*PingResponse, error) {
+	return findWithClient(c.config, c.client)
+}
+
+// Ping serves two purposes. The first is to validate the HTTP endpoint of a
+// Teleport proxy. This leads to better user experience: users get connection
+// errors before being asked for passwords. The second is to return the form
+// of authentication that the server supports. This also leads to better user
+// experience: users only get prompted for the type of authentication the server supports.
+func (c *ReusableClient) Ping() (*PingResponse, error) {
+	return pingWithClient(c.config, c.client)
+}
+
+// GetMOTD retrieves the Message Of The Day from the web proxy.
+func (c *ReusableClient) GetMOTD() (*MotD, error) {
+	return getMOTDWithClient(c.config, c.client)
+}
+
+// CloseIdleConnections closes any connections on its [Transport] which
+// were previously connected from previous requests but are now
+// sitting idle in a "keep-alive" state. It does not interrupt any
+// connections currently in use.
+//
+// This must be run before retiring the ReusableClient.
+func (c *ReusableClient) CloseIdleConnections() {
+	c.client.CloseIdleConnections()
+}
+
 // MotD holds data about the current message of the day.
 type MotD struct {
 	Text string
@@ -305,6 +403,10 @@ type PingResponse struct {
 	// reserved: license_warnings ([]string)
 	// AutomaticUpgrades describes whether agents should automatically upgrade.
 	AutomaticUpgrades bool `json:"automatic_upgrades"`
+	// Edition represents the Teleport edition. Possible values are "oss", "ent", and "community".
+	Edition string `json:"edition"`
+	// FIPS indicates whether Teleport is using FIPS-compliant cryptography.
+	FIPS bool `json:"fips"`
 }
 
 // PingErrorResponse contains the error from /webapi/ping.
@@ -336,6 +438,12 @@ type AutoUpdateSettings struct {
 	ToolsVersion string `json:"tools_version"`
 	// ToolsAutoUpdate indicates if the requesting tools client should be updated.
 	ToolsAutoUpdate bool `json:"tools_auto_update"`
+	// AgentVersion defines the version of Teleport that agents enrolled into autoupdates should run.
+	AgentVersion string `json:"agent_version"`
+	// AgentAutoUpdate indicates if the requesting agent should attempt to update now.
+	AgentAutoUpdate bool `json:"agent_auto_update"`
+	// AgentUpdateJitterSeconds defines the jitter time an agent should wait before updating.
+	AgentUpdateJitterSeconds int `json:"agent_update_jitter_seconds"`
 }
 
 // KubeProxySettings is kubernetes proxy settings
diff --git a/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go b/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go
index e01283cc82414..2f864d21a2976 100644
--- a/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go
+++ b/api/gen/proto/go/teleport/autoupdate/v1/autoupdate.pb.go
@@ -51,7 +51,7 @@ const (
 	AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE AutoUpdateAgentGroupState = 2
 	// AUTO_UPDATE_AGENT_GROUP_STATE_DONE represents that the group has been updated. New agents should run v2.
 	AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE AutoUpdateAgentGroupState = 3
-	// AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK represents that the group has been rolleback.
+	// AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK represents that the group has been rolled back.
 	// New agents should run v1, existing agents should update to v1.
 	AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK AutoUpdateAgentGroupState = 4
 )
 
@@ -101,6 +101,71 @@ func (AutoUpdateAgentGroupState) EnumDescriptor() ([]byte, []int) {
 	return file_teleport_autoupdate_v1_autoupdate_proto_rawDescGZIP(), []int{0}
 }
 
+// AutoUpdateAgentRolloutState represents the rollout state. This tells whether Teleport has started updating agents
+// from the start version to the target version, whether the update is done or still in progress, and whether the
+// rollout was manually reverted.
+type AutoUpdateAgentRolloutState int32
+
+const (
+	// AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED state
+	AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED AutoUpdateAgentRolloutState = 0
+	// AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED represents that no group in the rollout has been started yet.
+	AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED AutoUpdateAgentRolloutState = 1
+	// AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE represents that at least one group of the rollout has started.
+	// If every group is finished, the state will be AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE.
+	AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE AutoUpdateAgentRolloutState = 2
+	// AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE represents that every group is in the DONE state, or has been in the done
+	// state (groups might become active again in the time-based strategy).
+	AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE AutoUpdateAgentRolloutState = 3
+	// AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK represents that at least one group is in the rolledback state.
+	AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK AutoUpdateAgentRolloutState = 4
+)
+
+// Enum value maps for AutoUpdateAgentRolloutState.
+var (
+	AutoUpdateAgentRolloutState_name = map[int32]string{
+		0: "AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED",
+		1: "AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED",
+		2: "AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE",
+		3: "AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE",
+		4: "AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK",
+	}
+	AutoUpdateAgentRolloutState_value = map[string]int32{
+		"AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED": 0,
+		"AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED":   1,
+		"AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE":      2,
+		"AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE":        3,
+		"AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK":  4,
+	}
+)
+
+func (x AutoUpdateAgentRolloutState) Enum() *AutoUpdateAgentRolloutState {
+	p := new(AutoUpdateAgentRolloutState)
+	*p = x
+	return p
+}
+
+func (x AutoUpdateAgentRolloutState) String() string {
+	return protoimpl.X.EnumStringOf(x.Descriptor(), protoreflect.EnumNumber(x))
+}
+
+func (AutoUpdateAgentRolloutState) Descriptor() protoreflect.EnumDescriptor {
+	return file_teleport_autoupdate_v1_autoupdate_proto_enumTypes[1].Descriptor()
+}
+
+func (AutoUpdateAgentRolloutState) Type() protoreflect.EnumType {
+	return &file_teleport_autoupdate_v1_autoupdate_proto_enumTypes[1]
+}
+
+func (x AutoUpdateAgentRolloutState) Number() protoreflect.EnumNumber {
+	return protoreflect.EnumNumber(x)
+}
+
+// Deprecated: Use AutoUpdateAgentRolloutState.Descriptor instead.
+func (AutoUpdateAgentRolloutState) EnumDescriptor() ([]byte, []int) {
+	return file_teleport_autoupdate_v1_autoupdate_proto_rawDescGZIP(), []int{1}
+}
+
 // AutoUpdateConfig is a config singleton used to configure cluster
 // autoupdate settings.
 type AutoUpdateConfig struct {
@@ -416,8 +481,9 @@ type AgentAutoUpdateGroup struct {
 	Days []string `protobuf:"bytes,2,rep,name=days,proto3" json:"days,omitempty"`
 	// start_hour to initiate update
 	StartHour int32 `protobuf:"varint,3,opt,name=start_hour,json=startHour,proto3" json:"start_hour,omitempty"`
-	// wait_days after last group succeeds before this group can run. This can only be used when the strategy is "halt-on-failure".
-	WaitDays int64 `protobuf:"varint,4,opt,name=wait_days,json=waitDays,proto3" json:"wait_days,omitempty"`
+	// wait_hours after last group succeeds before this group can run. This can only be used when the strategy is "halt-on-failure".
+	// This field must be positive.
+	WaitHours int32 `protobuf:"varint,5,opt,name=wait_hours,json=waitHours,proto3" json:"wait_hours,omitempty"`
 }
 
 func (x *AgentAutoUpdateGroup) Reset() {
@@ -471,9 +537,9 @@ func (x *AgentAutoUpdateGroup) GetStartHour() int32 {
 	return 0
 }
 
-func (x *AgentAutoUpdateGroup) GetWaitDays() int64 {
+func (x *AgentAutoUpdateGroup) GetWaitHours() int32 {
 	if x != nil {
-		return x.WaitDays
+		return x.WaitHours
 	}
 	return 0
 }
@@ -853,6 +919,10 @@ type AutoUpdateAgentRolloutSpec struct {
 	// availability. A group finishes its update once most of its agents are running the correct version. Agents that
 	// missed the group update will try to catch back as soon as possible.
 	Strategy string `protobuf:"bytes,5,opt,name=strategy,proto3" json:"strategy,omitempty"`
+	// maintenance_window_duration is the maintenance window duration. This can only be set if `strategy` is "time-based".
+	// Once the window is over, the group transitions to the done state. Existing agents won't be updated until the next
+	// maintenance window.
+	MaintenanceWindowDuration *durationpb.Duration `protobuf:"bytes,6,opt,name=maintenance_window_duration,json=maintenanceWindowDuration,proto3" json:"maintenance_window_duration,omitempty"`
 }
 
 func (x *AutoUpdateAgentRolloutSpec) Reset() {
@@ -920,13 +990,35 @@ func (x *AutoUpdateAgentRolloutSpec) GetStrategy() string {
 	return ""
 }
 
+func (x *AutoUpdateAgentRolloutSpec) GetMaintenanceWindowDuration() *durationpb.Duration {
+	if x != nil {
+		return x.MaintenanceWindowDuration
+	}
+	return nil
+}
+
 // AutoUpdateAgentRolloutStatus tracks the current agent rollout status.
+// The status is reset if any spec field changes except the mode.
 type AutoUpdateAgentRolloutStatus struct {
 	state         protoimpl.MessageState
 	sizeCache     protoimpl.SizeCache
 	unknownFields protoimpl.UnknownFields
 
 	Groups []*AutoUpdateAgentRolloutStatusGroup `protobuf:"bytes,1,rep,name=groups,proto3" json:"groups,omitempty"`
+	State  AutoUpdateAgentRolloutState          `protobuf:"varint,2,opt,name=state,proto3,enum=teleport.autoupdate.v1.AutoUpdateAgentRolloutState" json:"state,omitempty"`
+	// The start time is set when the rollout is created or reset. Usually this is caused by a version change.
+	// The timestamp allows the controller to detect that the rollout just changed.
+	// The controller will not start any group that should have been active before the start_time, to avoid a
+	// double-update effect.
+	// For example, a group updates every day between 13:00 and 14:00. If the target version changes at 13:30, the group
+	// will not start updating to the new version directly. The controller sees that the group's theoretical start time
+	// is before the rollout start time, so the maintenance window belongs to the previous rollout.
+	// When the timestamp is nil, the controller skips the start time check and allows groups to activate.
+ StartTime *timestamppb.Timestamp `protobuf:"bytes,3,opt,name=start_time,json=startTime,proto3" json:"start_time,omitempty"` + // Time override is an optional timestamp making the autoupdate_agent_rollout controller use a specific time instead + // of the system clock when evaluating time-based criteria. This field is used for testing and troubleshooting + // purposes. + TimeOverride *timestamppb.Timestamp `protobuf:"bytes,4,opt,name=time_override,json=timeOverride,proto3" json:"time_override,omitempty"` } func (x *AutoUpdateAgentRolloutStatus) Reset() { @@ -966,6 +1058,27 @@ func (x *AutoUpdateAgentRolloutStatus) GetGroups() []*AutoUpdateAgentRolloutStat return nil } +func (x *AutoUpdateAgentRolloutStatus) GetState() AutoUpdateAgentRolloutState { + if x != nil { + return x.State + } + return AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED +} + +func (x *AutoUpdateAgentRolloutStatus) GetStartTime() *timestamppb.Timestamp { + if x != nil { + return x.StartTime + } + return nil +} + +func (x *AutoUpdateAgentRolloutStatus) GetTimeOverride() *timestamppb.Timestamp { + if x != nil { + return x.TimeOverride + } + return nil +} + // AutoUpdateAgentRolloutStatusGroup tracks the current agent rollout status of a specific group. type AutoUpdateAgentRolloutStatusGroup struct { state protoimpl.MessageState @@ -982,6 +1095,13 @@ type AutoUpdateAgentRolloutStatusGroup struct { LastUpdateTime *timestamppb.Timestamp `protobuf:"bytes,4,opt,name=last_update_time,json=lastUpdateTime,proto3" json:"last_update_time,omitempty"` // last_update_reason is the trigger for the last update LastUpdateReason string `protobuf:"bytes,5,opt,name=last_update_reason,json=lastUpdateReason,proto3" json:"last_update_reason,omitempty"` + // config_days when the update can run. Supported values are "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" and "*" + ConfigDays []string `protobuf:"bytes,6,rep,name=config_days,json=configDays,proto3" json:"config_days,omitempty"` + // config_start_hour to initiate update + ConfigStartHour int32 `protobuf:"varint,7,opt,name=config_start_hour,json=configStartHour,proto3" json:"config_start_hour,omitempty"` + // config_wait_hours after last group succeeds before this group can run. This can only be used when the strategy is "halt-on-failure". + // This field must be positive. 
+ ConfigWaitHours int32 `protobuf:"varint,9,opt,name=config_wait_hours,json=configWaitHours,proto3" json:"config_wait_hours,omitempty"` } func (x *AutoUpdateAgentRolloutStatusGroup) Reset() { @@ -1049,6 +1169,27 @@ func (x *AutoUpdateAgentRolloutStatusGroup) GetLastUpdateReason() string { return "" } +func (x *AutoUpdateAgentRolloutStatusGroup) GetConfigDays() []string { + if x != nil { + return x.ConfigDays + } + return nil +} + +func (x *AutoUpdateAgentRolloutStatusGroup) GetConfigStartHour() int32 { + if x != nil { + return x.ConfigStartHour + } + return 0 +} + +func (x *AutoUpdateAgentRolloutStatusGroup) GetConfigWaitHours() int32 { + if x != nil { + return x.ConfigWaitHours + } + return 0 +} + var File_teleport_autoupdate_v1_autoupdate_proto protoreflect.FileDescriptor var file_teleport_autoupdate_v1_autoupdate_proto_rawDesc = []byte{ @@ -1114,135 +1255,180 @@ var file_teleport_autoupdate_v1_autoupdate_proto_rawDesc = []byte{ 0x20, 0x03, 0x28, 0x0b, 0x32, 0x2c, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x47, 0x72, 0x6f, - 0x75, 0x70, 0x52, 0x07, 0x72, 0x65, 0x67, 0x75, 0x6c, 0x61, 0x72, 0x22, 0x7a, 0x0a, 0x14, 0x41, - 0x67, 0x65, 0x6e, 0x74, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x47, 0x72, - 0x6f, 0x75, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x79, 0x73, 0x18, - 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x64, 0x61, 0x79, 0x73, 0x12, 0x1d, 0x0a, 0x0a, 0x73, - 0x74, 0x61, 0x72, 0x74, 0x5f, 0x68, 0x6f, 0x75, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, 0x52, - 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, 0x48, 0x6f, 0x75, 0x72, 0x12, 0x1b, 0x0a, 0x09, 0x77, 0x61, - 0x69, 0x74, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x18, 0x04, 0x20, 0x01, 0x28, 0x03, 0x52, 0x08, 0x77, - 0x61, 0x69, 0x74, 0x44, 0x61, 0x79, 0x73, 0x22, 0xd9, 0x01, 0x0a, 0x11, 0x41, 0x75, 0x74, 0x6f, - 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x12, 0x0a, - 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x69, 0x6e, - 0x64, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x75, 0x62, 0x5f, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x02, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x07, 0x73, 0x75, 0x62, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x18, 0x0a, 0x07, - 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, - 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x38, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, - 0x74, 0x61, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, - 0x6f, 0x72, 0x74, 0x2e, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, - 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, - 0x12, 0x41, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x2d, - 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, - 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x52, 0x04, 0x73, - 0x70, 0x65, 0x63, 0x22, 0xc3, 0x01, 0x0a, 0x15, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 
0x65, 0x63, 0x12, 0x48, 0x0a, - 0x05, 0x74, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x74, - 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, - 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, - 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x54, 0x6f, 0x6f, 0x6c, 0x73, - 0x52, 0x05, 0x74, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x4b, 0x0a, 0x06, 0x61, 0x67, 0x65, 0x6e, 0x74, - 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x33, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, - 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, - 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x73, 0x52, 0x06, 0x61, 0x67, - 0x65, 0x6e, 0x74, 0x73, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x52, 0x0d, 0x74, 0x6f, 0x6f, 0x6c, - 0x73, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x43, 0x0a, 0x1a, 0x41, 0x75, 0x74, - 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, - 0x65, 0x63, 0x54, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x74, 0x61, 0x72, 0x67, 0x65, - 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, - 0x0d, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x99, - 0x01, 0x0a, 0x1b, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x73, 0x12, 0x23, - 0x0a, 0x0d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, - 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x74, 0x61, 0x72, 0x74, 0x56, 0x65, 0x72, 0x73, - 0x69, 0x6f, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x5f, 0x76, 0x65, - 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x74, 0x61, 0x72, - 0x67, 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x0a, 0x08, 0x73, 0x63, - 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x73, 0x63, - 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x04, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x22, 0xb1, 0x02, 0x0a, 0x16, 0x41, - 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, - 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x01, 0x20, - 0x01, 0x28, 0x09, 0x52, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x75, 0x62, - 0x5f, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x73, 0x75, 0x62, - 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, - 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x38, - 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x1c, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x68, 0x65, 0x61, 0x64, - 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, - 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x46, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, - 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 
0x72, - 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, - 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, - 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x70, 0x65, 0x63, 0x52, 0x04, 0x73, 0x70, 0x65, 0x63, - 0x12, 0x4c, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x06, 0x20, 0x01, 0x28, 0x0b, - 0x32, 0x34, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, + 0x75, 0x70, 0x52, 0x07, 0x72, 0x65, 0x67, 0x75, 0x6c, 0x61, 0x72, 0x22, 0x8d, 0x01, 0x0a, 0x14, + 0x41, 0x67, 0x65, 0x6e, 0x74, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x47, + 0x72, 0x6f, 0x75, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x64, 0x61, 0x79, 0x73, + 0x18, 0x02, 0x20, 0x03, 0x28, 0x09, 0x52, 0x04, 0x64, 0x61, 0x79, 0x73, 0x12, 0x1d, 0x0a, 0x0a, + 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x68, 0x6f, 0x75, 0x72, 0x18, 0x03, 0x20, 0x01, 0x28, 0x05, + 0x52, 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, 0x48, 0x6f, 0x75, 0x72, 0x12, 0x1d, 0x0a, 0x0a, 0x77, + 0x61, 0x69, 0x74, 0x5f, 0x68, 0x6f, 0x75, 0x72, 0x73, 0x18, 0x05, 0x20, 0x01, 0x28, 0x05, 0x52, + 0x09, 0x77, 0x61, 0x69, 0x74, 0x48, 0x6f, 0x75, 0x72, 0x73, 0x4a, 0x04, 0x08, 0x04, 0x10, 0x05, + 0x52, 0x09, 0x77, 0x61, 0x69, 0x74, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x22, 0xd9, 0x01, 0x0a, 0x11, + 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, + 0x6e, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x19, 0x0a, 0x08, 0x73, 0x75, 0x62, 0x5f, 0x6b, 0x69, 0x6e, + 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x73, 0x75, 0x62, 0x4b, 0x69, 0x6e, 0x64, + 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, + 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x38, 0x0a, 0x08, 0x6d, 0x65, + 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, + 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x2e, 0x76, + 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, + 0x64, 0x61, 0x74, 0x61, 0x12, 0x41, 0x0a, 0x04, 0x73, 0x70, 0x65, 0x63, 0x18, 0x05, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x2d, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, + 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, + 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, + 0x63, 0x52, 0x04, 0x73, 0x70, 0x65, 0x63, 0x22, 0xc3, 0x01, 0x0a, 0x15, 0x41, 0x75, 0x74, 0x6f, + 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, + 0x63, 0x12, 0x48, 0x0a, 0x05, 0x74, 0x6f, 0x6f, 0x6c, 0x73, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, + 0x32, 0x32, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, - 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, - 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x22, 0xc9, - 0x01, 0x0a, 0x1a, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, - 0x6e, 0x74, 0x52, 
0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x70, 0x65, 0x63, 0x12, 0x23, 0x0a, - 0x0d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, - 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x74, 0x61, 0x72, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, - 0x6f, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x5f, 0x76, 0x65, 0x72, - 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, 0x74, 0x61, 0x72, 0x67, - 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x0a, 0x08, 0x73, 0x63, 0x68, - 0x65, 0x64, 0x75, 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x73, 0x63, 0x68, - 0x65, 0x64, 0x75, 0x6c, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, - 0x61, 0x74, 0x65, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0e, - 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x4d, 0x6f, 0x64, 0x65, 0x12, 0x1a, - 0x0a, 0x08, 0x73, 0x74, 0x72, 0x61, 0x74, 0x65, 0x67, 0x79, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, - 0x52, 0x08, 0x73, 0x74, 0x72, 0x61, 0x74, 0x65, 0x67, 0x79, 0x22, 0x71, 0x0a, 0x1c, 0x41, 0x75, + 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x54, + 0x6f, 0x6f, 0x6c, 0x73, 0x52, 0x05, 0x74, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x4b, 0x0a, 0x06, 0x61, + 0x67, 0x65, 0x6e, 0x74, 0x73, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x33, 0x2e, 0x74, 0x65, + 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, + 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x73, + 0x52, 0x06, 0x61, 0x67, 0x65, 0x6e, 0x74, 0x73, 0x4a, 0x04, 0x08, 0x01, 0x10, 0x02, 0x52, 0x0d, + 0x74, 0x6f, 0x6f, 0x6c, 0x73, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x22, 0x43, 0x0a, + 0x1a, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x54, 0x6f, 0x6f, 0x6c, 0x73, 0x12, 0x25, 0x0a, 0x0e, 0x74, + 0x61, 0x72, 0x67, 0x65, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, + 0x01, 0x28, 0x09, 0x52, 0x0d, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x22, 0x99, 0x01, 0x0a, 0x1b, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, + 0x65, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x53, 0x70, 0x65, 0x63, 0x41, 0x67, 0x65, 0x6e, + 0x74, 0x73, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x74, 0x61, 0x72, 0x74, + 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x74, 0x61, 0x72, 0x67, 0x65, + 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x0d, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, + 0x0a, 0x08, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, + 0x52, 0x08, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x12, 0x12, 0x0a, 0x04, 0x6d, 0x6f, + 0x64, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6d, 0x6f, 0x64, 0x65, 0x22, 0xb1, + 0x02, 0x0a, 0x16, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, + 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x12, 0x12, 0x0a, 0x04, 0x6b, 0x69, 0x6e, + 0x64, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 
0x52, 0x04, 0x6b, 0x69, 0x6e, 0x64, 0x12, 0x19, 0x0a, + 0x08, 0x73, 0x75, 0x62, 0x5f, 0x6b, 0x69, 0x6e, 0x64, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x07, 0x73, 0x75, 0x62, 0x4b, 0x69, 0x6e, 0x64, 0x12, 0x18, 0x0a, 0x07, 0x76, 0x65, 0x72, 0x73, + 0x69, 0x6f, 0x6e, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, 0x07, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x12, 0x38, 0x0a, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x18, 0x04, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1c, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, + 0x68, 0x65, 0x61, 0x64, 0x65, 0x72, 0x2e, 0x76, 0x31, 0x2e, 0x4d, 0x65, 0x74, 0x61, 0x64, 0x61, + 0x74, 0x61, 0x52, 0x08, 0x6d, 0x65, 0x74, 0x61, 0x64, 0x61, 0x74, 0x61, 0x12, 0x46, 0x0a, 0x04, + 0x73, 0x70, 0x65, 0x63, 0x18, 0x05, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x32, 0x2e, 0x74, 0x65, 0x6c, + 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, + 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, + 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x70, 0x65, 0x63, 0x52, 0x04, + 0x73, 0x70, 0x65, 0x63, 0x12, 0x4c, 0x0a, 0x06, 0x73, 0x74, 0x61, 0x74, 0x75, 0x73, 0x18, 0x06, + 0x20, 0x01, 0x28, 0x0b, 0x32, 0x34, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, + 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, + 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, + 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x52, 0x06, 0x73, 0x74, 0x61, 0x74, + 0x75, 0x73, 0x22, 0xa4, 0x02, 0x0a, 0x1a, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, + 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x70, 0x65, + 0x63, 0x12, 0x23, 0x0a, 0x0d, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, + 0x6f, 0x6e, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0c, 0x73, 0x74, 0x61, 0x72, 0x74, 0x56, + 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x25, 0x0a, 0x0e, 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, + 0x5f, 0x76, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x18, 0x02, 0x20, 0x01, 0x28, 0x09, 0x52, 0x0d, + 0x74, 0x61, 0x72, 0x67, 0x65, 0x74, 0x56, 0x65, 0x72, 0x73, 0x69, 0x6f, 0x6e, 0x12, 0x1a, 0x0a, + 0x08, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x09, 0x52, + 0x08, 0x73, 0x63, 0x68, 0x65, 0x64, 0x75, 0x6c, 0x65, 0x12, 0x27, 0x0a, 0x0f, 0x61, 0x75, 0x74, + 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x6d, 0x6f, 0x64, 0x65, 0x18, 0x04, 0x20, 0x01, + 0x28, 0x09, 0x52, 0x0e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x4d, 0x6f, + 0x64, 0x65, 0x12, 0x1a, 0x0a, 0x08, 0x73, 0x74, 0x72, 0x61, 0x74, 0x65, 0x67, 0x79, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x08, 0x73, 0x74, 0x72, 0x61, 0x74, 0x65, 0x67, 0x79, 0x12, 0x59, + 0x0a, 0x1b, 0x6d, 0x61, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x63, 0x65, 0x5f, 0x77, 0x69, + 0x6e, 0x64, 0x6f, 0x77, 0x5f, 0x64, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x18, 0x06, 0x20, + 0x01, 0x28, 0x0b, 0x32, 0x19, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, + 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x52, 0x19, + 0x6d, 0x61, 0x69, 0x6e, 0x74, 0x65, 0x6e, 0x61, 0x6e, 0x63, 0x65, 0x57, 0x69, 0x6e, 0x64, 0x6f, + 0x77, 0x44, 0x75, 0x72, 0x61, 0x74, 0x69, 0x6f, 0x6e, 0x22, 0xb8, 0x02, 0x0a, 0x1c, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 
0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x12, 0x51, 0x0a, 0x06, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x73, 0x18, 0x01, 0x20, 0x03, 0x28, 0x0b, 0x32, 0x39, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, - 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x06, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x73, 0x22, 0xaf, 0x02, - 0x0a, 0x21, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, - 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x74, 0x61, 0x74, 0x75, 0x73, 0x47, 0x72, - 0x6f, 0x75, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, - 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x39, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, - 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, - 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, - 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, 0x69, - 0x6d, 0x65, 0x12, 0x47, 0x0a, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, - 0x0e, 0x32, 0x31, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, - 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, - 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, - 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x44, 0x0a, 0x10, 0x6c, - 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, - 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, - 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, - 0x70, 0x52, 0x0e, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, - 0x65, 0x12, 0x2c, 0x0a, 0x12, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, - 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x05, 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x6c, - 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x2a, - 0xf7, 0x01, 0x0a, 0x19, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, - 0x65, 0x6e, 0x74, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2d, 0x0a, - 0x29, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, - 0x4e, 0x54, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, - 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, 0x10, 0x00, 0x12, 0x2b, 0x0a, 0x27, - 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, - 0x54, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, - 0x53, 0x54, 0x41, 0x52, 0x54, 0x45, 0x44, 0x10, 0x01, 0x12, 0x28, 0x0a, 0x24, 0x41, 0x55, 0x54, - 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x47, - 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x43, 0x54, 0x49, 0x56, - 0x45, 0x10, 0x02, 0x12, 0x26, 0x0a, 0x22, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, - 
0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, - 0x54, 0x41, 0x54, 0x45, 0x5f, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x03, 0x12, 0x2c, 0x0a, 0x28, 0x41, + 0x47, 0x72, 0x6f, 0x75, 0x70, 0x52, 0x06, 0x67, 0x72, 0x6f, 0x75, 0x70, 0x73, 0x12, 0x49, 0x0a, + 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x18, 0x02, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x33, 0x2e, 0x74, + 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, + 0x74, 0x65, 0x2e, 0x76, 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, + 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, 0x74, 0x65, 0x12, 0x39, 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, + 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, + 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, + 0x69, 0x6d, 0x65, 0x12, 0x3f, 0x0a, 0x0d, 0x74, 0x69, 0x6d, 0x65, 0x5f, 0x6f, 0x76, 0x65, 0x72, + 0x72, 0x69, 0x64, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, + 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, + 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0c, 0x74, 0x69, 0x6d, 0x65, 0x4f, 0x76, 0x65, 0x72, + 0x72, 0x69, 0x64, 0x65, 0x22, 0xc0, 0x03, 0x0a, 0x21, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, + 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, + 0x74, 0x61, 0x74, 0x75, 0x73, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x12, 0x12, 0x0a, 0x04, 0x6e, 0x61, + 0x6d, 0x65, 0x18, 0x01, 0x20, 0x01, 0x28, 0x09, 0x52, 0x04, 0x6e, 0x61, 0x6d, 0x65, 0x12, 0x39, + 0x0a, 0x0a, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x02, 0x20, 0x01, + 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, + 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x09, + 0x73, 0x74, 0x61, 0x72, 0x74, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x47, 0x0a, 0x05, 0x73, 0x74, 0x61, + 0x74, 0x65, 0x18, 0x03, 0x20, 0x01, 0x28, 0x0e, 0x32, 0x31, 0x2e, 0x74, 0x65, 0x6c, 0x65, 0x70, + 0x6f, 0x72, 0x74, 0x2e, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x2e, 0x76, + 0x31, 0x2e, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, + 0x74, 0x47, 0x72, 0x6f, 0x75, 0x70, 0x53, 0x74, 0x61, 0x74, 0x65, 0x52, 0x05, 0x73, 0x74, 0x61, + 0x74, 0x65, 0x12, 0x44, 0x0a, 0x10, 0x6c, 0x61, 0x73, 0x74, 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, + 0x65, 0x5f, 0x74, 0x69, 0x6d, 0x65, 0x18, 0x04, 0x20, 0x01, 0x28, 0x0b, 0x32, 0x1a, 0x2e, 0x67, + 0x6f, 0x6f, 0x67, 0x6c, 0x65, 0x2e, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x62, 0x75, 0x66, 0x2e, 0x54, + 0x69, 0x6d, 0x65, 0x73, 0x74, 0x61, 0x6d, 0x70, 0x52, 0x0e, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x70, + 0x64, 0x61, 0x74, 0x65, 0x54, 0x69, 0x6d, 0x65, 0x12, 0x2c, 0x0a, 0x12, 0x6c, 0x61, 0x73, 0x74, + 0x5f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x5f, 0x72, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x18, 0x05, + 0x20, 0x01, 0x28, 0x09, 0x52, 0x10, 0x6c, 0x61, 0x73, 0x74, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, + 0x52, 0x65, 0x61, 0x73, 0x6f, 0x6e, 0x12, 0x1f, 0x0a, 0x0b, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, + 0x5f, 0x64, 0x61, 0x79, 0x73, 0x18, 0x06, 0x20, 0x03, 0x28, 0x09, 0x52, 0x0a, 0x63, 0x6f, 0x6e, + 0x66, 0x69, 0x67, 0x44, 
0x61, 0x79, 0x73, 0x12, 0x2a, 0x0a, 0x11, 0x63, 0x6f, 0x6e, 0x66, 0x69, + 0x67, 0x5f, 0x73, 0x74, 0x61, 0x72, 0x74, 0x5f, 0x68, 0x6f, 0x75, 0x72, 0x18, 0x07, 0x20, 0x01, + 0x28, 0x05, 0x52, 0x0f, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x53, 0x74, 0x61, 0x72, 0x74, 0x48, + 0x6f, 0x75, 0x72, 0x12, 0x2a, 0x0a, 0x11, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x5f, 0x77, 0x61, + 0x69, 0x74, 0x5f, 0x68, 0x6f, 0x75, 0x72, 0x73, 0x18, 0x09, 0x20, 0x01, 0x28, 0x05, 0x52, 0x0f, + 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x57, 0x61, 0x69, 0x74, 0x48, 0x6f, 0x75, 0x72, 0x73, 0x4a, + 0x04, 0x08, 0x08, 0x10, 0x09, 0x52, 0x10, 0x63, 0x6f, 0x6e, 0x66, 0x69, 0x67, 0x5f, 0x77, 0x61, + 0x69, 0x74, 0x5f, 0x64, 0x61, 0x79, 0x73, 0x2a, 0xf7, 0x01, 0x0a, 0x19, 0x41, 0x75, 0x74, 0x6f, + 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, 0x41, 0x67, 0x65, 0x6e, 0x74, 0x47, 0x72, 0x6f, 0x75, 0x70, + 0x53, 0x74, 0x61, 0x74, 0x65, 0x12, 0x2d, 0x0a, 0x29, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, + 0x44, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, + 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, + 0x45, 0x44, 0x10, 0x00, 0x12, 0x2b, 0x0a, 0x27, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, + 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, + 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x54, 0x41, 0x52, 0x54, 0x45, 0x44, 0x10, + 0x01, 0x12, 0x28, 0x0a, 0x24, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, + 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, 0x54, 0x41, + 0x54, 0x45, 0x5f, 0x41, 0x43, 0x54, 0x49, 0x56, 0x45, 0x10, 0x02, 0x12, 0x26, 0x0a, 0x22, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, - 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x52, 0x4f, 0x4c, - 0x4c, 0x45, 0x44, 0x42, 0x41, 0x43, 0x4b, 0x10, 0x04, 0x42, 0x56, 0x5a, 0x54, 0x67, 0x69, 0x74, - 0x68, 0x75, 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x72, 0x61, 0x76, 0x69, 0x74, 0x61, 0x74, - 0x69, 0x6f, 0x6e, 0x61, 0x6c, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2f, 0x61, - 0x70, 0x69, 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, - 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2f, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, - 0x61, 0x74, 0x65, 0x2f, 0x76, 0x31, 0x3b, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, - 0x65, 0x62, 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, + 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x44, 0x4f, 0x4e, + 0x45, 0x10, 0x03, 0x12, 0x2c, 0x0a, 0x28, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, + 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x47, 0x52, 0x4f, 0x55, 0x50, 0x5f, 0x53, + 0x54, 0x41, 0x54, 0x45, 0x5f, 0x52, 0x4f, 0x4c, 0x4c, 0x45, 0x44, 0x42, 0x41, 0x43, 0x4b, 0x10, + 0x04, 0x2a, 0x83, 0x02, 0x0a, 0x1b, 0x41, 0x75, 0x74, 0x6f, 0x55, 0x70, 0x64, 0x61, 0x74, 0x65, + 0x41, 0x67, 0x65, 0x6e, 0x74, 0x52, 0x6f, 0x6c, 0x6c, 0x6f, 0x75, 0x74, 0x53, 0x74, 0x61, 0x74, + 0x65, 0x12, 0x2f, 0x0a, 0x2b, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, + 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x52, 0x4f, 0x4c, 0x4c, 0x4f, 0x55, 0x54, 0x5f, 0x53, + 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x50, 0x45, 0x43, 0x49, 0x46, 0x49, 0x45, 0x44, + 0x10, 0x00, 0x12, 0x2d, 0x0a, 0x29, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 
0x54, + 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x52, 0x4f, 0x4c, 0x4c, 0x4f, 0x55, 0x54, 0x5f, + 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x55, 0x4e, 0x53, 0x54, 0x41, 0x52, 0x54, 0x45, 0x44, 0x10, + 0x01, 0x12, 0x2a, 0x0a, 0x26, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, + 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x52, 0x4f, 0x4c, 0x4c, 0x4f, 0x55, 0x54, 0x5f, 0x53, + 0x54, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x43, 0x54, 0x49, 0x56, 0x45, 0x10, 0x02, 0x12, 0x28, 0x0a, + 0x24, 0x41, 0x55, 0x54, 0x4f, 0x5f, 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, + 0x4e, 0x54, 0x5f, 0x52, 0x4f, 0x4c, 0x4c, 0x4f, 0x55, 0x54, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, + 0x5f, 0x44, 0x4f, 0x4e, 0x45, 0x10, 0x03, 0x12, 0x2e, 0x0a, 0x2a, 0x41, 0x55, 0x54, 0x4f, 0x5f, + 0x55, 0x50, 0x44, 0x41, 0x54, 0x45, 0x5f, 0x41, 0x47, 0x45, 0x4e, 0x54, 0x5f, 0x52, 0x4f, 0x4c, + 0x4c, 0x4f, 0x55, 0x54, 0x5f, 0x53, 0x54, 0x41, 0x54, 0x45, 0x5f, 0x52, 0x4f, 0x4c, 0x4c, 0x45, + 0x44, 0x42, 0x41, 0x43, 0x4b, 0x10, 0x04, 0x42, 0x56, 0x5a, 0x54, 0x67, 0x69, 0x74, 0x68, 0x75, + 0x62, 0x2e, 0x63, 0x6f, 0x6d, 0x2f, 0x67, 0x72, 0x61, 0x76, 0x69, 0x74, 0x61, 0x74, 0x69, 0x6f, + 0x6e, 0x61, 0x6c, 0x2f, 0x74, 0x65, 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2f, 0x61, 0x70, 0x69, + 0x2f, 0x67, 0x65, 0x6e, 0x2f, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x2f, 0x67, 0x6f, 0x2f, 0x74, 0x65, + 0x6c, 0x65, 0x70, 0x6f, 0x72, 0x74, 0x2f, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, + 0x65, 0x2f, 0x76, 0x31, 0x3b, 0x61, 0x75, 0x74, 0x6f, 0x75, 0x70, 0x64, 0x61, 0x74, 0x65, 0x62, + 0x06, 0x70, 0x72, 0x6f, 0x74, 0x6f, 0x33, } var ( @@ -1257,52 +1443,57 @@ func file_teleport_autoupdate_v1_autoupdate_proto_rawDescGZIP() []byte { return file_teleport_autoupdate_v1_autoupdate_proto_rawDescData } -var file_teleport_autoupdate_v1_autoupdate_proto_enumTypes = make([]protoimpl.EnumInfo, 1) +var file_teleport_autoupdate_v1_autoupdate_proto_enumTypes = make([]protoimpl.EnumInfo, 2) var file_teleport_autoupdate_v1_autoupdate_proto_msgTypes = make([]protoimpl.MessageInfo, 14) var file_teleport_autoupdate_v1_autoupdate_proto_goTypes = []any{ (AutoUpdateAgentGroupState)(0), // 0: teleport.autoupdate.v1.AutoUpdateAgentGroupState - (*AutoUpdateConfig)(nil), // 1: teleport.autoupdate.v1.AutoUpdateConfig - (*AutoUpdateConfigSpec)(nil), // 2: teleport.autoupdate.v1.AutoUpdateConfigSpec - (*AutoUpdateConfigSpecTools)(nil), // 3: teleport.autoupdate.v1.AutoUpdateConfigSpecTools - (*AutoUpdateConfigSpecAgents)(nil), // 4: teleport.autoupdate.v1.AutoUpdateConfigSpecAgents - (*AgentAutoUpdateSchedules)(nil), // 5: teleport.autoupdate.v1.AgentAutoUpdateSchedules - (*AgentAutoUpdateGroup)(nil), // 6: teleport.autoupdate.v1.AgentAutoUpdateGroup - (*AutoUpdateVersion)(nil), // 7: teleport.autoupdate.v1.AutoUpdateVersion - (*AutoUpdateVersionSpec)(nil), // 8: teleport.autoupdate.v1.AutoUpdateVersionSpec - (*AutoUpdateVersionSpecTools)(nil), // 9: teleport.autoupdate.v1.AutoUpdateVersionSpecTools - (*AutoUpdateVersionSpecAgents)(nil), // 10: teleport.autoupdate.v1.AutoUpdateVersionSpecAgents - (*AutoUpdateAgentRollout)(nil), // 11: teleport.autoupdate.v1.AutoUpdateAgentRollout - (*AutoUpdateAgentRolloutSpec)(nil), // 12: teleport.autoupdate.v1.AutoUpdateAgentRolloutSpec - (*AutoUpdateAgentRolloutStatus)(nil), // 13: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus - (*AutoUpdateAgentRolloutStatusGroup)(nil), // 14: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup - (*v1.Metadata)(nil), // 15: teleport.header.v1.Metadata - 
(*durationpb.Duration)(nil), // 16: google.protobuf.Duration - (*timestamppb.Timestamp)(nil), // 17: google.protobuf.Timestamp + (AutoUpdateAgentRolloutState)(0), // 1: teleport.autoupdate.v1.AutoUpdateAgentRolloutState + (*AutoUpdateConfig)(nil), // 2: teleport.autoupdate.v1.AutoUpdateConfig + (*AutoUpdateConfigSpec)(nil), // 3: teleport.autoupdate.v1.AutoUpdateConfigSpec + (*AutoUpdateConfigSpecTools)(nil), // 4: teleport.autoupdate.v1.AutoUpdateConfigSpecTools + (*AutoUpdateConfigSpecAgents)(nil), // 5: teleport.autoupdate.v1.AutoUpdateConfigSpecAgents + (*AgentAutoUpdateSchedules)(nil), // 6: teleport.autoupdate.v1.AgentAutoUpdateSchedules + (*AgentAutoUpdateGroup)(nil), // 7: teleport.autoupdate.v1.AgentAutoUpdateGroup + (*AutoUpdateVersion)(nil), // 8: teleport.autoupdate.v1.AutoUpdateVersion + (*AutoUpdateVersionSpec)(nil), // 9: teleport.autoupdate.v1.AutoUpdateVersionSpec + (*AutoUpdateVersionSpecTools)(nil), // 10: teleport.autoupdate.v1.AutoUpdateVersionSpecTools + (*AutoUpdateVersionSpecAgents)(nil), // 11: teleport.autoupdate.v1.AutoUpdateVersionSpecAgents + (*AutoUpdateAgentRollout)(nil), // 12: teleport.autoupdate.v1.AutoUpdateAgentRollout + (*AutoUpdateAgentRolloutSpec)(nil), // 13: teleport.autoupdate.v1.AutoUpdateAgentRolloutSpec + (*AutoUpdateAgentRolloutStatus)(nil), // 14: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus + (*AutoUpdateAgentRolloutStatusGroup)(nil), // 15: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup + (*v1.Metadata)(nil), // 16: teleport.header.v1.Metadata + (*durationpb.Duration)(nil), // 17: google.protobuf.Duration + (*timestamppb.Timestamp)(nil), // 18: google.protobuf.Timestamp } var file_teleport_autoupdate_v1_autoupdate_proto_depIdxs = []int32{ - 15, // 0: teleport.autoupdate.v1.AutoUpdateConfig.metadata:type_name -> teleport.header.v1.Metadata - 2, // 1: teleport.autoupdate.v1.AutoUpdateConfig.spec:type_name -> teleport.autoupdate.v1.AutoUpdateConfigSpec - 3, // 2: teleport.autoupdate.v1.AutoUpdateConfigSpec.tools:type_name -> teleport.autoupdate.v1.AutoUpdateConfigSpecTools - 4, // 3: teleport.autoupdate.v1.AutoUpdateConfigSpec.agents:type_name -> teleport.autoupdate.v1.AutoUpdateConfigSpecAgents - 16, // 4: teleport.autoupdate.v1.AutoUpdateConfigSpecAgents.maintenance_window_duration:type_name -> google.protobuf.Duration - 5, // 5: teleport.autoupdate.v1.AutoUpdateConfigSpecAgents.schedules:type_name -> teleport.autoupdate.v1.AgentAutoUpdateSchedules - 6, // 6: teleport.autoupdate.v1.AgentAutoUpdateSchedules.regular:type_name -> teleport.autoupdate.v1.AgentAutoUpdateGroup - 15, // 7: teleport.autoupdate.v1.AutoUpdateVersion.metadata:type_name -> teleport.header.v1.Metadata - 8, // 8: teleport.autoupdate.v1.AutoUpdateVersion.spec:type_name -> teleport.autoupdate.v1.AutoUpdateVersionSpec - 9, // 9: teleport.autoupdate.v1.AutoUpdateVersionSpec.tools:type_name -> teleport.autoupdate.v1.AutoUpdateVersionSpecTools - 10, // 10: teleport.autoupdate.v1.AutoUpdateVersionSpec.agents:type_name -> teleport.autoupdate.v1.AutoUpdateVersionSpecAgents - 15, // 11: teleport.autoupdate.v1.AutoUpdateAgentRollout.metadata:type_name -> teleport.header.v1.Metadata - 12, // 12: teleport.autoupdate.v1.AutoUpdateAgentRollout.spec:type_name -> teleport.autoupdate.v1.AutoUpdateAgentRolloutSpec - 13, // 13: teleport.autoupdate.v1.AutoUpdateAgentRollout.status:type_name -> teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus - 14, // 14: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus.groups:type_name -> 
teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup - 17, // 15: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup.start_time:type_name -> google.protobuf.Timestamp - 0, // 16: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup.state:type_name -> teleport.autoupdate.v1.AutoUpdateAgentGroupState - 17, // 17: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup.last_update_time:type_name -> google.protobuf.Timestamp - 18, // [18:18] is the sub-list for method output_type - 18, // [18:18] is the sub-list for method input_type - 18, // [18:18] is the sub-list for extension type_name - 18, // [18:18] is the sub-list for extension extendee - 0, // [0:18] is the sub-list for field type_name + 16, // 0: teleport.autoupdate.v1.AutoUpdateConfig.metadata:type_name -> teleport.header.v1.Metadata + 3, // 1: teleport.autoupdate.v1.AutoUpdateConfig.spec:type_name -> teleport.autoupdate.v1.AutoUpdateConfigSpec + 4, // 2: teleport.autoupdate.v1.AutoUpdateConfigSpec.tools:type_name -> teleport.autoupdate.v1.AutoUpdateConfigSpecTools + 5, // 3: teleport.autoupdate.v1.AutoUpdateConfigSpec.agents:type_name -> teleport.autoupdate.v1.AutoUpdateConfigSpecAgents + 17, // 4: teleport.autoupdate.v1.AutoUpdateConfigSpecAgents.maintenance_window_duration:type_name -> google.protobuf.Duration + 6, // 5: teleport.autoupdate.v1.AutoUpdateConfigSpecAgents.schedules:type_name -> teleport.autoupdate.v1.AgentAutoUpdateSchedules + 7, // 6: teleport.autoupdate.v1.AgentAutoUpdateSchedules.regular:type_name -> teleport.autoupdate.v1.AgentAutoUpdateGroup + 16, // 7: teleport.autoupdate.v1.AutoUpdateVersion.metadata:type_name -> teleport.header.v1.Metadata + 9, // 8: teleport.autoupdate.v1.AutoUpdateVersion.spec:type_name -> teleport.autoupdate.v1.AutoUpdateVersionSpec + 10, // 9: teleport.autoupdate.v1.AutoUpdateVersionSpec.tools:type_name -> teleport.autoupdate.v1.AutoUpdateVersionSpecTools + 11, // 10: teleport.autoupdate.v1.AutoUpdateVersionSpec.agents:type_name -> teleport.autoupdate.v1.AutoUpdateVersionSpecAgents + 16, // 11: teleport.autoupdate.v1.AutoUpdateAgentRollout.metadata:type_name -> teleport.header.v1.Metadata + 13, // 12: teleport.autoupdate.v1.AutoUpdateAgentRollout.spec:type_name -> teleport.autoupdate.v1.AutoUpdateAgentRolloutSpec + 14, // 13: teleport.autoupdate.v1.AutoUpdateAgentRollout.status:type_name -> teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus + 17, // 14: teleport.autoupdate.v1.AutoUpdateAgentRolloutSpec.maintenance_window_duration:type_name -> google.protobuf.Duration + 15, // 15: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus.groups:type_name -> teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup + 1, // 16: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus.state:type_name -> teleport.autoupdate.v1.AutoUpdateAgentRolloutState + 18, // 17: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus.start_time:type_name -> google.protobuf.Timestamp + 18, // 18: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatus.time_override:type_name -> google.protobuf.Timestamp + 18, // 19: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup.start_time:type_name -> google.protobuf.Timestamp + 0, // 20: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup.state:type_name -> teleport.autoupdate.v1.AutoUpdateAgentGroupState + 18, // 21: teleport.autoupdate.v1.AutoUpdateAgentRolloutStatusGroup.last_update_time:type_name -> google.protobuf.Timestamp + 22, // [22:22] is the sub-list for method output_type + 22, // [22:22] is the sub-list for method input_type + 22, // [22:22] is 
the sub-list for extension type_name + 22, // [22:22] is the sub-list for extension extendee + 0, // [0:22] is the sub-list for field type_name } func init() { file_teleport_autoupdate_v1_autoupdate_proto_init() } @@ -1315,7 +1506,7 @@ func file_teleport_autoupdate_v1_autoupdate_proto_init() { File: protoimpl.DescBuilder{ GoPackagePath: reflect.TypeOf(x{}).PkgPath(), RawDescriptor: file_teleport_autoupdate_v1_autoupdate_proto_rawDesc, - NumEnums: 1, + NumEnums: 2, NumMessages: 14, NumExtensions: 0, NumServices: 0, diff --git a/api/proto/teleport/autoupdate/v1/autoupdate.proto b/api/proto/teleport/autoupdate/v1/autoupdate.proto index 5c7527d0177cf..73f6d440f998e 100644 --- a/api/proto/teleport/autoupdate/v1/autoupdate.proto +++ b/api/proto/teleport/autoupdate/v1/autoupdate.proto @@ -71,14 +71,18 @@ message AgentAutoUpdateSchedules { // AgentAutoUpdateGroup specifies the update schedule for a group of agents. message AgentAutoUpdateGroup { + reserved 4; + reserved "wait_days"; + // name of the group string name = 1; // days when the update can run. Supported values are "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" and "*" repeated string days = 2; // start_hour to initiate update int32 start_hour = 3; - // wait_days after last group succeeds before this group can run. This can only be used when the strategy is "halt-on-failure". - int64 wait_days = 4; + // wait_hours after the last group succeeds before this group can run. This can only be used when the strategy is "halt-on-failure". + // This field must not be negative. + int32 wait_hours = 5; } // AutoUpdateVersion is a resource singleton with version required for @@ -159,15 +163,38 @@ message AutoUpdateAgentRolloutSpec { // availability. A group finishes its update once most of its agents are running the correct version. Agents that // missed the group update will try to catch up as soon as possible. string strategy = 5; + // maintenance_window_duration is the maintenance window duration. This can only be set if `strategy` is "time-based". + // Once the window is over, the group transitions to the done state. Existing agents won't be updated until the next + // maintenance window. + google.protobuf.Duration maintenance_window_duration = 6; } // AutoUpdateAgentRolloutStatus tracks the current agent rollout status. +// The status is reset if any spec field changes except the mode. message AutoUpdateAgentRolloutStatus { repeated AutoUpdateAgentRolloutStatusGroup groups = 1; + AutoUpdateAgentRolloutState state = 2; + // The start time is set when the rollout is created or reset. Usually this is caused by a version change. + // The timestamp allows the controller to detect that the rollout just changed. + // The controller will not start any group that should have been active before the start_time to avoid a double-update + // effect. + // For example, a group updates every day between 13:00 and 14:00. If the target version changes at 13:30, the group + // will not start updating to the new version directly. The controller sees that the group's theoretical start time is + // before the rollout start time, so the maintenance window belongs to the previous rollout. + // When the timestamp is nil, the controller skips the start time check and allows groups to activate. + google.protobuf.Timestamp start_time = 3; + + // Time override is an optional timestamp making the autoupdate_agent_rollout controller use a specific time instead + // of the system clock when evaluating time-based criteria. This field is used for testing and troubleshooting + // purposes. + google.protobuf.Timestamp time_override = 4; }
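To make the double-update guard described in the `start_time` comment concrete, here is a minimal sketch of the check, using the generated Go types but a hypothetical helper name and signature (this is not the actual rollout controller code):

```go
package sketch

import (
	"time"

	autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
)

// canActivateGroup applies the start_time guard: a group whose theoretical
// maintenance window opened before the rollout's start_time belongs to the
// previous rollout and must not activate now, which avoids a double update.
func canActivateGroup(windowStart time.Time, status *autoupdatepb.AutoUpdateAgentRolloutStatus) bool {
	// A nil start_time means the guard is skipped and groups may activate.
	if status.GetStartTime() == nil {
		return true
	}
	return !windowStart.Before(status.GetStartTime().AsTime())
}
```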
// AutoUpdateAgentRolloutStatusGroup tracks the current agent rollout status of a specific group. message AutoUpdateAgentRolloutStatusGroup { + reserved 8; + reserved "config_wait_days"; + // name of the group string name = 1; // start_time of the rollout @@ -178,6 +205,13 @@ message AutoUpdateAgentRolloutStatusGroup { google.protobuf.Timestamp last_update_time = 4; // last_update_reason is the trigger for the last update string last_update_reason = 5; + // config_days when the update can run. Supported values are "Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun" and "*" + repeated string config_days = 6; + // config_start_hour to initiate update + int32 config_start_hour = 7; + // config_wait_hours after the last group succeeds before this group can run. This can only be used when the strategy is "halt-on-failure". + // This field must not be negative. + int32 config_wait_hours = 9; } // AutoUpdateAgentGroupState represents the agent group state. This state controls whether the agents from this group @@ -192,7 +226,25 @@ enum AutoUpdateAgentGroupState { AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE = 2; // AUTO_UPDATE_AGENT_GROUP_STATE_DONE represents that the group has been updated. New agents should run v2. AUTO_UPDATE_AGENT_GROUP_STATE_DONE = 3; - // AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK represents that the group has been rolleback. + // AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK represents that the group has been rolled back. // New agents should run v1, existing agents should update to v1. AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK = 4; } + +// AutoUpdateAgentRolloutState represents the rollout state. It indicates whether Teleport has started updating agents +// from the start version to the target version, whether the update is done or still in progress, and whether the +// rollout was manually reverted. +enum AutoUpdateAgentRolloutState { + // AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED state + AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED = 0; + // AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED represents that no group in the rollout has been started yet. + AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED = 1; + // AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE represents that at least one group of the rollout has started. + // If every group is finished, the state will be AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE. + AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE = 2; + // AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE represents that every group is in the DONE state, or has been in the done + // state (groups might become active again in the time-based strategy). + AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE = 3; + // AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK represents that at least one group is in the rolledback state.
+ AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK = 4; +} diff --git a/api/types/autoupdate/config.go b/api/types/autoupdate/config.go index 32ae056195b64..ad79765895c0d 100644 --- a/api/types/autoupdate/config.go +++ b/api/types/autoupdate/config.go @@ -79,21 +79,41 @@ func ValidateAutoUpdateConfig(c *autoupdate.AutoUpdateConfig) error { return trace.BadParameter("spec.agents.maintenance_window_duration must be greater than 10 minutes when the strategy is %q", c.Spec.Agents.Strategy) } - if err := checkAgentSchedules(c.Spec.Agents.Schedules); err != nil { + if err := checkAgentSchedules(c); err != nil { return trace.Wrap(err, "validating spec.agents.schedules") } - } return nil } -func checkAgentSchedules(schedules *autoupdate.AgentAutoUpdateSchedules) error { - // TODO: change this logic when we implement group support. - // Currently we reject any non-nil schedule - // When we'll implement schedule support, we'll treat an empty schedule as the default schedule. - if schedules == nil { - return nil +func checkAgentSchedules(c *autoupdate.AutoUpdateConfig) error { + // Validate groups + groups := c.Spec.Agents.GetSchedules().GetRegular() + seenGroups := make(map[string]int, len(groups)) + for i, group := range groups { + if group.Name == "" { + return trace.BadParameter("spec.agents.schedules.regular[%d].name should not be empty", i) + } + if _, err := types.ParseWeekdays(group.Days); err != nil { + return trace.Wrap(err, "validating spec.agents.schedules.regular[%d].days", i) + } + if group.WaitHours < 0 { + return trace.BadParameter("spec.agents.schedules.regular[%d].wait_hours cannot be negative", i) + } + if group.StartHour > 23 || group.StartHour < 0 { + return trace.BadParameter("spec.agents.schedules.regular[%d].start_hour must be between 0 and 23", i) + } + if c.Spec.Agents.Strategy == AgentsStrategyTimeBased && group.WaitHours != 0 { + return trace.BadParameter("spec.agents.schedules.regular[%d].wait_hours must be zero when strategy is %s", i, AgentsStrategyTimeBased) + } + if c.Spec.Agents.Strategy == AgentsStrategyHaltOnError && i == 0 && group.WaitHours != 0 { + return trace.BadParameter("spec.agents.schedules.regular[0].wait_hours must be zero as it's the first group") + } + if conflictingGroup, ok := seenGroups[group.Name]; ok { + return trace.BadParameter("spec.agents.schedules.regular contains groups with the same name %q at indices %d and %d", group.Name, conflictingGroup, i) + } + seenGroups[group.Name] = i } - return trace.NotImplemented("agent schedules are not implemented yet") + return nil } diff --git a/api/types/autoupdate/config_test.go b/api/types/autoupdate/config_test.go index f6b6a87aa6bd8..0981dd7e681c1 100644 --- a/api/types/autoupdate/config_test.go +++ b/api/types/autoupdate/config_test.go @@ -32,6 +32,7 @@ import ( // TestNewAutoUpdateConfig verifies validation for AutoUpdateConfig resource. 
func TestNewAutoUpdateConfig(t *testing.T) { + t.Parallel() tests := []struct { name string spec *autoupdate.AutoUpdateConfigSpec @@ -225,3 +226,250 @@ func TestNewAutoUpdateConfig(t *testing.T) { }) } } + +func TestValidateAutoUpdateConfig(t *testing.T) { + t.Parallel() + tests := []struct { + name string + config *autoupdate.AutoUpdateConfig + assertErr require.ErrorAssertionFunc + }{ + { + name: "valid time-based rollout with groups", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyTimeBased, + MaintenanceWindowDuration: durationpb.New(time.Hour), + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "g2", Days: []string{"*"}, WaitHours: 0}, + }, + }, + }, + }, + }, + assertErr: require.NoError, + }, + { + name: "valid halt-on-error config with groups", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "g2", Days: []string{"*"}, WaitHours: 1}, + }, + }, + }, + }, + }, + assertErr: require.NoError, + }, + { + name: "group with negative wait days", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "g2", Days: []string{"*"}, WaitHours: -1}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with invalid week days", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "g2", Days: []string{"frurfday"}, WaitHours: 1}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with no week days", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, 
WaitHours: 0}, + {Name: "g2", WaitHours: 1}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with empty name", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "", Days: []string{"*"}, WaitHours: 1}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "first group with non zero wait days", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 1}, + {Name: "g2", Days: []string{"*"}, WaitHours: 0}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with non zero wait days on a time-based config", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyTimeBased, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "g2", Days: []string{"*"}, WaitHours: 1}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with impossible start hour", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "dark hour", Days: []string{"*"}, StartHour: 24}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "groups with same names", + config: &autoupdate.AutoUpdateConfig{ + Kind: types.KindAutoUpdateConfig, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateConfig, + }, + Spec: &autoupdate.AutoUpdateConfigSpec{ + Agents: &autoupdate.AutoUpdateConfigSpecAgents{ + Mode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + Schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + {Name: "g1", Days: []string{"*"}, WaitHours: 0}, + }, + }, + }, + }, + }, + assertErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateAutoUpdateConfig(tt.config) + tt.assertErr(t, err) + }) + } +} diff --git a/api/types/autoupdate/rollout.go 
b/api/types/autoupdate/rollout.go index d935244af31b3..111c9a65e0095 100644 --- a/api/types/autoupdate/rollout.go +++ b/api/types/autoupdate/rollout.go @@ -72,5 +72,32 @@ func ValidateAutoUpdateAgentRollout(v *autoupdate.AutoUpdateAgentRollout) error return trace.Wrap(err, "validating spec.strategy") } + groups := v.GetStatus().GetGroups() + seenGroups := make(map[string]int, len(groups)) + for i, group := range groups { + if group.Name == "" { + return trace.BadParameter("status.groups[%d].name is empty", i) + } + if _, err := types.ParseWeekdays(group.ConfigDays); err != nil { + return trace.Wrap(err, "validating status.groups[%d].config_days", i) + } + if group.ConfigStartHour > 23 || group.ConfigStartHour < 0 { + return trace.BadParameter("status.groups[%d].config_start_hour must be between 0 and 23", i) + } + if group.ConfigWaitHours < 0 { + return trace.BadParameter("status.groups[%d].config_wait_hours cannot be negative", i) + } + if v.Spec.Strategy == AgentsStrategyTimeBased && group.ConfigWaitHours != 0 { + return trace.BadParameter("status.groups[%d].config_wait_hours must be zero when strategy is %s", i, AgentsStrategyTimeBased) + } + if v.Spec.Strategy == AgentsStrategyHaltOnError && i == 0 && group.ConfigWaitHours != 0 { + return trace.BadParameter("status.groups[0].config_wait_hours must be zero as it's the first group") + } + if conflictingGroup, ok := seenGroups[group.Name]; ok { + return trace.BadParameter("status.groups contains groups with the same name %q at indices %d and %d", group.Name, conflictingGroup, i) + } + seenGroups[group.Name] = i + } + return nil }
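The `config_*` fields validated above mirror the `autoupdate_config` agent schedule at the time the rollout was created, so later config edits cannot reshuffle an in-flight rollout. A minimal sketch of how a controller could take that snapshot, assuming a hypothetical helper and the generated `AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED` value for new groups (not the actual controller code):

```go
package sketch

import (
	autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
)

// snapshotGroups copies the configured schedule into rollout status groups so
// that an in-flight rollout keeps the schedule it started with.
func snapshotGroups(schedule []*autoupdatepb.AgentAutoUpdateGroup) []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup {
	groups := make([]*autoupdatepb.AutoUpdateAgentRolloutStatusGroup, 0, len(schedule))
	for _, g := range schedule {
		groups = append(groups, &autoupdatepb.AutoUpdateAgentRolloutStatusGroup{
			Name:            g.GetName(),
			ConfigDays:      g.GetDays(),
			ConfigStartHour: g.GetStartHour(),
			ConfigWaitHours: g.GetWaitHours(),
			// New groups start in the unstarted state.
			State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
		})
	}
	return groups
}
```

diff --git a/api/types/autoupdate/rollout_test.go b/api/types/autoupdate/rollout_test.go index cce4dc8495d83..d95ba9ef890fd 100644 --- a/api/types/autoupdate/rollout_test.go +++ b/api/types/autoupdate/rollout_test.go @@ -30,6 +30,7 @@ import ( // TestNewAutoUpdateConfig verifies validation for AutoUpdateConfig resource.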
func TestNewAutoUpdateAgentRollout(t *testing.T) { + t.Parallel() tests := []struct { name string spec *autoupdate.AutoUpdateAgentRolloutSpec @@ -41,7 +42,7 @@ func TestNewAutoUpdateAgentRollout(t *testing.T) { spec: &autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.3", TargetVersion: "2.3.4-dev", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, AutoupdateMode: AgentsUpdateModeEnabled, Strategy: AgentsStrategyHaltOnError, }, @@ -57,7 +58,7 @@ func TestNewAutoUpdateAgentRollout(t *testing.T) { Spec: &autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.3", TargetVersion: "2.3.4-dev", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, AutoupdateMode: AgentsUpdateModeEnabled, Strategy: AgentsStrategyHaltOnError, }, @@ -74,7 +75,7 @@ func TestNewAutoUpdateAgentRollout(t *testing.T) { name: "missing start version", spec: &autoupdate.AutoUpdateAgentRolloutSpec{ TargetVersion: "2.3.4-dev", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, AutoupdateMode: AgentsUpdateModeEnabled, Strategy: AgentsStrategyHaltOnError, }, @@ -87,7 +88,7 @@ func TestNewAutoUpdateAgentRollout(t *testing.T) { spec: &autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.3", TargetVersion: "2-3-4", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, AutoupdateMode: AgentsUpdateModeEnabled, Strategy: AgentsStrategyHaltOnError, }, @@ -100,7 +101,7 @@ func TestNewAutoUpdateAgentRollout(t *testing.T) { spec: &autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.3", TargetVersion: "2.3.4-dev", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, AutoupdateMode: "invalid-mode", Strategy: AgentsStrategyHaltOnError, }, @@ -126,7 +127,7 @@ func TestNewAutoUpdateAgentRollout(t *testing.T) { spec: &autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.3", TargetVersion: "2.3.4-dev", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, AutoupdateMode: AgentsUpdateModeEnabled, Strategy: "invalid-strategy", }, @@ -143,3 +144,216 @@ func TestNewAutoUpdateAgentRollout(t *testing.T) { }) } } + +var ( + timeBasedRolloutSpec = autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "2.3.4-dev", + Schedule: AgentsScheduleRegular, + AutoupdateMode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyTimeBased, + } + haltOnErrorRolloutSpec = autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "2.3.4-dev", + Schedule: AgentsScheduleRegular, + AutoupdateMode: AgentsUpdateModeEnabled, + Strategy: AgentsStrategyHaltOnError, + } +) + +func TestValidateAutoUpdateAgentRollout(t *testing.T) { + t.Parallel() + tests := []struct { + name string + rollout *autoupdate.AutoUpdateAgentRollout + assertErr require.ErrorAssertionFunc + }{ + { + name: "valid time-based rollout with groups", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &timeBasedRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "g2", ConfigDays: []string{"*"}}, + }, + }, + }, + assertErr: require.NoError, + }, + { + name: "valid halt-on-error rollout with groups", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: 
types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "g2", ConfigDays: []string{"*"}, ConfigWaitHours: 1}, + }, + }, + }, + assertErr: require.NoError, + }, + { + name: "group with negative wait days", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "g2", ConfigDays: []string{"*"}, ConfigWaitHours: -1}, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with invalid week days", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "g2", ConfigDays: []string{"frurfday"}, ConfigWaitHours: 1}, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with no week days", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "g2", ConfigDays: []string{}, ConfigWaitHours: 1}, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with empty name", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "", ConfigDays: []string{"*"}, ConfigWaitHours: 1}, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "first group with non zero wait days", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}, ConfigWaitHours: 1}, + {Name: "g2", ConfigDays: []string{"*"}}, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with non zero wait days on a time-based rollout", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &timeBasedRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "g2", ConfigDays: []string{"*"}, ConfigWaitHours: 1}, + }, + }, + }, + assertErr: 
require.Error, + }, + { + name: "group with impossible start hour", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "dark hour", ConfigDays: []string{"*"}, ConfigStartHour: 24}, + }, + }, + }, + assertErr: require.Error, + }, + { + name: "group with same name", + rollout: &autoupdate.AutoUpdateAgentRollout{ + Kind: types.KindAutoUpdateAgentRollout, + Version: types.V1, + Metadata: &headerv1.Metadata{ + Name: types.MetaNameAutoUpdateAgentRollout, + }, + Spec: &haltOnErrorRolloutSpec, + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {Name: "g1", ConfigDays: []string{"*"}}, + {Name: "g1", ConfigDays: []string{"*"}}, + }, + }, + }, + assertErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + err := ValidateAutoUpdateAgentRollout(tt.rollout) + tt.assertErr(t, err) + }) + } +} diff --git a/api/types/autoupdate/utils.go b/api/types/autoupdate/utils.go index 4772ff8a94411..7fdcf3d612903 100644 --- a/api/types/autoupdate/utils.go +++ b/api/types/autoupdate/utils.go @@ -51,7 +51,7 @@ func checkToolsMode(mode string) error { func checkScheduleName(schedule string) error { switch schedule { - case AgentsScheduleRegular, AgentsScheduleImmediate: + case AgentsScheduleImmediate, AgentsScheduleRegular: return nil default: return trace.BadParameter("unsupported schedule type: %q", schedule) diff --git a/api/types/autoupdate/version_test.go b/api/types/autoupdate/version_test.go index a59a4f6fe6c22..793d7d6a2a145 100644 --- a/api/types/autoupdate/version_test.go +++ b/api/types/autoupdate/version_test.go @@ -94,7 +94,7 @@ func TestNewAutoUpdateVersion(t *testing.T) { Agents: &autoupdate.AutoUpdateVersionSpecAgents{ StartVersion: "1.2.3-dev.1", TargetVersion: "1.2.3-dev.2", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, Mode: AgentsUpdateModeEnabled, }, }, @@ -111,7 +111,7 @@ func TestNewAutoUpdateVersion(t *testing.T) { Agents: &autoupdate.AutoUpdateVersionSpecAgents{ StartVersion: "1.2.3-dev.1", TargetVersion: "1.2.3-dev.2", - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, Mode: AgentsUpdateModeEnabled, }, }, @@ -124,7 +124,7 @@ func TestNewAutoUpdateVersion(t *testing.T) { StartVersion: "", TargetVersion: "1.2.3", Mode: AgentsUpdateModeEnabled, - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, }, }, assertErr: func(t *testing.T, err error, a ...any) { @@ -138,7 +138,7 @@ func TestNewAutoUpdateVersion(t *testing.T) { StartVersion: "1.2.3-dev", TargetVersion: "", Mode: AgentsUpdateModeEnabled, - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, }, }, assertErr: func(t *testing.T, err error, a ...any) { @@ -152,7 +152,7 @@ func TestNewAutoUpdateVersion(t *testing.T) { StartVersion: "17-0-0", TargetVersion: "1.2.3", Mode: AgentsUpdateModeEnabled, - Schedule: AgentsScheduleRegular, + Schedule: AgentsScheduleImmediate, }, }, assertErr: func(t *testing.T, err error, a ...any) { @@ -166,7 +166,7 @@ func TestNewAutoUpdateVersion(t *testing.T) { StartVersion: "1.2.3", TargetVersion: "17-0-0", Mode: AgentsUpdateModeEnabled, - Schedule: AgentsScheduleRegular, + Schedule: 
AgentsScheduleImmediate, }, }, assertErr: func(t *testing.T, err error, a ...any) { diff --git a/api/types/maintenance.go b/api/types/maintenance.go index 9cab6a9ad4765..31a48472e6aa8 100644 --- a/api/types/maintenance.go +++ b/api/types/maintenance.go @@ -45,10 +45,10 @@ var validWeekdays = [7]time.Weekday{ time.Saturday, } -// parseWeekday attempts to interpret a string as a time.Weekday. In the interest of flexibility, +// ParseWeekday attempts to interpret a string as a time.Weekday. In the interest of flexibility, // parsing is case-insensitive and supports the common three-letter shorthand accepted by many // common scheduling utilities (e.g. crontab, systemd timers). -func parseWeekday(s string) (day time.Weekday, ok bool) { +func ParseWeekday(s string) (day time.Weekday, ok bool) { for _, w := range validWeekdays { if strings.EqualFold(w.String(), s) || strings.EqualFold(w.String()[:3], s) { return w, true @@ -58,6 +58,42 @@ func parseWeekday(s string) (day time.Weekday, ok bool) { return time.Sunday, false } +// ParseWeekdays attempts to parse a slice of strings representing week days. +// The slice must not be empty; it may also be the single value "*", representing the whole week. +// Day order doesn't matter, but the same weekday must not appear multiple times. +// In the interest of flexibility, parsing is case-insensitive and supports the common three-letter shorthand +// accepted by many common scheduling utilities (e.g. crontab, systemd timers). +func ParseWeekdays(days []string) (map[time.Weekday]struct{}, error) { + if len(days) == 0 { + return nil, trace.BadParameter("empty weekdays list") + } + // Special case: we support wildcards. + if len(days) == 1 && days[0] == Wildcard { + return map[time.Weekday]struct{}{ + time.Monday: {}, + time.Tuesday: {}, + time.Wednesday: {}, + time.Thursday: {}, + time.Friday: {}, + time.Saturday: {}, + time.Sunday: {}, + }, nil + } + weekdays := make(map[time.Weekday]struct{}, 7) + for _, day := range days { + weekday, ok := ParseWeekday(day) + if !ok { + return nil, trace.BadParameter("failed to parse weekday: %v", day) + } + // Check if this is a duplicate + if _, ok := weekdays[weekday]; ok { + return nil, trace.BadParameter("duplicate weekday: %v", weekday.String()) + } + weekdays[weekday] = struct{}{} + } + return weekdays, nil +} +
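A quick usage sketch for the new `ParseWeekdays` helper (a standalone example, not part of the diff):

```go
package main

import (
	"fmt"
	"time"

	"github.com/gravitational/teleport/api/types"
)

func main() {
	// The wildcard expands to the full week.
	week, _ := types.ParseWeekdays([]string{"*"})
	fmt.Println(len(week)) // 7

	// Full names and three-letter shorthands are accepted case-insensitively.
	days, err := types.ParseWeekdays([]string{"Mon", "tuesday", "WED"})
	_, hasMonday := days[time.Monday]
	fmt.Println(hasMonday, err) // true <nil>

	// Duplicates and unknown names are rejected.
	_, err = types.ParseWeekdays([]string{"Mon", "Monday"})
	fmt.Println(err != nil) // true
}
```

// generator builds a closure that iterates valid maintenance config from the current day onward. Used in // schedule export logic and tests.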
func (w *AgentUpgradeWindow) generator(from time.Time) func() (start time.Time, end time.Time) { @@ -75,7 +111,7 @@ func (w *AgentUpgradeWindow) generator(from time.Time) func() (start time.Time, var weekdays []time.Weekday for _, d := range w.Weekdays { - if p, ok := parseWeekday(d); ok { + if p, ok := ParseWeekday(d); ok { weekdays = append(weekdays, p) } } @@ -203,7 +239,7 @@ func (m *ClusterMaintenanceConfigV1) CheckAndSetDefaults() error { } for _, day := range m.Spec.AgentUpgrades.Weekdays { - if _, ok := parseWeekday(day); !ok { + if _, ok := ParseWeekday(day); !ok { return trace.BadParameter("invalid weekday in agent upgrade window: %q", day) } } diff --git a/api/types/maintenance_test.go b/api/types/maintenance_test.go index 203006a8dee37..db604eb7b31a8 100644 --- a/api/types/maintenance_test.go +++ b/api/types/maintenance_test.go @@ -205,7 +205,7 @@ func TestWeekdayParser(t *testing.T) { } for _, tt := range tts { - day, ok := parseWeekday(tt.input) + day, ok := ParseWeekday(tt.input) if tt.fail { require.False(t, ok) continue @@ -271,3 +271,83 @@ func TestWithinUpgradeWindow(t *testing.T) { }) } } + +func TestParseWeekdays(t *testing.T) { + t.Parallel() + tests := []struct { + name string + input []string + expect map[time.Weekday]struct{} + expectError require.ErrorAssertionFunc + }{ + { + name: "Nil slice", + input: nil, + expect: nil, + expectError: require.Error, + }, + { + name: "Empty slice", + input: []string{}, + expect: nil, + expectError: require.Error, + }, + { + name: "Few valid days", + input: []string{"Mon", "Tuesday", "WEDNESDAY"}, + expect: map[time.Weekday]struct{}{ + time.Monday: {}, + time.Tuesday: {}, + time.Wednesday: {}, + }, + expectError: require.NoError, + }, + { + name: "Every day", + input: []string{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"}, + expect: map[time.Weekday]struct{}{ + time.Monday: {}, + time.Tuesday: {}, + time.Wednesday: {}, + time.Thursday: {}, + time.Friday: {}, + time.Saturday: {}, + time.Sunday: {}, + }, + expectError: require.NoError, + }, + { + name: "Wildcard", + input: []string{"*"}, + expect: map[time.Weekday]struct{}{ + time.Monday: {}, + time.Tuesday: {}, + time.Wednesday: {}, + time.Thursday: {}, + time.Friday: {}, + time.Saturday: {}, + time.Sunday: {}, + }, + expectError: require.NoError, + }, + { + name: "Duplicated day", + input: []string{"Mon", "Monday"}, + expect: nil, + expectError: require.Error, + }, + { + name: "Invalid days", + input: []string{"Mon", "Tuesday", "frurfday"}, + expect: nil, + expectError: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + result, err := ParseWeekdays(tt.input) + tt.expectError(t, err) + require.Equal(t, tt.expect, result) + }) + } +} diff --git a/assets/install-scripts/install.sh b/assets/install-scripts/install.sh deleted file mode 100755 index 949bb1bcc4d8f..0000000000000 --- a/assets/install-scripts/install.sh +++ /dev/null @@ -1,430 +0,0 @@ -#!/bin/bash -# Copyright 2022 Gravitational, Inc - -# This script detects the current Linux distribution and installs Teleport -# through its package manager, if supported, or downloading a tarball otherwise. -# We'll download Teleport from the official website and checksum it to make sure it was properly -# downloaded before executing. - -# The script is wrapped inside a function to protect against the connection being interrupted -# in the middle of the stream. 
- -# For more download options, head to https://goteleport.com/download/ - -set -euo pipefail - -# download uses curl or wget to download a teleport binary -download() { - URL=$1 - TMP_PATH=$2 - - echo "Downloading $URL" - if type curl &>/dev/null; then - set -x - # shellcheck disable=SC2086 - $SUDO $CURL -o "$TMP_PATH" "$URL" - else - set -x - # shellcheck disable=SC2086 - $SUDO $CURL -O "$TMP_PATH" "$URL" - fi - set +x -} - -install_via_apt_get() { - echo "Installing Teleport v$TELEPORT_VERSION via apt-get" - add_apt_key - set -x - $SUDO apt-get install -y "teleport$TELEPORT_SUFFIX=$TELEPORT_VERSION" - set +x - if [ "$TELEPORT_EDITION" = "cloud" ]; then - set -x - $SUDO apt-get install -y teleport-ent-updater - set +x - fi -} - -add_apt_key() { - APT_REPO_ID=$ID - APT_REPO_VERSION_CODENAME=$VERSION_CODENAME - IS_LEGACY=0 - - # check if we must use legacy .asc key - case "$ID" in - ubuntu | pop | neon | zorin) - if ! expr "$VERSION_ID" : "2.*" >/dev/null; then - IS_LEGACY=1 - fi - ;; - debian | raspbian) - if [ "$VERSION_ID" -lt 11 ]; then - IS_LEGACY=1 - fi - ;; - linuxmint | parrot) - if [ "$VERSION_ID" -lt 5 ]; then - IS_LEGACY=1 - fi - ;; - elementary) - if [ "$VERSION_ID" -lt 6 ]; then - IS_LEGACY=1 - fi - ;; - kali) - YEAR="$(echo "$VERSION_ID" | cut -f1 -d.)" - if [ "$YEAR" -lt 2021 ]; then - IS_LEGACY=1 - fi - ;; - esac - - if [[ "$IS_LEGACY" == 0 ]]; then - # set APT_REPO_ID if necessary - case "$ID" in - linuxmint | kali | elementary | pop | raspbian | neon | zorin | parrot) - APT_REPO_ID=$ID_LIKE - ;; - esac - - # set APT_REPO_VERSION_CODENAME if necessary - case "$ID" in - linuxmint | elementary | pop | neon | zorin) - APT_REPO_VERSION_CODENAME=$UBUNTU_CODENAME - ;; - kali) - APT_REPO_VERSION_CODENAME="bullseye" - ;; - parrot) - APT_REPO_VERSION_CODENAME="buster" - ;; - esac - fi - - echo "Downloading Teleport's PGP public key..." - TEMP_DIR=$(mktemp -d -t teleport-XXXXXXXXXX) - MAJOR=$(echo "$TELEPORT_VERSION" | cut -f1 -d.) - TELEPORT_REPO="" - - CHANNEL="stable/v${MAJOR}" - if [ "$TELEPORT_EDITION" = "cloud" ]; then - CHANNEL="stable/cloud" - fi - - if [[ "$IS_LEGACY" == 1 ]]; then - if ! type gpg >/dev/null; then - echo "Installing gnupg" - set -x - $SUDO apt-get update - $SUDO apt-get install -y gnupg - set +x - fi - TMP_KEY="$TEMP_DIR/teleport-pubkey.asc" - download "https://deb.releases.teleport.dev/teleport-pubkey.asc" "$TMP_KEY" - set -x - $SUDO apt-key add "$TMP_KEY" - set +x - TELEPORT_REPO="deb https://apt.releases.teleport.dev/${APT_REPO_ID?} ${APT_REPO_VERSION_CODENAME?} ${CHANNEL}" - else - TMP_KEY="$TEMP_DIR/teleport-pubkey.gpg" - download "https://apt.releases.teleport.dev/gpg" "$TMP_KEY" - set -x - $SUDO mkdir -p /etc/apt/keyrings - $SUDO cp "$TMP_KEY" /etc/apt/keyrings/teleport-archive-keyring.asc - set +x - TELEPORT_REPO="deb [signed-by=/etc/apt/keyrings/teleport-archive-keyring.asc] https://apt.releases.teleport.dev/${APT_REPO_ID?} ${APT_REPO_VERSION_CODENAME?} ${CHANNEL}" - fi - - set -x - echo "$TELEPORT_REPO" | $SUDO tee /etc/apt/sources.list.d/teleport.list >/dev/null - set +x - - set -x - $SUDO apt-get update - set +x -} - -# $1 is the value of the $ID path segment in the YUM repo URL. In -# /etc/os-release, this is either the value of $ID or $ID_LIKE. -install_via_yum() { - # shellcheck source=/dev/null - source /etc/os-release - - # Get the major version from the version ID. 
- VERSION_ID=$(echo "$VERSION_ID" | grep -Eo "^[0-9]+") - TELEPORT_MAJOR_VERSION="v$(echo "$TELEPORT_VERSION" | grep -Eo "^[0-9]+")" - - CHANNEL="stable/${TELEPORT_MAJOR_VERSION}" - if [ "$TELEPORT_EDITION" = "cloud" ]; then - CHANNEL="stable/cloud" - fi - - if type dnf &>/dev/null; then - echo "Installing Teleport v$TELEPORT_VERSION through dnf" - $SUDO dnf install -y 'dnf-command(config-manager)' - $SUDO dnf config-manager --add-repo "$(rpm --eval "https://yum.releases.teleport.dev/$1/$VERSION_ID/Teleport/%{_arch}/$CHANNEL/teleport-yum.repo")" - $SUDO dnf install -y "teleport$TELEPORT_SUFFIX-$TELEPORT_VERSION" - - if [ "$TELEPORT_EDITION" = "cloud" ]; then - $SUDO dnf install -y teleport-ent-updater - fi - - else - echo "Installing Teleport v$TELEPORT_VERSION through yum" - $SUDO yum install -y yum-utils - $SUDO yum-config-manager --add-repo "$(rpm --eval "https://yum.releases.teleport.dev/$1/$VERSION_ID/Teleport/%{_arch}/$CHANNEL/teleport-yum.repo")" - $SUDO yum install -y "teleport$TELEPORT_SUFFIX-$TELEPORT_VERSION" - - if [ "$TELEPORT_EDITION" = "cloud" ]; then - $SUDO yum install -y teleport-ent-updater - fi - fi - set +x -} - -install_via_zypper() { - # shellcheck source=/dev/null - source /etc/os-release - - # Get the major version from the version ID. - VERSION_ID=$(echo "$VERSION_ID" | grep -Eo "^[0-9]+") - TELEPORT_MAJOR_VERSION="v$(echo "$TELEPORT_VERSION" | grep -Eo "^[0-9]+")" - - CHANNEL="stable/${TELEPORT_MAJOR_VERSION}" - if [ "$TELEPORT_EDITION" = "cloud" ]; then - CHANNEL="stable/cloud" - fi - - $SUDO rpm --import https://zypper.releases.teleport.dev/gpg - $SUDO zypper addrepo --refresh --repo "$(rpm --eval "https://zypper.releases.teleport.dev/$ID/$VERSION_ID/Teleport/%{_arch}/$CHANNEL/teleport-zypper.repo")" - $SUDO zypper --gpg-auto-import-keys refresh teleport - $SUDO zypper install -y "teleport$TELEPORT_SUFFIX" - - if [ "$TELEPORT_EDITION" = "cloud" ]; then - $SUDO zypper install -y teleport-ent-updater - fi - - set +x -} - - -# download .tar.gz file via curl/wget, unzip it and run the install script -install_via_curl() { - TEMP_DIR=$(mktemp -d -t teleport-XXXXXXXXXX) - - TELEPORT_FILENAME="teleport$TELEPORT_SUFFIX-v$TELEPORT_VERSION-linux-$ARCH-bin.tar.gz" - URL="https://cdn.teleport.dev/${TELEPORT_FILENAME}" - download "${URL}" "${TEMP_DIR}/${TELEPORT_FILENAME}" - - TMP_CHECKSUM="${TEMP_DIR}/${TELEPORT_FILENAME}.sha256" - download "${URL}.sha256" "$TMP_CHECKSUM" - - set -x - cd "$TEMP_DIR" - # shellcheck disable=SC2086 - $SUDO $SHA_COMMAND -c "$TMP_CHECKSUM" - cd - - - $SUDO tar -xzf "${TEMP_DIR}/${TELEPORT_FILENAME}" -C "$TEMP_DIR" - $SUDO "$TEMP_DIR/teleport/install" - set +x -} - -# wrap script in a function so a partially downloaded script -# doesn't execute -install_teleport() { - # exit if not on Linux - if [[ $(uname) != "Linux" ]]; then - echo "ERROR: This script works only for Linux. 
Please go to the downloads page to find the proper installation method for your operating system:" - echo "https://goteleport.com/download/" - exit 1 - fi - - KERNEL_VERSION=$(uname -r) - MIN_VERSION="2.6.23" - if [ $MIN_VERSION != "$(echo -e "$MIN_VERSION\n$KERNEL_VERSION" | sort -V | head -n1)" ]; then - echo "ERROR: Teleport requires Linux kernel version $MIN_VERSION+" - exit 1 - fi - - # check if can run as admin either by running as root or by - # having 'sudo' or 'doas' installed - IS_ROOT="" - SUDO="" - if [ "$(id -u)" = 0 ]; then - # running as root, no need for sudo/doas - IS_ROOT="YES" - SUDO="" - elif type sudo &>/dev/null; then - SUDO="sudo" - elif type doas &>/dev/null; then - SUDO="doas" - fi - - if [ -z "$SUDO" ] && [ -z "$IS_ROOT" ]; then - echo "ERROR: The installer requires a way to run commands as root." - echo "Either run this script as root or install sudo/doas." - exit 1 - fi - - # require curl/wget - CURL="" - if type curl &>/dev/null; then - CURL="curl -fL" - elif type wget &>/dev/null; then - CURL="wget" - fi - if [ -z "$CURL" ]; then - echo "ERROR: This script requires either curl or wget in order to download files. Please install one of them and try again." - exit 1 - fi - - # require shasum/sha256sum - SHA_COMMAND="" - if type shasum &>/dev/null; then - SHA_COMMAND="shasum -a 256" - elif type sha256sum &>/dev/null; then - SHA_COMMAND="sha256sum" - else - echo "ERROR: This script requires sha256sum or shasum to validate the download. Please install it and try again." - exit 1 - fi - - # detect distro - OS_RELEASE=/etc/os-release - ID="" - ID_LIKE="" - VERSION_CODENAME="" - UBUNTU_CODENAME="" - if [[ -f "$OS_RELEASE" ]]; then - # shellcheck source=/dev/null - . $OS_RELEASE - fi - # Some $ID_LIKE values include multiple distro names in an arbitrary order, so - # evaluate the first one. - ID_LIKE="${ID_LIKE%% *}" - - # detect architecture - ARCH="" - case $(uname -m) in - x86_64) - ARCH="amd64" - ;; - i386) - ARCH="386" - ;; - armv7l) - ARCH="arm" - ;; - aarch64) - ARCH="arm64" - ;; - **) - echo "ERROR: Your system's architecture isn't officially supported or couldn't be determined." - echo "Please refer to the installation guide for more information:" - echo "https://goteleport.com/docs/installation/" - exit 1 - ;; - esac - - # select install method based on distribution - # if ID is debian derivate, run apt-get - case "$ID" in - debian | ubuntu | kali | linuxmint | pop | raspbian | neon | zorin | parrot | elementary) - install_via_apt_get - ;; - # if ID is amazon Linux 2/RHEL/etc, run yum - centos | rhel | amzn) - install_via_yum "$ID" - ;; - sles) - install_via_zypper - ;; - *) - # before downloading manually, double check if we didn't miss any debian or - # rh/fedora derived distros using the ID_LIKE var. - case "${ID_LIKE}" in - ubuntu | debian) - install_via_apt_get - ;; - centos | fedora | rhel) - # There is no repository for "fedora", and there is no difference - # between the repositories for "centos" and "rhel", so pick an arbitrary - # one. - install_via_yum rhel - ;; - *) - if [ "$TELEPORT_EDITION" = "cloud" ]; then - echo "The system does not support a package manager, which is required for Teleport Enterprise Cloud." - exit 1 - fi - - # if ID and ID_LIKE didn't return a supported distro, download through curl - echo "There is no officially supported package for your package manager. Downloading and installing Teleport via curl." 
- install_via_curl - ;; - esac - ;; - esac - - GREEN='\033[0;32m' - COLOR_OFF='\033[0m' - - echo "" - echo -e "${GREEN}$(teleport version) installed successfully!${COLOR_OFF}" - echo "" - echo "The following commands are now available:" - if type teleport &>/dev/null; then - echo " teleport - The daemon that runs the Auth Service, Proxy Service, and other Teleport services." - fi - if type tsh &>/dev/null; then - echo " tsh - A tool that lets end users interact with Teleport." - fi - if type tctl &>/dev/null; then - echo " tctl - An administrative tool that can configure the Teleport Auth Service." - fi - if type tbot &>/dev/null; then - echo " tbot - Teleport Machine ID client." - fi - if type fdpass-teleport &>/dev/null; then - echo " fdpass-teleport - Teleport Machine ID client." - fi - if type teleport-update &>/dev/null; then - echo " teleport-update - Teleport auto-update agent." - fi -} - -# The suffix is "-ent" if we are installing a commercial edition of Teleport and -# empty for Teleport Community Edition. -TELEPORT_SUFFIX="" -TELEPORT_VERSION="" -TELEPORT_EDITION="" -if [ $# -ge 1 ] && [ -n "$1" ]; then - TELEPORT_VERSION=$1 -else - echo "ERROR: Please provide the version you want to install (e.g., 10.1.9)." - exit 1 -fi - -if ! echo "$1" | grep -qE "[0-9]+\.[0-9]+\.[0-9]+"; then - echo "ERROR: The first parameter must be a version number, e.g., 10.1.9." - exit 1 -fi - -if [ $# -ge 2 ] && [ -n "$2" ]; then - TELEPORT_EDITION=$2 - - case $TELEPORT_EDITION in - enterprise | cloud) - TELEPORT_SUFFIX="-ent" - ;; - # An empty edition defaults to OSS. - oss | "" ) - ;; - *) - echo 'ERROR: The second parameter must be "oss", "cloud", or "enterprise".' - exit 1 - ;; - esac -fi -install_teleport diff --git a/assets/install-scripts/install.sh b/assets/install-scripts/install.sh new file mode 120000 index 0000000000000..c41183ab5d100 --- /dev/null +++ b/assets/install-scripts/install.sh @@ -0,0 +1 @@ +../../lib/web/scripts/install/install.sh \ No newline at end of file diff --git a/constants.go b/constants.go index 6840111abaaea..30adf42be55ca 100644 --- a/constants.go +++ b/constants.go @@ -295,6 +295,9 @@ const ( // ComponentForwardingGit represents the SSH proxy that forwards Git commands. ComponentForwardingGit = "git:forward" + // ComponentRolloutController represents the autoupdate_agent_rollout controller. + ComponentRolloutController = "rollout-controller" + // VerboseLogsEnvVar forces all logs to be verbose (down to DEBUG level) VerboseLogsEnvVar = "TELEPORT_DEBUG" diff --git a/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx b/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx index 3cf958de6fbe6..78947f9849998 100644 --- a/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx +++ b/docs/pages/includes/helm-reference/zz_generated.teleport-kube-agent.mdx @@ -683,8 +683,16 @@ $ kubectl create secret generic my-root-ca --from-file=ca.pem=/path/to/root-ca.p `updater` controls whether the Kube Agent Updater should be deployed alongside the `teleport-kube-agent`. The updater fetches the target version, validates the -image signature, and updates the teleport deployment. The `enterprise` value should -have been set to `true`. +image signature, and updates the teleport deployment. 
+ +The updater can fetch the update information using two protocols: +- the webapi update protocol (in this case the Teleport Proxy Service drives the version rollout) +- the version server protocol (an HTTP server serving static files that specify the version and whether the update is critical). + +The webapi protocol takes precedence over the version server one if the Teleport Proxy Service supports it. +The version server protocol failover can be disabled by unsetting `updater.versionServer`. +The webapi protocol can be disabled by setting `updater.proxyAddr` to `""`. +For backward compatibility reasons, the webapi protocol is not enabled if a custom `updater.versionServer` is set. All Kubernetes-specific fields such as `tolerations`, `affinity`, `nodeSelector`, ... default to the agent values. However, they can be overridden from the @@ -744,9 +752,8 @@ concatenating [`versionServer`](#updaterversionserver) and [`releaseChannel` ](#updaterreleasechannel). This field supports gotemplate. -You must set this if the updater is enabled, and you are not a Teleport Cloud user. - -You must not change the default values if you are a Teleport Cloud user. +Setting this field makes the updater fetch the version using the version server protocol. +Setting this field to a custom value disables the webapi update protocol to ensure backward compatibility. ### `updater.releaseChannel` @@ -762,8 +769,17 @@ The complete version endpoint is built by concatenating You must not change the default value if you are a Teleport Cloud user unless instructed by Teleport support. -You can change this value if the updater is enabled, you are not a Teleport -Cloud user, and manage your own version server. +This value is used when the updater is fetching the version using the version server protocol. +It is also used as a failover when fetching the version using the webapi protocol if `updater.group` is unset. + +### `updater.group` + +| Type | Default | +|------|---------| + `string` | `""` | + +`updater.group` is the update group used when fetching the version using the webapi protocol. +When unset, the group defaults to `updater.releaseChannel`. ### `updater.image` diff --git a/e b/e index 6776b3eca2189..392fd2d7d3293 160000 --- a/e +++ b/e @@ -1 +1 @@ -Subproject commit 6776b3eca2189ab0420d0b0d178c4fb7c2f4e695 +Subproject commit 392fd2d7d329330546ac0d107e21374793e7b10f diff --git a/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml b/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml index 0487aeab4dccd..5790f16cd8dfc 100644 --- a/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml +++ b/examples/chart/teleport-kube-agent/templates/updater/deployment.yaml @@ -1,5 +1,6 @@ {{- if .Values.updater.enabled -}} {{- $updater := mustMergeOverwrite (mustDeepCopy .Values) .Values.updater -}} +{{- $versionServerOverride := and $updater.versionServer (ne $updater.versionServer "https://{{ .Values.proxyAddr }}/v1/webapi/automaticupgrades/channel") }} apiVersion: apps/v1 kind: Deployment metadata: @@ -62,8 +63,16 @@ spec: - "--agent-name={{ .Release.Name }}" - "--agent-namespace={{ .Release.Namespace }}" - "--base-image={{ include "teleport-kube-agent.baseImage" . }}" + {{- if $updater.versionServer}} - "--version-server={{ tpl $updater.versionServer . 
}}" - "--version-channel={{ $updater.releaseChannel }}" + {{- end }} + {{- /* We don't want to enable the RFD-184 update protocol if the user has set a custom versionServer as this + would be a breaking change when the teleport proxy starts override the explicitly set RFD-109 version server */ -}} + {{- if and $updater.proxyAddr (not $versionServerOverride)}} + - "--proxy-address={{ $updater.proxyAddr }}" + - "--update-group={{ default $updater.releaseChannel $updater.group }}" + {{- end }} {{- if $updater.pullCredentials }} - "--pull-credentials={{ $updater.pullCredentials }}" {{- end }} diff --git a/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml b/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml index 111039f0ac7ce..3e27f481b5a33 100644 --- a/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml +++ b/examples/chart/teleport-kube-agent/tests/updater_deployment_test.yaml @@ -67,6 +67,69 @@ tests: - contains: path: spec.template.spec.containers[0].args content: "--version-server=https://proxy.teleport.example.com:443/v1/webapi/automaticupgrades/channel" + - it: defaults the updater proxy server to the proxy address + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--proxy-address=proxy.teleport.example.com:443" + - it: doesn't enable the RFD-184 proxy protocol if the versionServer is custom + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "version-server.example.com" + group: foobar + asserts: + - notContains: + path: spec.template.spec.containers[0].args + content: "--proxy-address=proxy.teleport.example.com:443" + - notContains: + path: spec.template.spec.containers[0].args + content: "--update-group=foobar" + - it: defaults the update group to the release channel when group is unset + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--update-group=stable/cloud" + - it: uses the update group when set + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + group: "foobar" + asserts: + - contains: + path: spec.template.spec.containers[0].args + content: "--update-group=foobar" + - it: unsets the version server when empty + set: + proxyAddr: proxy.teleport.example.com:443 + roles: "custom" + updater: + enabled: true + versionServer: "" + asserts: + - notContains: + path: spec.template.spec.containers[0].args + content: "--proxy-server=" + - notContains: + path: spec.template.spec.containers[0].args + content: "--version-channel=stable/cloud" - it: sets the updater version server values: - ../.lint/updater.yaml diff --git a/examples/chart/teleport-kube-agent/values.yaml b/examples/chart/teleport-kube-agent/values.yaml index 9b7783e022c11..9e713c49eb308 100644 --- a/examples/chart/teleport-kube-agent/values.yaml +++ b/examples/chart/teleport-kube-agent/values.yaml @@ -576,8 +576,16 @@ tls: # updater -- controls whether the Kube Agent Updater should be deployed alongside # the `teleport-kube-agent`. The updater fetches the target version, validates the -# image signature, and updates the teleport deployment. The `enterprise` value should -# have been set to `true`. 
+# image signature, and updates the teleport deployment. +# +# The updater can fetch the update information using two protocols: +# - the webapi update protocol (in this case the Teleport Proxy Service drives the version rollout) +# - the version server protocol (an HTTP server serving static files that specify the version and whether the update is critical). +# +# The webapi protocol takes precedence over the version server one if the Teleport Proxy Service supports it. +# The version server protocol failover can be disabled by unsetting `updater.versionServer`. +# The webapi protocol can be disabled by setting `updater.proxyAddr` to `""`. +# For backward compatibility reasons, the webapi protocol is not enabled if a custom `updater.versionServer` is set. # # All Kubernetes-specific fields such as `tolerations`, `affinity`, `nodeSelector`, # ... default to the agent values. However, they can be overridden from the @@ -626,9 +634,8 @@ updater: # ](#updaterreleasechannel). # This field supports gotemplate. # - # You must set this if the updater is enabled, and you are not a Teleport Cloud user. - # - # You must not change the default values if you are a Teleport Cloud user. + # Setting this field makes the updater fetch the version using the version server protocol. + # Setting this field to a custom value disables the webapi update protocol to ensure backward compatibility. versionServer: "https://{{ .Values.proxyAddr }}/v1/webapi/automaticupgrades/channel" # updater.releaseChannel(string) -- is the release channel the updater # fetches the version from. # # You must not change the default value if you are a Teleport Cloud user unless # instructed by Teleport support. # - # You can change this value if the updater is enabled, you are not a Teleport - # Cloud user, and manage your own version server. + # This value is used when the updater is fetching the version using the version server protocol. + # It is also used as a failover when fetching the version using the webapi protocol if `updater.group` is unset. releaseChannel: "stable/cloud" + # updater.group(string) -- is the update group used when fetching the version using the webapi protocol. + # When unset, the group defaults to `updater.releaseChannel`. + group: "" + # updater.image(string) -- sets the container image used for Teleport updater # pods run when `updater.enabled` is true. 
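For illustration, here is a hypothetical values fragment for a self-hosted cluster that opts into the proxy-driven webapi protocol with a dedicated update group (the fields are the chart fields documented above; the group name "staging" is an invented example):

```yaml
updater:
  enabled: true
  # Leave the version server unset so the updater only follows the
  # webapi protocol driven by the Teleport Proxy Service.
  versionServer: ""
  # Agents follow the schedule of the "staging" group defined in the
  # cluster's autoupdate_config resource.
  group: "staging"
```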
# diff --git a/gen/preset-roles.json b/gen/preset-roles.json index f07a7b8c2ca79..90ae8235161ff 100755 --- a/gen/preset-roles.json +++ b/gen/preset-roles.json @@ -1 +1 @@ -{"access":{"kind":"role","version":"v7","metadata":{"name":"access","description":"Access cluster resources","labels":{"teleport.internal/resource-type":"preset"}},"spec":{"options":{"forward_agent":true,"max_session_ttl":"30h0m0s","cert_format":"standard","enhanced_recording":["command","network"],"record_session":{"desktop":true},"desktop_clipboard":true,"desktop_directory_sharing":true,"pin_source_ip":false,"ssh_file_copy":true,"idp":{"saml":{"enabled":true}},"create_desktop_user":false,"create_db_user":false,"ssh_port_forwarding":{"local":{"enabled":true},"remote":{"enabled":true}}},"allow":{"logins":["{{internal.logins}}"],"node_labels":{"*":"*"},"rules":[{"resources":["event"],"verbs":["list","read"]},{"resources":["session"],"verbs":["read","list"],"where":"contains(session.participants, user.metadata.name)"},{"resources":["instance"],"verbs":["list","read"]},{"resources":["cluster_maintenance_config"],"verbs":["list","read"]}],"kubernetes_groups":["{{internal.kubernetes_groups}}"],"kubernetes_users":["{{internal.kubernetes_users}}"],"app_labels":{"*":"*"},"kubernetes_labels":{"*":"*"},"db_labels":{"*":"*"},"db_names":["{{internal.db_names}}"],"db_users":["{{internal.db_users}}"],"aws_role_arns":["{{internal.aws_role_arns}}"],"windows_desktop_logins":["{{internal.windows_logins}}"],"windows_desktop_labels":{"*":"*"},"azure_identities":["{{internal.azure_identities}}"],"kubernetes_resources":[{"kind":"*","namespace":"*","name":"*","verbs":["*"]}],"gcp_service_accounts":["{{internal.gcp_service_accounts}}"],"db_service_labels":{"*":"*"},"db_roles":["{{internal.db_roles}}"],"github_permissions":[{"orgs":["{{internal.github_orgs}}"]}]},"deny":{}}},"auditor":{"kind":"role","version":"v7","metadata":{"name":"auditor","description":"Review cluster events and replay sessions","labels":{"teleport.internal/resource-type":"preset"}},"spec":{"options":{"forward_agent":false,"max_session_ttl":"30h0m0s","cert_format":"standard","enhanced_recording":["command","network"],"record_session":{"desktop":false},"desktop_clipboard":true,"desktop_directory_sharing":true,"pin_source_ip":false,"ssh_file_copy":true,"idp":{"saml":{"enabled":true}},"create_desktop_user":false,"create_db_user":false},"allow":{"rules":[{"resources":["session"],"verbs":["list","read"]},{"resources":["event"],"verbs":["list","read"]},{"resources":["session_tracker"],"verbs":["list","read"]},{"resources":["cluster_alert"],"verbs":["list","read"]},{"resources":["instance"],"verbs":["list","read"]},{"resources":["security_report"],"verbs":["list","read","use"]},{"resources":["audit_query"],"verbs":["list","read","use"]},{"resources":["bot_instance"],"verbs":["list","read"]},{"resources":["notification"],"verbs":["list","read"]}]},"deny":{}}},"editor":{"kind":"role","version":"v7","metadata":{"name":"editor","description":"Edit cluster 
configuration","labels":{"teleport.internal/resource-type":"preset"}},"spec":{"options":{"forward_agent":true,"max_session_ttl":"30h0m0s","cert_format":"standard","enhanced_recording":["command","network"],"record_session":{"desktop":false},"desktop_clipboard":true,"desktop_directory_sharing":true,"pin_source_ip":false,"ssh_file_copy":true,"idp":{"saml":{"enabled":true}},"create_desktop_user":false,"create_db_user":false,"ssh_port_forwarding":{"local":{"enabled":true},"remote":{"enabled":true}}},"allow":{"rules":[{"resources":["user"],"verbs":["list","create","read","update","delete"]},{"resources":["role"],"verbs":["list","create","read","update","delete"]},{"resources":["bot"],"verbs":["list","create","read","update","delete"]},{"resources":["crown_jewel"],"verbs":["list","create","read","update","delete"]},{"resources":["db_object_import_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["oidc"],"verbs":["list","create","read","update","delete"]},{"resources":["saml"],"verbs":["list","create","read","update","delete"]},{"resources":["github"],"verbs":["list","create","read","update","delete"]},{"resources":["oidc_request"],"verbs":["list","create","read","update","delete"]},{"resources":["saml_request"],"verbs":["list","create","read","update","delete"]},{"resources":["github_request"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_audit_config"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_auth_preference"],"verbs":["list","create","read","update","delete"]},{"resources":["auth_connector"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_name"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_networking_config"],"verbs":["list","create","read","update","delete"]},{"resources":["session_recording_config"],"verbs":["list","create","read","update","delete"]},{"resources":["external_audit_storage"],"verbs":["list","create","read","update","delete"]},{"resources":["ui_config"],"verbs":["list","create","read","update","delete"]},{"resources":["trusted_cluster"],"verbs":["list","create","read","update","delete"]},{"resources":["remote_cluster"],"verbs":["list","create","read","update","delete"]},{"resources":["token"],"verbs":["list","create","read","update","delete"]},{"resources":["connection_diagnostic"],"verbs":["list","create","read","update","delete"]},{"resources":["db"],"verbs":["list","create","read","update","delete"]},{"resources":["database_certificate"],"verbs":["list","create","read","update","delete"]},{"resources":["installer"],"verbs":["list","create","read","update","delete"]},{"resources":["device"],"verbs":["list","create","read","update","delete","create_enroll_token","enroll"]},{"resources":["db_service"],"verbs":["list","read"]},{"resources":["instance"],"verbs":["list","read"]},{"resources":["login_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["saml_idp_service_provider"],"verbs":["list","create","read","update","delete"]},{"resources":["user_group"],"verbs":["list","create","read","update","delete"]},{"resources":["plugin"],"verbs":["list","create","read","update","delete"]},{"resources":["okta_import_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["okta_assignment"],"verbs":["list","create","read","update","delete"]},{"resources":["lock"],"verbs":["list","create","read","update","delete"]},{"resources":["integration"],"verbs":["list","create","read","update","delete","use"]},{"resources"
:["billing"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_alert"],"verbs":["list","create","read","update","delete"]},{"resources":["access_list"],"verbs":["list","create","read","update","delete"]},{"resources":["node"],"verbs":["list","create","read","update","delete"]},{"resources":["discovery_config"],"verbs":["list","create","read","update","delete"]},{"resources":["security_report"],"verbs":["list","create","read","update","delete","use"]},{"resources":["audit_query"],"verbs":["list","create","read","update","delete","use"]},{"resources":["access_graph"],"verbs":["list","create","read","update","delete"]},{"resources":["server_info"],"verbs":["list","create","read","update","delete"]},{"resources":["access_monitoring_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["app_server"],"verbs":["list","create","read","update","delete"]},{"resources":["vnet_config"],"verbs":["list","create","read","update","delete"]},{"resources":["bot_instance"],"verbs":["list","create","read","update","delete"]},{"resources":["access_graph_settings"],"verbs":["list","create","read","update","delete"]},{"resources":["spiffe_federation"],"verbs":["list","create","read","update","delete"]},{"resources":["notification"],"verbs":["list","create","read","update","delete"]},{"resources":["static_host_user"],"verbs":["list","create","read","update","delete"]},{"resources":["user_task"],"verbs":["list","create","read","update","delete"]},{"resources":["aws_identity_center"],"verbs":["list","create","read","update","delete"]},{"resources":["contact"],"verbs":["list","create","read","update","delete"]},{"resources":["workload_identity"],"verbs":["list","create","read","update","delete"]},{"resources":["autoupdate_version"],"verbs":["list","create","read","update","delete"]},{"resources":["autoupdate_config"],"verbs":["list","create","read","update","delete"]},{"resources":["git_server"],"verbs":["list","create","read","update","delete"]},{"resources":["workload_identity_x509_revocation"],"verbs":["list","create","read","update","delete"]}]},"deny":{}}}} \ No newline at end of file +{"access":{"kind":"role","version":"v7","metadata":{"name":"access","description":"Access cluster resources","labels":{"teleport.internal/resource-type":"preset"}},"spec":{"options":{"forward_agent":true,"max_session_ttl":"30h0m0s","cert_format":"standard","enhanced_recording":["command","network"],"record_session":{"desktop":true},"desktop_clipboard":true,"desktop_directory_sharing":true,"pin_source_ip":false,"ssh_file_copy":true,"idp":{"saml":{"enabled":true}},"create_desktop_user":false,"create_db_user":false,"ssh_port_forwarding":{"local":{"enabled":true},"remote":{"enabled":true}}},"allow":{"logins":["{{internal.logins}}"],"node_labels":{"*":"*"},"rules":[{"resources":["event"],"verbs":["list","read"]},{"resources":["session"],"verbs":["read","list"],"where":"contains(session.participants, 
user.metadata.name)"},{"resources":["instance"],"verbs":["list","read"]},{"resources":["cluster_maintenance_config"],"verbs":["list","read"]}],"kubernetes_groups":["{{internal.kubernetes_groups}}"],"kubernetes_users":["{{internal.kubernetes_users}}"],"app_labels":{"*":"*"},"kubernetes_labels":{"*":"*"},"db_labels":{"*":"*"},"db_names":["{{internal.db_names}}"],"db_users":["{{internal.db_users}}"],"aws_role_arns":["{{internal.aws_role_arns}}"],"windows_desktop_logins":["{{internal.windows_logins}}"],"windows_desktop_labels":{"*":"*"},"azure_identities":["{{internal.azure_identities}}"],"kubernetes_resources":[{"kind":"*","namespace":"*","name":"*","verbs":["*"]}],"gcp_service_accounts":["{{internal.gcp_service_accounts}}"],"db_service_labels":{"*":"*"},"db_roles":["{{internal.db_roles}}"],"github_permissions":[{"orgs":["{{internal.github_orgs}}"]}]},"deny":{}}},"auditor":{"kind":"role","version":"v7","metadata":{"name":"auditor","description":"Review cluster events and replay sessions","labels":{"teleport.internal/resource-type":"preset"}},"spec":{"options":{"forward_agent":false,"max_session_ttl":"30h0m0s","cert_format":"standard","enhanced_recording":["command","network"],"record_session":{"desktop":false},"desktop_clipboard":true,"desktop_directory_sharing":true,"pin_source_ip":false,"ssh_file_copy":true,"idp":{"saml":{"enabled":true}},"create_desktop_user":false,"create_db_user":false},"allow":{"rules":[{"resources":["session"],"verbs":["list","read"]},{"resources":["event"],"verbs":["list","read"]},{"resources":["session_tracker"],"verbs":["list","read"]},{"resources":["cluster_alert"],"verbs":["list","read"]},{"resources":["instance"],"verbs":["list","read"]},{"resources":["security_report"],"verbs":["list","read","use"]},{"resources":["audit_query"],"verbs":["list","read","use"]},{"resources":["bot_instance"],"verbs":["list","read"]},{"resources":["notification"],"verbs":["list","read"]}]},"deny":{}}},"editor":{"kind":"role","version":"v7","metadata":{"name":"editor","description":"Edit cluster 
configuration","labels":{"teleport.internal/resource-type":"preset"}},"spec":{"options":{"forward_agent":true,"max_session_ttl":"30h0m0s","cert_format":"standard","enhanced_recording":["command","network"],"record_session":{"desktop":false},"desktop_clipboard":true,"desktop_directory_sharing":true,"pin_source_ip":false,"ssh_file_copy":true,"idp":{"saml":{"enabled":true}},"create_desktop_user":false,"create_db_user":false,"ssh_port_forwarding":{"local":{"enabled":true},"remote":{"enabled":true}}},"allow":{"rules":[{"resources":["user"],"verbs":["list","create","read","update","delete"]},{"resources":["role"],"verbs":["list","create","read","update","delete"]},{"resources":["bot"],"verbs":["list","create","read","update","delete"]},{"resources":["crown_jewel"],"verbs":["list","create","read","update","delete"]},{"resources":["db_object_import_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["oidc"],"verbs":["list","create","read","update","delete"]},{"resources":["saml"],"verbs":["list","create","read","update","delete"]},{"resources":["github"],"verbs":["list","create","read","update","delete"]},{"resources":["oidc_request"],"verbs":["list","create","read","update","delete"]},{"resources":["saml_request"],"verbs":["list","create","read","update","delete"]},{"resources":["github_request"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_audit_config"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_auth_preference"],"verbs":["list","create","read","update","delete"]},{"resources":["auth_connector"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_name"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_networking_config"],"verbs":["list","create","read","update","delete"]},{"resources":["session_recording_config"],"verbs":["list","create","read","update","delete"]},{"resources":["external_audit_storage"],"verbs":["list","create","read","update","delete"]},{"resources":["ui_config"],"verbs":["list","create","read","update","delete"]},{"resources":["trusted_cluster"],"verbs":["list","create","read","update","delete"]},{"resources":["remote_cluster"],"verbs":["list","create","read","update","delete"]},{"resources":["token"],"verbs":["list","create","read","update","delete"]},{"resources":["connection_diagnostic"],"verbs":["list","create","read","update","delete"]},{"resources":["db"],"verbs":["list","create","read","update","delete"]},{"resources":["database_certificate"],"verbs":["list","create","read","update","delete"]},{"resources":["installer"],"verbs":["list","create","read","update","delete"]},{"resources":["device"],"verbs":["list","create","read","update","delete","create_enroll_token","enroll"]},{"resources":["db_service"],"verbs":["list","read"]},{"resources":["instance"],"verbs":["list","read"]},{"resources":["login_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["saml_idp_service_provider"],"verbs":["list","create","read","update","delete"]},{"resources":["user_group"],"verbs":["list","create","read","update","delete"]},{"resources":["plugin"],"verbs":["list","create","read","update","delete"]},{"resources":["okta_import_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["okta_assignment"],"verbs":["list","create","read","update","delete"]},{"resources":["lock"],"verbs":["list","create","read","update","delete"]},{"resources":["integration"],"verbs":["list","create","read","update","delete","use"]},{"resources"
:["billing"],"verbs":["list","create","read","update","delete"]},{"resources":["cluster_alert"],"verbs":["list","create","read","update","delete"]},{"resources":["access_list"],"verbs":["list","create","read","update","delete"]},{"resources":["node"],"verbs":["list","create","read","update","delete"]},{"resources":["discovery_config"],"verbs":["list","create","read","update","delete"]},{"resources":["security_report"],"verbs":["list","create","read","update","delete","use"]},{"resources":["audit_query"],"verbs":["list","create","read","update","delete","use"]},{"resources":["access_graph"],"verbs":["list","create","read","update","delete"]},{"resources":["server_info"],"verbs":["list","create","read","update","delete"]},{"resources":["access_monitoring_rule"],"verbs":["list","create","read","update","delete"]},{"resources":["app_server"],"verbs":["list","create","read","update","delete"]},{"resources":["vnet_config"],"verbs":["list","create","read","update","delete"]},{"resources":["bot_instance"],"verbs":["list","create","read","update","delete"]},{"resources":["access_graph_settings"],"verbs":["list","create","read","update","delete"]},{"resources":["spiffe_federation"],"verbs":["list","create","read","update","delete"]},{"resources":["notification"],"verbs":["list","create","read","update","delete"]},{"resources":["static_host_user"],"verbs":["list","create","read","update","delete"]},{"resources":["user_task"],"verbs":["list","create","read","update","delete"]},{"resources":["aws_identity_center"],"verbs":["list","create","read","update","delete"]},{"resources":["contact"],"verbs":["list","create","read","update","delete"]},{"resources":["workload_identity"],"verbs":["list","create","read","update","delete"]},{"resources":["autoupdate_version"],"verbs":["list","create","read","update","delete"]},{"resources":["autoupdate_config"],"verbs":["list","create","read","update","delete"]},{"resources":["git_server"],"verbs":["list","create","read","update","delete"]},{"resources":["workload_identity_x509_revocation"],"verbs":["list","create","read","update","delete"]},{"resources":["autoupdate_agent_rollout"],"verbs":["list","read"]}]},"deny":{}}}} \ No newline at end of file diff --git a/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/constants.go b/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/constants.go index 658e62eece936..c94ad4130bf50 100644 --- a/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/constants.go +++ b/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/constants.go @@ -36,3 +36,21 @@ EuIXJJox2oAL7NzdSi9VIUYnEnx+2EtkU/spAFRR6i1BnT6aoIy3521B76wnmRr9 atCSKjt6MdRxgj4htCjBWWJAGM9Z/avF4CYFmK7qiVxgpdrSM8Esbt2Ta+Lu3QMJ T8LjqFu3u3dxVOo9RuLk+BkCAwEAAQ== -----END PUBLIC KEY-----`) + +// teleportStageOCIPubKey is the key used to sign Teleport distroless images dev builds. +// The key lives in the Teleport staging AWS KMS. +// This key is only trusted on dev builds/pre-release versions of the kube updater. 
+var teleportStageOCIPubKey = []byte(`-----BEGIN PUBLIC KEY-----
+MIICIjANBgkqhkiG9w0BAQEFAAOCAg8AMIICCgKCAgEA8MPaUO4fcN3gS1psn3U7
+Pm/iM7jLQVg5MgcG9jbAkFsVOvmk3eq7cv0r94voz63IXhs4wKLK/e2QMljW1kz1
+AX7NvdXecCxwcyntgYnDXtxYBhcPGSM6cVnWlZ3pLNb8uVK7oxm0HjGUblcLreaI
+aoLGmpyK+eCCLJso0Y7Yw0qRTJHg+2JQenbWps23AO96a6nqab2Ix7zEa3HyNZLa
+P6rYV9q6vqZ3MBsDz5Lrc76JYSliqGVMVONhdXcqS2PYNti4Wm8o2CTJ0gRf2zYx
+z2how6+rWM8HVoRYqG8JvCDvY6SGr5AbqIz/UCGm7XDH1S7M7C4FZ3MNTazoHY7h
+VGAYLNPOtnQeZTtJDyRPH7csq+2tyvDPin3ymgRvvBrMrpBSmnnr67TxSIAv4xgu
+B2hAgTL501B+s2m06bBcbKc03JsxgJBu4sBxKqIh1yeF8AW861bh90oZGI8/d9xM
+fyI0BiELvY08HioQaAoC2VJx44I+KVDA1SLnMEx9n44eZ5Bk8G6PiZe5bikVDizF
+RBVos6fjDapmGqVGoj+eotrI755FTKA3egB8DYw/H5yD1CO0QBBWXDhqM0ruTt4i
+LzfxsdKEiXFMFZmXYzqwut9RXguGa/7LYPT7ijtW57z/wLytIjyYRkZH1P0dffFs
+tiben+kjeNwFJ7Kg/WIDjjUCAwEAAQ==
+-----END PUBLIC KEY-----`)
diff --git a/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go b/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go
index 29a0bf46b9173..4acfdb3bee58c 100644
--- a/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go
+++ b/integrations/kube-agent-updater/cmd/teleport-kube-agent-updater/main.go
@@ -28,6 +28,7 @@ import (

 	"github.com/distribution/reference"
 	"github.com/gravitational/trace"
+	"golang.org/x/mod/semver"
 	appsv1 "k8s.io/api/apps/v1"
 	v1 "k8s.io/api/core/v1"
 	"k8s.io/apimachinery/pkg/fields"
@@ -40,6 +41,7 @@ import (
 	"sigs.k8s.io/controller-runtime/pkg/log/zap"
 	metricsserver "sigs.k8s.io/controller-runtime/pkg/metrics/server"

+	"github.com/gravitational/teleport/api/client/webclient"
 	kubeversionupdater "github.com/gravitational/teleport/integrations/kube-agent-updater"
 	"github.com/gravitational/teleport/integrations/kube-agent-updater/pkg/controller"
 	"github.com/gravitational/teleport/integrations/kube-agent-updater/pkg/img"
@@ -72,6 +74,8 @@ func main() {
 	var insecureNoResolve bool
 	var disableLeaderElection bool
 	var credSource string
+	var proxyAddress string
+	var updateGroup string

 	flag.StringVar(&agentName, "agent-name", "", "The name of the agent that should be updated. This is mandatory.")
 	flag.StringVar(&agentNamespace, "agent-namespace", "", "The namespace of the agent that should be updated. This is mandatory.")
@@ -81,6 +85,8 @@ func main() {
 	flag.BoolVar(&insecureNoVerify, "insecure-no-verify-image", false, "Disable image signature verification. The image tag is still resolved and image must exist.")
 	flag.BoolVar(&insecureNoResolve, "insecure-no-resolve-image", false, "Disable image signature verification AND resolution. The updater can update to non-existing images.")
 	flag.BoolVar(&disableLeaderElection, "disable-leader-election", false, "Disable leader election, used when running the kube-agent-updater outside of Kubernetes.")
+	flag.StringVar(&proxyAddress, "proxy-address", "", "The proxy address of the Teleport cluster. When set, the updater will try to get updates via the /find proxy endpoint.")
+	flag.StringVar(&updateGroup, "update-group", "", "The agent update group, as defined in the `autoupdate_config` resource. When unset or set to an unknown value, the agent will update with the default group.")
 	flag.StringVar(&versionServer, "version-server", "https://updates.releases.teleport.dev/v1/", "URL of the HTTP server advertising target version and critical maintenances. Trailing slash is optional.")
 	flag.StringVar(&versionChannel, "version-channel", "stable/cloud", "Version channel to get updates from.")
 	flag.StringVar(&baseImageName, "base-image", "public.ecr.aws/gravitational/teleport", "Image reference containing registry and repository.")
@@ -98,6 +104,7 @@ func main() {

 	ctrl.SetLogger(zap.New(zap.UseFlagOptions(&opts)))

+	// Validate configuration.
 	if agentName == "" {
 		ctrl.Log.Error(trace.BadParameter("--agent-name empty"), "agent-name must be provided")
 		os.Exit(1)
@@ -106,7 +113,16 @@ func main() {
 		ctrl.Log.Error(trace.BadParameter("--agent-namespace empty"), "agent-namespace must be provided")
 		os.Exit(1)
 	}
+	if versionServer == "" && proxyAddress == "" {
+		ctrl.Log.Error(
+			trace.BadParameter("at least one of --proxy-address or --version-server must be provided"),
+			"the updater has no upstream configured, it cannot retrieve the version and check when to update",
+		)
+		os.Exit(1)
+	}

+	// Build a new controller manager. We need to do this early as some triggers
+	// need a Kubernetes client and the manager is the one providing it.
 	mgr, err := ctrl.NewManager(ctrl.GetConfigOrDie(), ctrl.Options{
 		Scheme:  scheme,
 		Metrics: metricsserver.Options{BindAddress: metricsAddr},
@@ -130,16 +146,81 @@
 		os.Exit(1)
 	}

-	versionServerURL, err := url.Parse(strings.TrimRight(versionServer, "/") + "/" + versionChannel)
-	if err != nil {
-		ctrl.Log.Error(err, "failed to parse version server URL, exiting")
-		os.Exit(1)
+	// Craft the version getter and update triggers based on the configuration (use RFD-109 APIs, RFD-184, or both).
+	var criticalUpdateTriggers []maintenance.Trigger
+	var plannedMaintenanceTriggers []maintenance.Trigger
+	var versionGetters []version.Getter
+
+	// If the proxy server is specified, we enable RFD-184 updates
+	// See https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#updater-apis
+	if proxyAddress != "" {
+		ctrl.Log.Info("fetching versions from the proxy /find endpoint", "proxy_server_url", proxyAddress, "update_group", updateGroup)
+
+		proxyClt, err := webclient.NewReusableClient(&webclient.Config{
+			Context:     ctx,
+			ProxyAddr:   proxyAddress,
+			UpdateGroup: updateGroup,
+		})
+		if err != nil {
+			ctrl.Log.Error(err, "failed to create proxy client, exiting")
+			os.Exit(1)
+		}
+
+		// We do a preflight check before starting to know if the proxy is correctly configured and reachable.
+		ctrl.Log.Info("preflight check: ping the proxy server", "proxy_server_url", proxyAddress)
+		pong, err := proxyClt.Ping()
+		if err != nil {
+			ctrl.Log.Error(err, "failed to ping proxy, either the proxy address is wrong, or the network blocks connections to the proxy",
+				"proxy_address", proxyAddress,
+			)
+			os.Exit(1)
+		}
+		ctrl.Log.Info("proxy server successfully pinged",
+			"proxy_server_url", proxyAddress,
+			"proxy_cluster_name", pong.ClusterName,
+			"proxy_version", pong.ServerVersion,
+		)
+
+		versionGetters = append(versionGetters, version.NewProxyVersionGetter(proxyClt))
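Reviewer note: the getters collected here are later wrapped by version.FailoverGetter. A hedged sketch of what such failover semantics can look like, using a simplified Getter interface rather than the real version package API:

package main

import (
	"context"
	"errors"
	"fmt"
)

// Getter is a simplified stand-in for the updater's version getter interface.
type Getter interface {
	GetVersion(ctx context.Context) (string, error)
}

// failoverGetter tries each getter in order and returns the first success,
// so the RFD-184 proxy getter can take precedence over the RFD-109
// version server getter while still allowing a fallback.
type failoverGetter []Getter

func (f failoverGetter) GetVersion(ctx context.Context) (string, error) {
	var errs []error
	for _, g := range f {
		v, err := g.GetVersion(ctx)
		if err == nil {
			return v, nil
		}
		errs = append(errs, err)
	}
	return "", fmt.Errorf("all version getters failed: %w", errors.Join(errs...))
}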
+		// In RFD 184, the server is driving the update, so both regular maintenances and
+		// critical ones are fetched from the proxy. Using the same trigger ensures we hit the cache if both triggers
+		// are evaluated and don't actually make 2 calls.
+		proxyTrigger := maintenance.NewProxyMaintenanceTrigger("proxy update protocol", proxyClt)
+		criticalUpdateTriggers = append(criticalUpdateTriggers, proxyTrigger)
+		plannedMaintenanceTriggers = append(plannedMaintenanceTriggers, proxyTrigger)
 	}
-	versionGetter := version.NewBasicHTTPVersionGetter(versionServerURL)
+
+	// If the version server is specified, we enable RFD-109 updates
+	// See https://github.com/gravitational/teleport/blob/master/rfd/0109-cloud-agent-upgrades.md#kubernetes-model
+	if versionServer != "" {
+		rawUrl := strings.TrimRight(versionServer, "/") + "/" + versionChannel
+		versionServerURL, err := url.Parse(rawUrl)
+		if err != nil {
+			ctrl.Log.Error(err, "failed to parse version server URL, exiting", "url", rawUrl)
+			os.Exit(1)
+		}
+		ctrl.Log.Info("fetching versions from the version server", "version_server_url", versionServerURL.String())
+
+		versionGetters = append(versionGetters, version.NewBasicHTTPVersionGetter(versionServerURL))
+		// critical updates are advertised by the version channel
+		criticalUpdateTriggers = append(criticalUpdateTriggers, maintenance.NewBasicHTTPMaintenanceTrigger("critical update", versionServerURL))
+		// planned maintenance windows are exported by the pods
+		plannedMaintenanceTriggers = append(plannedMaintenanceTriggers, podmaintenance.NewWindowTrigger("maintenance window", mgr.GetClient()))
+	}
+
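Reviewer note on the image validator switch further below: the pre-release gate relies on semver.Prerelease from golang.org/x/mod, which returns the pre-release suffix of a version string, or the empty string for stable releases. A tiny standalone illustration (the version strings are made up):

package main

import (
	"fmt"

	"golang.org/x/mod/semver"
)

func main() {
	// Stable build: no pre-release component, so only the production
	// signing key is trusted.
	fmt.Printf("%q\n", semver.Prerelease("v17.3.0")) // ""

	// Dev build: a pre-release component is present, so the staging key
	// is trusted in addition to the production key (note the fallthrough
	// in the switch below).
	fmt.Printf("%q\n", semver.Prerelease("v17.3.0-dev.1")) // "-dev.1"
}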
 	maintenanceTriggers := maintenance.Triggers{
-		maintenance.NewBasicHTTPMaintenanceTrigger("critical update", versionServerURL),
+		// We check if the update is critical.
+		maintenance.FailoverTrigger(criticalUpdateTriggers),
+		// We check if the agent is unhealthy.
 		podmaintenance.NewUnhealthyWorkloadTrigger("unhealthy pods", mgr.GetClient()),
-		podmaintenance.NewWindowTrigger("maintenance window", mgr.GetClient()),
+		// We check if we're in a maintenance window.
+		maintenance.FailoverTrigger(plannedMaintenanceTriggers),
+	}
+
+	kc, err := img.GetKeychain(credSource)
+	if err != nil {
+		ctrl.Log.Error(err, "failed to get keychain for registry auth")
+	}

 	var imageValidators img.Validators
@@ -149,12 +230,17 @@
 		imageValidators = append(imageValidators, img.NewNopValidator("insecure no resolution"))
 	case insecureNoVerify:
 		ctrl.Log.Info("INSECURE: Image validation disabled")
-		imageValidators = append(imageValidators, img.NewInsecureValidator("insecure always verified"))
-	default:
-		kc, err := img.GetKeychain(credSource)
+		imageValidators = append(imageValidators, img.NewInsecureValidator("insecure always verified", kc))
+	case semver.Prerelease("v"+kubeversionupdater.Version) != "":
+		ctrl.Log.Info("This is a pre-release updater version, the key used to sign dev and pre-release builds of Teleport will be trusted.")
+		validator, err := img.NewCosignSingleKeyValidator(teleportStageOCIPubKey, "staging cosign signature validator", kc)
 		if err != nil {
-			ctrl.Log.Error(err, "failed to get keychain for registry auth")
+			ctrl.Log.Error(err, "failed to build pre-release image validator, exiting")
+			os.Exit(1)
 		}
+		imageValidators = append(imageValidators, validator)
+		fallthrough
+	default:
 		validator, err := img.NewCosignSingleKeyValidator(teleportProdOCIPubKey, "cosign signature validator", kc)
 		if err != nil {
 			ctrl.Log.Error(err, "failed to build image validator, exiting")
@@ -169,7 +255,12 @@
 		os.Exit(1)
 	}

-	versionUpdater := controller.NewVersionUpdater(versionGetter, imageValidators, maintenanceTriggers, baseImage)
+	versionUpdater := controller.NewVersionUpdater(
+		version.FailoverGetter(versionGetters),
+		imageValidators,
+		maintenanceTriggers,
+		baseImage,
+	)

 	// Controller registration
 	deploymentController := controller.DeploymentVersionUpdater{
@@ -203,7 +294,7 @@
 		os.Exit(1)
 	}

-	ctrl.Log.Info("starting the updater", "version", kubeversionupdater.Version, "url", versionServerURL.String())
+	ctrl.Log.Info("starting the updater", "version", kubeversionupdater.Version)

 	if err := mgr.Start(ctx); err != nil {
 		ctrl.Log.Error(err, "failed to start manager, exiting")
diff --git a/integrations/kube-agent-updater/pkg/img/insecure.go b/integrations/kube-agent-updater/pkg/img/insecure.go
index ec50bcb2ac197..823a64adbd09a 100644
--- a/integrations/kube-agent-updater/pkg/img/insecure.go
+++ b/integrations/kube-agent-updater/pkg/img/insecure.go
@@ -22,12 +22,16 @@ import (
 	"context"

 	"github.com/distribution/reference"
+	"github.com/google/go-containerregistry/pkg/authn"
+	"github.com/google/go-containerregistry/pkg/v1/remote"
 	"github.com/gravitational/trace"
 	"github.com/opencontainers/go-digest"
+	ociremote "github.com/sigstore/cosign/v2/pkg/oci/remote"
 )

 type insecureValidator struct {
-	name string
+	name            string
+	registryOptions []ociremote.Option
 }

 // Name returns the validator name
@@ -47,7 +51,7 @@ func (v *insecureValidator) Name() string {
 // image is valid. Using this validator makes you vulnerable in case of image
 // registry compromise.
 func (v *insecureValidator) ValidateAndResolveDigest(ctx context.Context, image reference.NamedTagged) (NamedTaggedDigested, error) {
-	ref, err := NamedTaggedToDigest(image)
+	ref, err := NamedTaggedToDigest(image, v.registryOptions...)
if err != nil { return nil, trace.Wrap(err) } @@ -59,8 +63,9 @@ func (v *insecureValidator) ValidateAndResolveDigest(ctx context.Context, image // NewInsecureValidator returns an img.Validator that only resolves the image // but does not check its signature. This must not be confused with // NewNopValidator that returns a validator that always validate without resolving. -func NewInsecureValidator(name string) Validator { +func NewInsecureValidator(name string, keyChain authn.Keychain) Validator { return &insecureValidator{ - name: name, + name: name, + registryOptions: []ociremote.Option{ociremote.WithRemoteOptions(remote.WithAuthFromKeychain(keyChain))}, } } diff --git a/integrations/lib/testing/fakejoin/kubesigner.go b/integrations/lib/testing/fakejoin/kubesigner.go index 271c913d2758f..460bfff21320f 100644 --- a/integrations/lib/testing/fakejoin/kubesigner.go +++ b/integrations/lib/testing/fakejoin/kubesigner.go @@ -30,7 +30,7 @@ import ( "github.com/jonboulle/clockwork" "github.com/gravitational/teleport/lib/cryptosuites" - "github.com/gravitational/teleport/lib/kubernetestoken" + kubetoken "github.com/gravitational/teleport/lib/kube/token" ) // KubernetesSigner is a JWT signer that mimicks the Kubernetes one. The signer mock Kubernetes and @@ -87,7 +87,7 @@ func (s *KubernetesSigner) GetMarshaledJWKS() (string, error) { // This token has the Teleport cluster name in its audience as required by the Kubernetes JWKS join method. func (s *KubernetesSigner) SignServiceAccountJWT(pod, namespace, serviceAccount, clusterName string) (string, error) { now := s.clock.Now() - claims := kubernetestoken.ServiceAccountClaims{ + claims := kubetoken.ServiceAccountClaims{ Claims: jwt.Claims{ Subject: fmt.Sprintf("system:serviceaccount:%s:%s", namespace, serviceAccount), Audience: jwt.Audience{clusterName}, @@ -97,13 +97,13 @@ func (s *KubernetesSigner) SignServiceAccountJWT(pod, namespace, serviceAccount, // The Kubernetes JWKS join method rejects tokens valid more than 30 minutes. 
Expiry: jwt.NewNumericDate(now.Add(29 * time.Minute)), }, - Kubernetes: &kubernetestoken.KubernetesSubClaim{ + Kubernetes: &kubetoken.KubernetesSubClaim{ Namespace: namespace, - ServiceAccount: &kubernetestoken.ServiceAccountSubClaim{ + ServiceAccount: &kubetoken.ServiceAccountSubClaim{ Name: serviceAccount, UID: uuid.New().String(), }, - Pod: &kubernetestoken.PodSubClaim{ + Pod: &kubetoken.PodSubClaim{ Name: pod, UID: uuid.New().String(), }, diff --git a/integrations/terraform/testlib/machineid_join_test.go b/integrations/terraform/testlib/machineid_join_test.go index 52299c3cf457e..63d751f75630b 100644 --- a/integrations/terraform/testlib/machineid_join_test.go +++ b/integrations/terraform/testlib/machineid_join_test.go @@ -35,7 +35,7 @@ import ( "github.com/gravitational/teleport/api/types" "github.com/gravitational/teleport/integrations/lib/testing/fakejoin" "github.com/gravitational/teleport/integrations/lib/testing/integration" - "github.com/gravitational/teleport/lib/kubernetestoken" + kubetoken "github.com/gravitational/teleport/lib/kube/token" "github.com/gravitational/teleport/lib/services" "github.com/gravitational/teleport/integrations/terraform/provider" @@ -115,7 +115,7 @@ func TestTerraformJoin(t *testing.T) { tempDir := t.TempDir() jwtPath := filepath.Join(tempDir, "token") require.NoError(t, os.WriteFile(jwtPath, []byte(jwt), 0600)) - require.NoError(t, os.Setenv(kubernetestoken.EnvVarCustomKubernetesTokenPath, jwtPath)) + require.NoError(t, os.Setenv(kubetoken.EnvVarCustomKubernetesTokenPath, jwtPath)) // Test setup: craft a Terraform provider configuration terraformConfig := fmt.Sprintf(` diff --git a/lib/auth/auth.go b/lib/auth/auth.go index f53357b12f740..0f6c85be8a5e7 100644 --- a/lib/auth/auth.go +++ b/lib/auth/auth.go @@ -102,7 +102,7 @@ import ( "github.com/gravitational/teleport/lib/githubactions" "github.com/gravitational/teleport/lib/gitlab" "github.com/gravitational/teleport/lib/inventory" - "github.com/gravitational/teleport/lib/kubernetestoken" + kubetoken "github.com/gravitational/teleport/lib/kube/token" "github.com/gravitational/teleport/lib/limiter" "github.com/gravitational/teleport/lib/loginrule" "github.com/gravitational/teleport/lib/modules" @@ -641,10 +641,10 @@ func NewServer(cfg *InitConfig, opts ...ServerOption) (*Server, error) { as.tpmValidator = tpm.Validate } if as.k8sTokenReviewValidator == nil { - as.k8sTokenReviewValidator = &kubernetestoken.TokenReviewValidator{} + as.k8sTokenReviewValidator = &kubetoken.TokenReviewValidator{} } if as.k8sJWKSValidator == nil { - as.k8sJWKSValidator = kubernetestoken.ValidateTokenWithJWKS + as.k8sJWKSValidator = kubetoken.ValidateTokenWithJWKS } if as.gcpIDTokenValidator == nil { diff --git a/lib/auth/autoupdate/autoupdatev1/service.go b/lib/auth/autoupdate/autoupdatev1/service.go index ade0ce3dad04f..31d9962fb22d4 100644 --- a/lib/auth/autoupdate/autoupdatev1/service.go +++ b/lib/auth/autoupdate/autoupdatev1/service.go @@ -21,12 +21,14 @@ package autoupdatev1 import ( "context" "log/slog" + "maps" "github.com/gravitational/trace" "google.golang.org/protobuf/types/known/emptypb" "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" "github.com/gravitational/teleport/api/types" + update "github.com/gravitational/teleport/api/types/autoupdate" apievents "github.com/gravitational/teleport/api/types/events" "github.com/gravitational/teleport/lib/authz" "github.com/gravitational/teleport/lib/events" @@ -127,6 +129,10 @@ func (s *Service) CreateAutoUpdateConfig(ctx context.Context, req 
*autoupdate.Cr return nil, trace.Wrap(err) } + if err := validateServerSideAgentConfig(req.Config); err != nil { + return nil, trace.Wrap(err) + } + config, err := s.backend.CreateAutoUpdateConfig(ctx, req.Config) var errMsg string if err != nil { @@ -167,6 +173,10 @@ func (s *Service) UpdateAutoUpdateConfig(ctx context.Context, req *autoupdate.Up return nil, trace.Wrap(err) } + if err := validateServerSideAgentConfig(req.Config); err != nil { + return nil, trace.Wrap(err) + } + config, err := s.backend.UpdateAutoUpdateConfig(ctx, req.Config) var errMsg string if err != nil { @@ -207,6 +217,10 @@ func (s *Service) UpsertAutoUpdateConfig(ctx context.Context, req *autoupdate.Up return nil, trace.Wrap(err) } + if err := validateServerSideAgentConfig(req.Config); err != nil { + return nil, trace.Wrap(err) + } + config, err := s.backend.UpsertAutoUpdateConfig(ctx, req.Config) var errMsg string if err != nil { @@ -522,10 +536,11 @@ func (s *Service) CreateAutoUpdateAgentRollout(ctx context.Context, req *autoupd // Editing the AU agent plan is restricted to cluster administrators. As of today we don't have any way of having // resources that can only be edited by Teleport Cloud (when running cloud-hosted). - // The workaround is to check if the caller has the auth system role. - // This is not ideal as it forces local tctl usage. In the future, if we expand the permission system and make cloud + // The workaround is to check if the caller has the auth/admin system role. + // This is not ideal as it forces local tctl usage and can be bypassed if the user is very creative. + // In the future, if we expand the permission system and make cloud // a first class citizen, we'll want to update this permission check. - if !authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) { + if !(authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) || authz.HasBuiltinRole(*authCtx, string(types.RoleAdmin))) { return nil, trace.AccessDenied("this request can be only executed by an auth server") } @@ -550,10 +565,11 @@ func (s *Service) UpdateAutoUpdateAgentRollout(ctx context.Context, req *autoupd // Editing the AU agent plan is restricted to cluster administrators. As of today we don't have any way of having // resources that can only be edited by Teleport Cloud (when running cloud-hosted). - // The workaround is to check if the caller has the auth system role. - // This is not ideal as it forces local tctl usage. In the future, if we expand the permission system and make cloud + // The workaround is to check if the caller has the auth/admin system role. + // This is not ideal as it forces local tctl usage and can be bypassed if the user is very creative. + // In the future, if we expand the permission system and make cloud // a first class citizen, we'll want to update this permission check. - if !authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) { + if !(authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) || authz.HasBuiltinRole(*authCtx, string(types.RoleAdmin))) { return nil, trace.AccessDenied("this request can be only executed by an auth server") } @@ -578,10 +594,11 @@ func (s *Service) UpsertAutoUpdateAgentRollout(ctx context.Context, req *autoupd // Editing the AU agent plan is restricted to cluster administrators. As of today we don't have any way of having // resources that can only be edited by Teleport Cloud (when running cloud-hosted). - // The workaround is to check if the caller has the auth system role. - // This is not ideal as it forces local tctl usage. 
In the future, if we expand the permission system and make cloud
+	// a first class citizen, we'll want to update this permission check.
-	if !authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) {
+	if !(authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) || authz.HasBuiltinRole(*authCtx, string(types.RoleAdmin))) {
 		return nil, trace.AccessDenied("this request can be only executed by an auth server")
 	}
@@ -606,10 +623,11 @@

 	// Editing the AU agent plan is restricted to cluster administrators. As of today we don't have any way of having
 	// resources that can only be edited by Teleport Cloud (when running cloud-hosted).
-	// The workaround is to check if the caller has the auth system role.
-	// This is not ideal as it forces local tctl usage. In the future, if we expand the permission system and make cloud
+	// The workaround is to check if the caller has the auth/admin system role.
+	// This is not ideal as it forces local tctl usage and can be bypassed if the user is very creative.
+	// In the future, if we expand the permission system and make cloud
 	// a first class citizen, we'll want to update this permission check.
-	if !authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) {
+	if !(authz.HasBuiltinRole(*authCtx, string(types.RoleAuth)) || authz.HasBuiltinRole(*authCtx, string(types.RoleAdmin))) {
 		return nil, trace.AccessDenied("this request can be only executed by an auth server")
 	}
@@ -646,3 +664,131 @@ func checkAdminCloudAccess(authCtx *authz.Context) error {
 	}
 	return nil
 }
+
+// Those values are arbitrary; we will want to increase them as we test. We will also want to modulate them based on the
+// cluster context. We don't want people to craft schedules that can't realistically finish within a week on Cloud as
+// we usually do weekly updates. However, self-hosted users can craft more complex schedules, slower rollouts, and shoot
+// themselves in the foot if they want.
+const (
+	maxGroupsTimeBasedStrategy        = 20
+	maxGroupsHaltOnErrorStrategy      = 10
+	maxGroupsHaltOnErrorStrategyCloud = 4
+	maxRolloutDurationCloudHours      = 72
+)
+
+var (
+	cloudGroupUpdateDays = []string{"Mon", "Tue", "Wed", "Thu"}
+)
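To make the limits above concrete, here is a hedged example of an agent schedule that the server-side validation below would reject on a Cloud cluster. The package aliases mirror the test file further down; the package name and helper are illustrative only:

package autoupdatev1_example

import (
	"fmt"

	autoupdatev1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
	"github.com/gravitational/teleport/api/types/autoupdate"
)

// rejectedCloudAgentSpec builds five halt-on-error groups, one more than
// maxGroupsHaltOnErrorStrategyCloud allows, so validation fails on Cloud.
func rejectedCloudAgentSpec() *autoupdatev1pb.AutoUpdateConfigSpecAgents {
	groups := make([]*autoupdatev1pb.AgentAutoUpdateGroup, 5)
	for i := range groups {
		groups[i] = &autoupdatev1pb.AgentAutoUpdateGroup{
			Name: fmt.Sprintf("group-%d", i+1),
			// Cloud clusters only accept the Mon-Thu update days.
			Days: []string{"Mon", "Tue", "Wed", "Thu"},
		}
	}
	return &autoupdatev1pb.AutoUpdateConfigSpecAgents{
		Mode:      autoupdate.AgentsUpdateModeEnabled,
		Strategy:  autoupdate.AgentsStrategyHaltOnError,
		Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{Regular: groups},
	}
}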
+
+// validateServerSideAgentConfig validates that the autoupdate_config.agent spec meets the cluster rules.
+// Rules may vary based on the cluster, and over time.
+//
+// This function should not be confused with api/types/autoupdate.ValidateAutoUpdateConfig which validates the integrity
+// of the resource and does not enforce potentially changing rules.
+func validateServerSideAgentConfig(config *autoupdate.AutoUpdateConfig) error {
+	agentsSpec := config.GetSpec().GetAgents()
+	if agentsSpec == nil {
+		return nil
+	}
+	// We must check resource integrity first, because it makes no sense to try to enforce rules on an invalid resource.
+	// The generic backend service will likely check integrity again, but it's not a large performance problem.
+	err := update.ValidateAutoUpdateConfig(config)
+	if err != nil {
+		return trace.Wrap(err, "validating autoupdate config")
+	}
+
+	var maxGroups int
+	isCloud := modules.GetModules().Features().Cloud
+
+	switch {
+	case isCloud && agentsSpec.GetStrategy() == update.AgentsStrategyHaltOnError:
+		maxGroups = maxGroupsHaltOnErrorStrategyCloud
+	case agentsSpec.GetStrategy() == update.AgentsStrategyHaltOnError:
+		maxGroups = maxGroupsHaltOnErrorStrategy
+	case agentsSpec.GetStrategy() == update.AgentsStrategyTimeBased:
+		maxGroups = maxGroupsTimeBasedStrategy
+	default:
+		return trace.BadParameter("unknown max group for strategy %v", agentsSpec.GetStrategy())
+	}
+
+	if len(agentsSpec.GetSchedules().GetRegular()) > maxGroups {
+		return trace.BadParameter("max groups (%d) exceeded for strategy %s, %s schedule contains %d groups", maxGroups, agentsSpec.GetStrategy(), update.AgentsScheduleRegular, len(agentsSpec.GetSchedules().GetRegular()))
+	}
+
+	if !isCloud {
+		return nil
+	}
+
+	cloudWeekdays, err := types.ParseWeekdays(cloudGroupUpdateDays)
+	if err != nil {
+		return trace.Wrap(err, "parsing cloud weekdays")
+	}
+
+	for i, group := range agentsSpec.GetSchedules().GetRegular() {
+		weekdays, err := types.ParseWeekdays(group.Days)
+		if err != nil {
+			return trace.Wrap(err, "parsing weekdays from group %d", i)
+		}
+
+		if !maps.Equal(cloudWeekdays, weekdays) {
+			return trace.BadParameter("weekdays must be set to %v in cloud", cloudGroupUpdateDays)
+		}
+
+	}
+
+	if duration := computeMinRolloutTime(agentsSpec.GetSchedules().GetRegular()); duration > maxRolloutDurationCloudHours {
+		return trace.BadParameter("rollout takes more than %d hours to complete: estimated completion time is %d hours", maxRolloutDurationCloudHours, duration)
+	}
+
+	return nil
+}
+
+func computeMinRolloutTime(groups []*autoupdate.AgentAutoUpdateGroup) int {
+	if len(groups) == 0 {
+		return 0
+	}
+
+	// We start the rollout at the first group hour, and we wait for the group to update (1 hour).
+	hours := groups[0].StartHour + 1
+
+	for _, group := range groups[1:] {
+		previousStartHour := (hours - 1) % 24
+		previousEndHour := hours % 24
+
+		// compute the difference between the current hour and the group start hour
+		// we then check if it's less than the WaitHours, in which case we wait a day
+		diff := hourDifference(previousStartHour, group.StartHour)
+		if diff < group.WaitHours%24 {
+			hours += 24 + hourDifference(previousEndHour, group.StartHour)
+		} else {
+			hours += hourDifference(previousEndHour, group.StartHour)
+		}
+
+		// Handle the case where WaitHours is > 24
+		// This is an integer division
+		waitDays := group.WaitHours / 24
+		// There's a special case where the difference modulo 24 is zero, the
+		// wait hours are non-zero, but we already waited 23 hours.
+		// To avoid double counting we reduce the number of wait days by 1 if
+		// it's not zero already.
+		if diff == 0 {
+			waitDays = max(waitDays-1, 0)
+		}
+		hours += waitDays * 24
+
+		// We assume the group took an hour to update
+		hours += 1
+	}
+
+	// We remove the group start hour we added initially
+	return int(hours - groups[0].StartHour)
+}
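As a sanity check on the arithmetic, here is a hedged walkthrough of the wait_hours test case that appears further below (g1 starts at 2:00, g2 starts at 4:00 with WaitHours 6), written as Go comments:

// hours = 2 + 1 = 3                  // g1 updates from 2:00 to 3:00 on day 1
// previousStartHour, previousEndHour = 2, 3
// diff = hourDifference(2, 4) = 2    // g2 would start 2h after g1 started
// diff (2) < WaitHours%24 (6)        // too soon: g2 is pushed to the next day
// hours += 24 + hourDifference(3, 4) // hours = 3 + 25 = 28
// waitDays = 6 / 24 = 0              // no whole extra days to wait
// hours += 1                         // g2 takes an hour: hours = 29
// return 29 - 2 = 27                 // 2:00 on day 1 -> 5:00 on day 2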
+
+// hourDifference computes the difference between two hours.
+func hourDifference(a, b int32) int32 {
+	diff := b - a
+	if diff < 0 {
+		diff = diff + 24
+	}
+	return diff
+}
diff --git a/lib/auth/autoupdate/autoupdatev1/service_test.go b/lib/auth/autoupdate/autoupdatev1/service_test.go
index be71b976d698a..ad1e041218b2c 100644
--- a/lib/auth/autoupdate/autoupdatev1/service_test.go
+++ b/lib/auth/autoupdate/autoupdatev1/service_test.go
@@ -20,10 +20,13 @@ import (
 	"context"
 	"fmt"
 	"slices"
+	"strconv"
 	"testing"
+	"time"

 	"github.com/gravitational/trace"
 	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/types/known/durationpb"

 	autoupdatev1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
 	"github.com/gravitational/teleport/api/types"
@@ -33,6 +36,7 @@ import (
 	"github.com/gravitational/teleport/lib/backend/memory"
 	libevents "github.com/gravitational/teleport/lib/events"
 	"github.com/gravitational/teleport/lib/events/eventstest"
+	"github.com/gravitational/teleport/lib/modules"
 	"github.com/gravitational/teleport/lib/services"
 	"github.com/gravitational/teleport/lib/services/local"
 	"github.com/gravitational/teleport/lib/utils"
@@ -450,3 +454,329 @@ func newServiceWithStorage(t *testing.T, authState authz.AdminActionAuthState, c
 	require.NoError(t, err)
 	return service
 }
+
+func TestComputeMinRolloutTime(t *testing.T) {
+	t.Parallel()
+	tests := []struct {
+		name          string
+		groups        []*autoupdatev1pb.AgentAutoUpdateGroup
+		expectedHours int
+	}{
+		{
+			name:          "nil groups",
+			groups:        nil,
+			expectedHours: 0,
+		},
+		{
+			name:          "empty groups",
+			groups:        []*autoupdatev1pb.AgentAutoUpdateGroup{},
+			expectedHours: 0,
+		},
+		{
+			name: "single group",
+			groups: []*autoupdatev1pb.AgentAutoUpdateGroup{
+				{
+					Name: "g1",
+				},
+			},
+			expectedHours: 1,
+		},
+		{
+			name: "two groups, same day, different start hour, no wait time",
+			groups: []*autoupdatev1pb.AgentAutoUpdateGroup{
+				{
+					Name:      "g1",
+					StartHour: 2,
+				},
+				{
+					Name:      "g2",
+					StartHour: 4,
+				},
+			},
+			// g1 updates from 2:00 to 3:00, g2 updates from 4:00 to 5:00, so the rollout spans from 2:00 to 5:00.
+			expectedHours: 3,
+		},
+		{
+			name: "two groups, same day, same start hour, no wait time",
+			groups: []*autoupdatev1pb.AgentAutoUpdateGroup{
+				{
+					Name:      "g1",
+					StartHour: 2,
+				},
+				{
+					Name:      "g2",
+					StartHour: 2,
+				},
+			},
+			// g1 and g2 can't update at the same time: g1 updates from 2:00 to 3:00 on day one,
+			// and g2 updates from 2:00 to 3:00 the next day. The total update spans from 2:00 on day 1 to 3:00 on day 2.
+			expectedHours: 25,
+		},
+		{
+			name: "two groups, cannot happen on the same day because of wait_hours",
+			groups: []*autoupdatev1pb.AgentAutoUpdateGroup{
+				{
+					Name:      "g1",
+					StartHour: 2,
+				},
+				{
+					Name:      "g2",
+					StartHour: 4,
+					WaitHours: 6,
+				},
+			},
+			// g1 updates from 2:00 to 3:00. At 4:00 g2 can't update yet, so we wait until the next day.
+			// On day 2, g2 updates from 4:00 to 5:00. The rollout spans from 2:00 on day 1 to 5:00 on day 2.
+			expectedHours: 27,
+		},
+		{
+			name: "two groups, wait hours is several days",
+			groups: []*autoupdatev1pb.AgentAutoUpdateGroup{
+				{
+					Name:      "g1",
+					StartHour: 2,
+				},
+				{
+					Name:      "g2",
+					StartHour: 4,
+					WaitHours: 48,
+				},
+			},
+			// g1 updates from 2:00 to 3:00. At 4:00 g2 can't update yet, so we wait 2 days.
+			// On day 3, g2 updates from 4:00 to 5:00. The rollout spans from 2:00 on day 1 to 5:00 on day 3.
+ expectedHours: 51, + }, + { + name: "two groups, one wait hour", + groups: []*autoupdatev1pb.AgentAutoUpdateGroup{ + { + Name: "g1", + StartHour: 2, + }, + { + Name: "g2", + StartHour: 3, + WaitHours: 1, + }, + }, + expectedHours: 2, + }, + { + name: "two groups different days", + groups: []*autoupdatev1pb.AgentAutoUpdateGroup{ + { + Name: "g1", + StartHour: 23, + }, + { + Name: "g2", + StartHour: 1, + }, + }, + expectedHours: 3, + }, + { + name: "two groups different days, hour diff == wait hours == 1 day", + groups: []*autoupdatev1pb.AgentAutoUpdateGroup{ + { + Name: "g1", + StartHour: 12, + }, + { + Name: "g2", + StartHour: 12, + WaitHours: 24, + }, + }, + expectedHours: 25, + }, + { + name: "two groups different days, hour diff == wait hours", + groups: []*autoupdatev1pb.AgentAutoUpdateGroup{ + { + Name: "g1", + StartHour: 12, + }, + { + Name: "g2", + StartHour: 11, + WaitHours: 23, + }, + }, + expectedHours: 24, + }, + { + name: "everything at once", + groups: []*autoupdatev1pb.AgentAutoUpdateGroup{ + { + Name: "g1", + StartHour: 23, + }, + { + Name: "g2", + StartHour: 1, + WaitHours: 4, + }, + { + Name: "g3", + StartHour: 1, + }, + { + Name: "g4", + StartHour: 10, + WaitHours: 6, + }, + }, + expectedHours: 60, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.expectedHours, computeMinRolloutTime(tt.groups)) + }) + } +} + +func generateGroups(n int, days []string) []*autoupdatev1pb.AgentAutoUpdateGroup { + groups := make([]*autoupdatev1pb.AgentAutoUpdateGroup, n) + for i := range groups { + groups[i] = &autoupdatev1pb.AgentAutoUpdateGroup{ + Name: strconv.Itoa(i), + Days: days, + StartHour: int32(i % 24), + } + } + return groups +} + +func TestValidateServerSideAgentConfig(t *testing.T) { + cloudModules := &modules.TestModules{ + TestFeatures: modules.Features{ + Cloud: true, + }, + } + selfHostedModules := &modules.TestModules{ + TestFeatures: modules.Features{ + Cloud: false, + }, + } + tests := []struct { + name string + config *autoupdatev1pb.AutoUpdateConfigSpecAgents + modules modules.Modules + expectErr require.ErrorAssertionFunc + }{ + { + name: "empty agent config", + modules: selfHostedModules, + config: nil, + expectErr: require.NoError, + }, + { + name: "over max groups time-based", + modules: selfHostedModules, + config: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyTimeBased, + MaintenanceWindowDuration: durationpb.New(time.Hour), + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: generateGroups(maxGroupsTimeBasedStrategy+1, cloudGroupUpdateDays), + }, + }, + expectErr: require.Error, + }, + { + name: "over max groups halt-on-error", + modules: selfHostedModules, + config: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: generateGroups(maxGroupsHaltOnErrorStrategy+1, cloudGroupUpdateDays), + }, + }, + expectErr: require.Error, + }, + { + name: "over max groups halt-on-error cloud", + modules: cloudModules, + config: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: generateGroups(maxGroupsHaltOnErrorStrategyCloud+1, cloudGroupUpdateDays), + }, + }, + expectErr: require.Error, + }, + { + name: "cloud should reject custom 
weekdays", + modules: cloudModules, + config: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: generateGroups(maxGroupsHaltOnErrorStrategyCloud, []string{"Mon"}), + }, + }, + expectErr: require.Error, + }, + { + name: "self-hosted should allow custom weekdays", + modules: selfHostedModules, + config: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: generateGroups(maxGroupsHaltOnErrorStrategyCloud, []string{"Mon"}), + }, + }, + expectErr: require.NoError, + }, + { + name: "cloud should reject long rollouts", + modules: cloudModules, + config: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: []*autoupdatev1pb.AgentAutoUpdateGroup{ + {Name: "g1", Days: cloudGroupUpdateDays}, + {Name: "g2", Days: cloudGroupUpdateDays, WaitHours: maxRolloutDurationCloudHours}, + }, + }, + }, + expectErr: require.Error, + }, + { + name: "self-hosted should allow long rollouts", + modules: selfHostedModules, + config: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: []*autoupdatev1pb.AgentAutoUpdateGroup{ + {Name: "g1", Days: cloudGroupUpdateDays}, + {Name: "g2", Days: cloudGroupUpdateDays, WaitHours: maxRolloutDurationCloudHours}, + }, + }, + }, + expectErr: require.NoError, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test setup: crafing a config and setting modules + config, err := autoupdate.NewAutoUpdateConfig( + &autoupdatev1pb.AutoUpdateConfigSpec{ + Tools: nil, + Agents: tt.config, + }) + require.NoError(t, err) + modules.SetTestModules(t, tt.modules) + + // Test execution. 
+			tt.expectErr(t, validateServerSideAgentConfig(config))
+		})
+	}
+}
diff --git a/lib/auth/bot_test.go b/lib/auth/bot_test.go
index 24e1183ea6483..44a12bcb7585a 100644
--- a/lib/auth/bot_test.go
+++ b/lib/auth/bot_test.go
@@ -65,7 +65,8 @@ import (
 	libevents "github.com/gravitational/teleport/lib/events"
 	"github.com/gravitational/teleport/lib/events/eventstest"
 	"github.com/gravitational/teleport/lib/fixtures"
-	"github.com/gravitational/teleport/lib/kubernetestoken"
+	"github.com/gravitational/teleport/lib/kube/token"
+	kubetoken "github.com/gravitational/teleport/lib/kube/token"
 	"github.com/gravitational/teleport/lib/reversetunnelclient"
 	"github.com/gravitational/teleport/lib/tbot/identity"
 	"github.com/gravitational/teleport/lib/tlsca"
@@ -904,9 +905,9 @@ func TestRegisterBot_BotInstanceRejoin(t *testing.T) {
 	k8sReadFileFunc := func(name string) ([]byte, error) {
 		return []byte(k8sTokenName), nil
 	}
-	a.k8sJWKSValidator = func(_ time.Time, _ []byte, _ string, token string) (*kubernetestoken.ValidationResult, error) {
+	a.k8sJWKSValidator = func(_ time.Time, _ []byte, _ string, token string) (*token.ValidationResult, error) {
 		if token == k8sTokenName {
-			return &kubernetestoken.ValidationResult{Username: "system:serviceaccount:static-jwks:matching"}, nil
+			return &kubetoken.ValidationResult{Username: "system:serviceaccount:static-jwks:matching"}, nil
 		}

 		return nil, errMockInvalidToken
@@ -1059,9 +1060,9 @@ func TestRegisterBotWithInvalidInstanceID(t *testing.T) {
 	botName := "bot"
 	k8sTokenName := "jwks-matching-service-account"

-	a.k8sJWKSValidator = func(_ time.Time, _ []byte, _ string, token string) (*kubernetestoken.ValidationResult, error) {
+	a.k8sJWKSValidator = func(_ time.Time, _ []byte, _ string, token string) (*token.ValidationResult, error) {
 		if token == k8sTokenName {
-			return &kubernetestoken.ValidationResult{Username: "system:serviceaccount:static-jwks:matching"}, nil
+			return &kubetoken.ValidationResult{Username: "system:serviceaccount:static-jwks:matching"}, nil
 		}

 		return nil, errMockInvalidToken
diff --git a/lib/auth/grpcserver.go b/lib/auth/grpcserver.go
index b89452b184aa2..f4dcd27e22b64 100644
--- a/lib/auth/grpcserver.go
+++ b/lib/auth/grpcserver.go
@@ -4452,6 +4452,18 @@ func (g *GRPCServer) DeleteUIConfig(ctx context.Context, _ *emptypb.Empty) (*emp
 	return &emptypb.Empty{}, nil
 }

+func (g *GRPCServer) defaultInstaller(ctx context.Context) (*types.InstallerV1, error) {
+	_, err := g.AuthServer.GetAutoUpdateAgentRollout(ctx)
+	switch {
+	case trace.IsNotFound(err):
+		return installer.LegacyDefaultInstaller, nil
+	case err != nil:
+		return nil, trace.Wrap(err, "failed to query autoupdate state to build installer")
+	default:
+		return installer.NewDefaultInstaller, nil
+	}
+}
+
 // GetInstaller retrieves the installer script resource
 func (g *GRPCServer) GetInstaller(ctx context.Context, req *types.ResourceReques
 		if trace.IsNotFound(err) {
 			switch req.Name {
 			case installers.InstallerScriptName:
-				return installer.DefaultInstaller, nil
+				return g.defaultInstaller(ctx)
 			case installers.InstallerScriptNameAgentless:
 				return installers.DefaultAgentlessInstaller, nil
 			}
@@ -4488,8 +4500,14 @@ func (g *GRPCServer) GetInstallers(ctx context.Context, _ *emptypb.Empty) (*type
 		return nil, trace.Wrap(err)
 	}
 	var installersV1 []*types.InstallerV1
+
+	defaultInstaller, err := g.defaultInstaller(ctx)
+	if err != nil {
+		return
nil, trace.Wrap(err) + } + defaultInstallers := map[string]*types.InstallerV1{ - types.DefaultInstallerScriptName: installer.DefaultInstaller, + types.DefaultInstallerScriptName: defaultInstaller, installers.InstallerScriptNameAgentless: installers.DefaultAgentlessInstaller, } diff --git a/lib/auth/grpcserver_test.go b/lib/auth/grpcserver_test.go index 60ed4193c30ae..bc710572e3033 100644 --- a/lib/auth/grpcserver_test.go +++ b/lib/auth/grpcserver_test.go @@ -48,12 +48,14 @@ import ( otlpresourcev1 "go.opentelemetry.io/proto/otlp/resource/v1" otlptracev1 "go.opentelemetry.io/proto/otlp/trace/v1" "google.golang.org/protobuf/testing/protocmp" + "google.golang.org/protobuf/types/known/durationpb" "google.golang.org/protobuf/types/known/emptypb" "github.com/gravitational/teleport" "github.com/gravitational/teleport/api/client/proto" "github.com/gravitational/teleport/api/constants" apidefaults "github.com/gravitational/teleport/api/defaults" + autoupdatev1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" clusterconfigpb "github.com/gravitational/teleport/api/gen/proto/go/teleport/clusterconfig/v1" mfav1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/mfa/v1" "github.com/gravitational/teleport/api/internalutils/stream" @@ -61,6 +63,7 @@ import ( "github.com/gravitational/teleport/api/mfa" "github.com/gravitational/teleport/api/observability/tracing" "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/api/types/autoupdate" "github.com/gravitational/teleport/api/types/installers" "github.com/gravitational/teleport/api/utils" "github.com/gravitational/teleport/api/utils/keys" @@ -4611,12 +4614,21 @@ func TestGRPCServer_GetInstallers(t *testing.T) { tests := []struct { name string inputInstallers map[string]string + hasAgentRollout bool expectedInstallers map[string]string }{ { name: "default installers only", expectedInstallers: map[string]string{ - types.DefaultInstallerScriptName: installer.DefaultInstaller.GetScript(), + types.DefaultInstallerScriptName: installer.LegacyDefaultInstaller.GetScript(), + installers.InstallerScriptNameAgentless: installers.DefaultAgentlessInstaller.GetScript(), + }, + }, + { + name: "new default installers", + hasAgentRollout: true, + expectedInstallers: map[string]string{ + types.DefaultInstallerScriptName: installer.NewDefaultInstaller.GetScript(), installers.InstallerScriptNameAgentless: installers.DefaultAgentlessInstaller.GetScript(), }, }, @@ -4627,7 +4639,7 @@ func TestGRPCServer_GetInstallers(t *testing.T) { }, expectedInstallers: map[string]string{ "my-custom-installer": "echo test", - types.DefaultInstallerScriptName: installer.DefaultInstaller.GetScript(), + types.DefaultInstallerScriptName: installer.LegacyDefaultInstaller.GetScript(), installers.InstallerScriptNameAgentless: installers.DefaultAgentlessInstaller.GetScript(), }, }, @@ -4649,6 +4661,25 @@ func TestGRPCServer_GetInstallers(t *testing.T) { require.NoError(t, err) }) + if tc.hasAgentRollout { + rollout, err := autoupdate.NewAutoUpdateAgentRollout( + &autoupdatev1pb.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: autoupdate.AgentsScheduleImmediate, + AutoupdateMode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyTimeBased, + MaintenanceWindowDuration: durationpb.New(1 * time.Hour), + }) + require.NoError(t, err) + _, err = grpc.AuthServer.CreateAutoUpdateAgentRollout(ctx, rollout) + require.NoError(t, err) + + t.Cleanup(func() { + assert.NoError(t, 
grpc.AuthServer.DeleteAutoUpdateAgentRollout(ctx)) + }) + } + for name, script := range tc.inputInstallers { installer, err := types.NewInstallerV1(name, script) require.NoError(t, err) diff --git a/lib/auth/join/join.go b/lib/auth/join/join.go index 4d0ed52870c40..940cd3b211720 100644 --- a/lib/auth/join/join.go +++ b/lib/auth/join/join.go @@ -50,7 +50,7 @@ import ( "github.com/gravitational/teleport/lib/defaults" "github.com/gravitational/teleport/lib/githubactions" "github.com/gravitational/teleport/lib/gitlab" - "github.com/gravitational/teleport/lib/kubernetestoken" + kubetoken "github.com/gravitational/teleport/lib/kube/token" "github.com/gravitational/teleport/lib/spacelift" "github.com/gravitational/teleport/lib/terraformcloud" "github.com/gravitational/teleport/lib/tlsca" @@ -238,7 +238,7 @@ func Register(ctx context.Context, params RegisterParams) (result *RegisterResul return nil, trace.Wrap(err) } case types.JoinMethodKubernetes: - params.IDToken, err = kubernetestoken.GetIDToken(os.Getenv, params.KubernetesReadFileFunc) + params.IDToken, err = kubetoken.GetIDToken(os.Getenv, params.KubernetesReadFileFunc) if err != nil { return nil, trace.Wrap(err) } diff --git a/lib/auth/join_kubernetes.go b/lib/auth/join_kubernetes.go index bcf9eeea05b56..d5bbc6586d831 100644 --- a/lib/auth/join_kubernetes.go +++ b/lib/auth/join_kubernetes.go @@ -27,16 +27,16 @@ import ( "github.com/sirupsen/logrus" "github.com/gravitational/teleport/api/types" - "github.com/gravitational/teleport/lib/kubernetestoken" + kubetoken "github.com/gravitational/teleport/lib/kube/token" ) type k8sTokenReviewValidator interface { - Validate(ctx context.Context, token, clusterName string) (*kubernetestoken.ValidationResult, error) + Validate(ctx context.Context, token, clusterName string) (*kubetoken.ValidationResult, error) } -type k8sJWKSValidator func(now time.Time, jwksData []byte, clusterName string, token string) (*kubernetestoken.ValidationResult, error) +type k8sJWKSValidator func(now time.Time, jwksData []byte, clusterName string, token string) (*kubetoken.ValidationResult, error) -func (a *Server) checkKubernetesJoinRequest(ctx context.Context, req *types.RegisterUsingTokenRequest) (*kubernetestoken.ValidationResult, error) { +func (a *Server) checkKubernetesJoinRequest(ctx context.Context, req *types.RegisterUsingTokenRequest) (*kubetoken.ValidationResult, error) { if req.IDToken == "" { return nil, trace.BadParameter("IDToken not provided for Kubernetes join request") } @@ -58,7 +58,7 @@ func (a *Server) checkKubernetesJoinRequest(ctx context.Context, req *types.Regi } // Switch to join method subtype token validation. 
- var result *kubernetestoken.ValidationResult + var result *kubetoken.ValidationResult switch token.Spec.Kubernetes.Type { case types.KubernetesJoinTypeStaticJWKS: result, err = a.k8sJWKSValidator( @@ -90,10 +90,10 @@ func (a *Server) checkKubernetesJoinRequest(ctx context.Context, req *types.Regi return result, trace.Wrap(checkKubernetesAllowRules(token, result)) } -func checkKubernetesAllowRules(pt *types.ProvisionTokenV2, got *kubernetestoken.ValidationResult) error { +func checkKubernetesAllowRules(pt *types.ProvisionTokenV2, got *kubetoken.ValidationResult) error { // If a single rule passes, accept the token for _, rule := range pt.Spec.Kubernetes.Allow { - wantUsername := fmt.Sprintf("%s:%s", kubernetestoken.ServiceAccountNamePrefix, rule.ServiceAccount) + wantUsername := fmt.Sprintf("%s:%s", kubetoken.ServiceAccountNamePrefix, rule.ServiceAccount) if wantUsername != got.Username { continue } diff --git a/lib/auth/join_kubernetes_test.go b/lib/auth/join_kubernetes_test.go index 3af845ba5467f..00090d70a124d 100644 --- a/lib/auth/join_kubernetes_test.go +++ b/lib/auth/join_kubernetes_test.go @@ -28,14 +28,14 @@ import ( "github.com/gravitational/teleport/api/types" "github.com/gravitational/teleport/lib/auth/testauthority" - "github.com/gravitational/teleport/lib/kubernetestoken" + kubetoken "github.com/gravitational/teleport/lib/kube/token" ) type mockK8STokenReviewValidator struct { - tokens map[string]*kubernetestoken.ValidationResult + tokens map[string]*kubetoken.ValidationResult } -func (m *mockK8STokenReviewValidator) Validate(_ context.Context, token, _ string) (*kubernetestoken.ValidationResult, error) { +func (m *mockK8STokenReviewValidator) Validate(_ context.Context, token, _ string) (*kubetoken.ValidationResult, error) { result, ok := m.tokens[token] if !ok { return nil, errMockInvalidToken @@ -48,14 +48,14 @@ func TestAuth_RegisterUsingToken_Kubernetes(t *testing.T) { // Test setup // Creating an auth server with mock Kubernetes token validator - tokenReviewTokens := map[string]*kubernetestoken.ValidationResult{ + tokenReviewTokens := map[string]*kubetoken.ValidationResult{ "matching-implicit-in-cluster": {Username: "system:serviceaccount:namespace1:service-account1"}, // "matching-explicit-in-cluster" intentionally matches the second allow // rule of explicitInCluster to ensure all rules are processed. 
"matching-explicit-in-cluster": {Username: "system:serviceaccount:namespace2:service-account2"}, "user-token": {Username: "namespace1:service-account1"}, } - jwksTokens := map[string]*kubernetestoken.ValidationResult{ + jwksTokens := map[string]*kubetoken.ValidationResult{ "jwks-matching-service-account": {Username: "system:serviceaccount:static-jwks:matching"}, "jwks-mismatched-service-account": {Username: "system:serviceaccount:static-jwks:mismatched"}, } @@ -63,7 +63,7 @@ func TestAuth_RegisterUsingToken_Kubernetes(t *testing.T) { ctx := context.Background() p, err := newTestPack(ctx, t.TempDir(), func(server *Server) error { server.k8sTokenReviewValidator = &mockK8STokenReviewValidator{tokens: tokenReviewTokens} - server.k8sJWKSValidator = func(_ time.Time, _ []byte, _ string, token string) (*kubernetestoken.ValidationResult, error) { + server.k8sJWKSValidator = func(_ time.Time, _ []byte, _ string, token string) (*kubetoken.ValidationResult, error) { result, ok := jwksTokens[token] if !ok { return nil, errMockInvalidToken diff --git a/lib/automaticupgrades/maintenance/mock.go b/lib/automaticupgrades/maintenance/mock.go index f46b990ee7930..f705bcee71f8b 100644 --- a/lib/automaticupgrades/maintenance/mock.go +++ b/lib/automaticupgrades/maintenance/mock.go @@ -29,6 +29,7 @@ import ( type StaticTrigger struct { name string canStart bool + err error } // Name returns the StaticTrigger name. @@ -38,7 +39,7 @@ func (m StaticTrigger) Name() string { // CanStart returns the statically defined maintenance approval result. func (m StaticTrigger) CanStart(_ context.Context, _ client.Object) (bool, error) { - return m.canStart, nil + return m.canStart, m.err } // Default returns the default behavior if the trigger fails. This cannot diff --git a/lib/automaticupgrades/maintenance/proxy.go b/lib/automaticupgrades/maintenance/proxy.go new file mode 100644 index 0000000000000..ceb2495e5c17a --- /dev/null +++ b/lib/automaticupgrades/maintenance/proxy.go @@ -0,0 +1,85 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package maintenance + +import ( + "context" + + "github.com/gravitational/trace" + "sigs.k8s.io/controller-runtime/pkg/client" + + "github.com/gravitational/teleport/api/client/webclient" + "github.com/gravitational/teleport/lib/automaticupgrades/cache" + "github.com/gravitational/teleport/lib/automaticupgrades/constants" +) + +type proxyMaintenanceClient struct { + client *webclient.ReusableClient +} + +// Get does the HTTPS call to the Teleport Proxy sevrice to check if the update should happen now. +// If the proxy response does not contain the auto_update.agent_version field, +// this means the proxy does not support autoupdates. In this case we return trace.NotImplementedErr. 
+func (b *proxyMaintenanceClient) Get(ctx context.Context) (bool, error) {
+ resp, err := b.client.Find()
+ if err != nil {
+ return false, trace.Wrap(err)
+ }
+ // We check if a version is advertised to know if the proxy implements RFD-184 or not.
+ if resp.AutoUpdate.AgentVersion == "" {
+ return false, trace.NotImplemented("proxy does not seem to implement RFD-184")
+ }
+ return resp.AutoUpdate.AgentAutoUpdate, nil
+}
+
+// ProxyMaintenanceTrigger checks if the maintenance should be triggered from the Teleport Proxy service /find endpoint,
+// as specified in the RFD-184: https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md
+// The Trigger returns trace.NotImplementedErr when running against a proxy that does not seem to
+// expose automatic update instructions over the /find endpoint (proxy too old).
+type ProxyMaintenanceTrigger struct {
+ name string
+ cachedGetter func(context.Context) (bool, error)
+}
+
+// Name implements maintenance.Trigger and returns the trigger name for logging
+// and debugging purposes.
+func (g ProxyMaintenanceTrigger) Name() string {
+ return g.name
+}
+
+// Default implements maintenance.Trigger and returns what to do if the trigger can't be evaluated.
+// ProxyMaintenanceTrigger should fail closed, so the function returns false.
+func (g ProxyMaintenanceTrigger) Default() bool {
+ return false
+}
+
+// CanStart implements maintenance.Trigger.
+func (g ProxyMaintenanceTrigger) CanStart(ctx context.Context, _ client.Object) (bool, error) {
+ result, err := g.cachedGetter(ctx)
+ return result, trace.Wrap(err)
+}
+
+// NewProxyMaintenanceTrigger builds and returns a Trigger checking a public HTTP endpoint.
+func NewProxyMaintenanceTrigger(name string, clt *webclient.ReusableClient) Trigger {
+ maintenanceClient := &proxyMaintenanceClient{
+ client: clt,
+ }
+
+ return ProxyMaintenanceTrigger{name, cache.NewTimedMemoize[bool](maintenanceClient.Get, constants.CacheDuration).Get}
+}
diff --git a/lib/automaticupgrades/maintenance/trigger.go b/lib/automaticupgrades/maintenance/trigger.go
index 53e12b26cdd4a..5d9449d7ad864 100644
--- a/lib/automaticupgrades/maintenance/trigger.go
+++ b/lib/automaticupgrades/maintenance/trigger.go
@@ -20,7 +20,9 @@ package maintenance
 import (
 "context"
+ "strings"
+ "github.com/gravitational/trace"
 "sigs.k8s.io/controller-runtime/pkg/client"
 ctrllog "sigs.k8s.io/controller-runtime/pkg/log"
)
@@ -51,7 +53,10 @@ func (t Triggers) CanStart(ctx context.Context, object client.Object) bool {
 start, err := trigger.CanStart(ctx, object)
 if err != nil {
 start = trigger.Default()
- log.Error(err, "trigger failed to evaluate, using its default value", "trigger", trigger.Name(), "defaultValue", start)
+ log.Error(
+ err, "trigger failed to evaluate, using its default value", "trigger", trigger.Name(), "defaultValue",
+ start,
+ )
 } else {
 log.Info("trigger evaluated", "trigger", trigger.Name(), "result", start)
 }
@@ -62,3 +67,48 @@ func (t Triggers) CanStart(ctx context.Context, object client.Object) bool {
 }
 return false
}
+
+// FailoverTrigger wraps multiple Triggers and tries them sequentially.
+// Any error is considered fatal, except for the trace.NotImplementedErr
+// which indicates the trigger is not supported yet and we should
+// failover to the next trigger. 
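+//
+// A minimal composition sketch, assuming a secondary version-server-based
+// trigger named versionServerTrigger (illustrative, not defined in this
+// package):
+//
+//	trigger := FailoverTrigger{
+//		NewProxyMaintenanceTrigger("proxy", reusableClient),
+//		versionServerTrigger,
+//	}
+//	canStart, err := trigger.CanStart(ctx, obj)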
+type FailoverTrigger []Trigger + +// Name implements Trigger +func (f FailoverTrigger) Name() string { + names := make([]string, len(f)) + for i, t := range f { + names[i] = t.Name() + } + + return strings.Join(names, ", failover ") +} + +// CanStart implements Trigger +// Triggers are evaluated sequentially, the result of the first trigger not returning +// trace.NotImplementedErr is used. +func (f FailoverTrigger) CanStart(ctx context.Context, object client.Object) (bool, error) { + for _, trigger := range f { + canStart, err := trigger.CanStart(ctx, object) + switch { + case err == nil: + return canStart, nil + case trace.IsNotImplemented(err): + continue + default: + return false, trace.Wrap(err) + } + } + return false, trace.NotFound("every trigger returned NotImplemented") +} + +// Default implements Trigger. +// The default is the logical OR of every Trigger.Default. +func (f FailoverTrigger) Default() bool { + for _, trigger := range f { + if trigger.Default() { + return true + } + } + return false +} diff --git a/lib/automaticupgrades/maintenance/trigger_test.go b/lib/automaticupgrades/maintenance/trigger_test.go new file mode 100644 index 0000000000000..435b73f0f9bc4 --- /dev/null +++ b/lib/automaticupgrades/maintenance/trigger_test.go @@ -0,0 +1,169 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package maintenance + +import ( + "context" + "testing" + + "github.com/gravitational/trace" + "github.com/stretchr/testify/require" +) + +// checkTraceError is a test helper that converts trace.IsXXXError into a require.ErrorAssertionFunc +func checkTraceError(check func(error) bool) require.ErrorAssertionFunc { + return func(t require.TestingT, err error, i ...interface{}) { + require.True(t, check(err), i...) 
+ } +} + +func TestFailoverTrigger_CanStart(t *testing.T) { + t.Parallel() + + // Test setup + ctx := context.Background() + tests := []struct { + name string + triggers []Trigger + expectResult bool + expectErr require.ErrorAssertionFunc + }{ + { + name: "nil", + triggers: nil, + expectResult: false, + expectErr: checkTraceError(trace.IsNotFound), + }, + { + name: "empty", + triggers: []Trigger{}, + expectResult: false, + expectErr: checkTraceError(trace.IsNotFound), + }, + { + name: "first trigger success firing", + triggers: []Trigger{ + StaticTrigger{canStart: true}, + StaticTrigger{canStart: false}, + }, + expectResult: true, + expectErr: require.NoError, + }, + { + name: "first trigger success not firing", + triggers: []Trigger{ + StaticTrigger{canStart: false}, + StaticTrigger{canStart: true}, + }, + expectResult: false, + expectErr: require.NoError, + }, + { + name: "first trigger failure", + triggers: []Trigger{ + StaticTrigger{err: trace.LimitExceeded("got rate-limited")}, + StaticTrigger{canStart: true}, + }, + expectResult: false, + expectErr: checkTraceError(trace.IsLimitExceeded), + }, + { + name: "first trigger skipped, second getter success", + triggers: []Trigger{ + StaticTrigger{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + StaticTrigger{canStart: true}, + }, + expectResult: true, + expectErr: require.NoError, + }, + { + name: "first trigger skipped, second getter failure", + triggers: []Trigger{ + StaticTrigger{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + StaticTrigger{err: trace.LimitExceeded("got rate-limited")}, + }, + expectResult: false, + expectErr: checkTraceError(trace.IsLimitExceeded), + }, + { + name: "first trigger skipped, second getter skipped", + triggers: []Trigger{ + StaticTrigger{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + StaticTrigger{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + }, + expectResult: false, + expectErr: checkTraceError(trace.IsNotFound), + }, + } + for _, tt := range tests { + t.Run( + tt.name, func(t *testing.T) { + // Test execution + trigger := FailoverTrigger(tt.triggers) + result, err := trigger.CanStart(ctx, nil) + require.Equal(t, tt.expectResult, result) + tt.expectErr(t, err) + }, + ) + } +} + +func TestFailoverTrigger_Name(t *testing.T) { + tests := []struct { + name string + triggers []Trigger + expectResult string + }{ + { + name: "nil", + triggers: nil, + expectResult: "", + }, + { + name: "empty", + triggers: []Trigger{}, + expectResult: "", + }, + { + name: "one trigger", + triggers: []Trigger{ + StaticTrigger{name: "proxy"}, + }, + expectResult: "proxy", + }, + { + name: "two triggers", + triggers: []Trigger{ + StaticTrigger{name: "proxy"}, + StaticTrigger{name: "version-server"}, + }, + expectResult: "proxy, failover version-server", + }, + } + for _, tt := range tests { + t.Run( + tt.name, func(t *testing.T) { + // Test execution + trigger := FailoverTrigger(tt.triggers) + result := trigger.Name() + require.Equal(t, tt.expectResult, result) + }, + ) + } +} diff --git a/lib/automaticupgrades/version/proxy.go b/lib/automaticupgrades/version/proxy.go new file mode 100644 index 0000000000000..90ec4859586e2 --- /dev/null +++ b/lib/automaticupgrades/version/proxy.go @@ -0,0 +1,72 @@ +/* + * Teleport + * Copyright (C) 2023 Gravitational, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package version + +import ( + "context" + + "github.com/gravitational/trace" + + "github.com/gravitational/teleport/api/client/webclient" + "github.com/gravitational/teleport/lib/automaticupgrades/cache" + "github.com/gravitational/teleport/lib/automaticupgrades/constants" +) + +type Finder interface { + Find() (*webclient.PingResponse, error) +} + +type proxyVersionClient struct { + client Finder +} + +func (b *proxyVersionClient) Get(_ context.Context) (string, error) { + resp, err := b.client.Find() + if err != nil { + return "", trace.Wrap(err) + } + // We check if a version is advertised to know if the proxy implements RFD-184 or not. + if resp.AutoUpdate.AgentVersion == "" { + return "", trace.NotImplemented("proxy does not seem to implement RFD-184") + } + return EnsureSemver(resp.AutoUpdate.AgentVersion) +} + +// ProxyVersionGetter gets the target version from the Teleport Proxy Service /find endpoint, as +// specified in the RFD-184: https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md +// The Getter returns trace.NotImplementedErr when running against a proxy that does not seem to +// expose automatic update instructions over the /find endpoint (proxy too old). +type ProxyVersionGetter struct { + cachedGetter func(context.Context) (string, error) +} + +// GetVersion implements Getter +func (g ProxyVersionGetter) GetVersion(ctx context.Context) (string, error) { + return g.cachedGetter(ctx) +} + +// NewProxyVersionGetter creates a ProxyVersionGetter from a webclient. +// The answer is cached for a minute. +func NewProxyVersionGetter(clt *webclient.ReusableClient) Getter { + versionClient := &proxyVersionClient{ + client: clt, + } + + return ProxyVersionGetter{cache.NewTimedMemoize[string](versionClient.Get, constants.CacheDuration).Get} +} diff --git a/lib/automaticupgrades/version/proxy_test.go b/lib/automaticupgrades/version/proxy_test.go new file mode 100644 index 0000000000000..2360f271c25a1 --- /dev/null +++ b/lib/automaticupgrades/version/proxy_test.go @@ -0,0 +1,116 @@ +/* + * Teleport + * Copyright (C) 2025 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package version + +import ( + "context" + "testing" + + "github.com/gravitational/trace" + "github.com/stretchr/testify/mock" + "github.com/stretchr/testify/require" + + "github.com/gravitational/teleport/api/client/webclient" +) + +type mockWebClient struct { + mock.Mock +} + +func (m *mockWebClient) Find() (*webclient.PingResponse, error) { + args := m.Called() + return args.Get(0).(*webclient.PingResponse), args.Error(1) +} + +func TestProxyVersionClient(t *testing.T) { + ctx := context.Background() + tests := []struct { + name string + pong *webclient.PingResponse + pongErr error + expectedVersion string + expectErr require.ErrorAssertionFunc + }{ + { + name: "semver without leading v", + pong: &webclient.PingResponse{ + AutoUpdate: webclient.AutoUpdateSettings{ + AgentVersion: "1.2.3", + }, + }, + expectedVersion: "v1.2.3", + expectErr: require.NoError, + }, + { + name: "semver with leading v", + pong: &webclient.PingResponse{ + AutoUpdate: webclient.AutoUpdateSettings{ + AgentVersion: "v1.2.3", + }, + }, + expectedVersion: "v1.2.3", + expectErr: require.NoError, + }, + { + name: "semver with prerelease and no leading v", + pong: &webclient.PingResponse{ + AutoUpdate: webclient.AutoUpdateSettings{ + AgentVersion: "1.2.3-dev.bartmoss.1", + }, + }, + expectedVersion: "v1.2.3-dev.bartmoss.1", + expectErr: require.NoError, + }, + { + name: "invalid semver", + pong: &webclient.PingResponse{ + AutoUpdate: webclient.AutoUpdateSettings{ + AgentVersion: "v", + }, + }, + expectedVersion: "", + expectErr: require.Error, + }, + { + name: "empty response", + pong: &webclient.PingResponse{}, + expectedVersion: "", + expectErr: func(t require.TestingT, err error, i ...interface{}) { + require.ErrorIs(t, err, trace.NotImplemented("proxy does not seem to implement RFD-184")) + }, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test setup: create mock and load fixtures. + webClient := &mockWebClient{} + webClient.On("Find").Once().Return(tt.pong, tt.pongErr) + + // Test execution. + clt := proxyVersionClient{client: webClient} + v, err := clt.Get(ctx) + + // Test validation. + tt.expectErr(t, err) + require.Equal(t, tt.expectedVersion, v) + webClient.AssertExpectations(t) + }) + } +} diff --git a/lib/automaticupgrades/version/versionget.go b/lib/automaticupgrades/version/versionget.go index f1e7723a9a320..e2a1a893e5270 100644 --- a/lib/automaticupgrades/version/versionget.go +++ b/lib/automaticupgrades/version/versionget.go @@ -1,6 +1,6 @@ /* * Teleport - * Copyright (C) 2023 Gravitational, Inc. + * Copyright (C) 2024 Gravitational, Inc. * * This program is free software: you can redistribute it and/or modify * it under the terms of the GNU Affero General Public License as published by @@ -36,13 +36,42 @@ type Getter interface { GetVersion(context.Context) (string, error) } +// FailoverGetter wraps multiple Getters and tries them sequentially. +// Any error is considered fatal, except for the trace.NotImplementedErr +// which indicates the version getter is not supported yet and we should +// failover to the next version getter. +type FailoverGetter []Getter + +// GetVersion implements Getter +// Getters are evaluated sequentially, the result of the first getter not returning +// trace.NotImplementedErr is used. 
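+//
+// For example, a sketch preferring the proxy answer and falling back to a
+// version-server getter (versionServerGetter is illustrative, not part of
+// this package):
+//
+//	getter := FailoverGetter{
+//		NewProxyVersionGetter(reusableClient),
+//		versionServerGetter,
+//	}
+//	targetVersion, err := getter.GetVersion(ctx)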
+func (f FailoverGetter) GetVersion(ctx context.Context) (string, error) { + for _, getter := range f { + version, err := getter.GetVersion(ctx) + switch { + case err == nil: + return version, nil + case trace.IsNotImplemented(err): + continue + default: + return "", trace.Wrap(err) + } + } + return "", trace.NotFound("every versionGetter returned NotImplemented") +} + // ValidVersionChange receives the current version and the candidate next version // and evaluates if the version transition is valid. func ValidVersionChange(ctx context.Context, current, next string) bool { log := ctrllog.FromContext(ctx).V(1) // Cannot upgrade to a non-valid version if !semver.IsValid(next) { - log.Error(trace.BadParameter("next version is not following semver"), "version change is invalid", "nextVersion", next) + log.Error( + trace.BadParameter("next version is not following semver"), + "version change is invalid", + "current_version", current, + "next_version", next, + ) return false } switch semver.Compare(next, current) { diff --git a/lib/automaticupgrades/version/versionget_test.go b/lib/automaticupgrades/version/versionget_test.go index 80c2ec767b8fb..78f4940db229a 100644 --- a/lib/automaticupgrades/version/versionget_test.go +++ b/lib/automaticupgrades/version/versionget_test.go @@ -22,6 +22,7 @@ import ( "context" "testing" + "github.com/gravitational/trace" "github.com/stretchr/testify/require" ) @@ -66,8 +67,99 @@ func TestValidVersionChange(t *testing.T) { }, } for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - require.Equal(t, tt.want, ValidVersionChange(ctx, tt.current, tt.next)) - }) + t.Run( + tt.name, func(t *testing.T) { + require.Equal(t, tt.want, ValidVersionChange(ctx, tt.current, tt.next)) + }, + ) + } +} + +// checkTraceError is a test helper that converts trace.IsXXXError into a require.ErrorAssertionFunc +func checkTraceError(check func(error) bool) require.ErrorAssertionFunc { + return func(t require.TestingT, err error, i ...interface{}) { + require.True(t, check(err), i...) 
+ } +} + +func TestFailoverGetter_GetVersion(t *testing.T) { + t.Parallel() + + // Test setup + ctx := context.Background() + tests := []struct { + name string + getters []Getter + expectResult string + expectErr require.ErrorAssertionFunc + }{ + { + name: "nil", + getters: nil, + expectResult: "", + expectErr: checkTraceError(trace.IsNotFound), + }, + { + name: "empty", + getters: []Getter{}, + expectResult: "", + expectErr: checkTraceError(trace.IsNotFound), + }, + { + name: "first getter success", + getters: []Getter{ + StaticGetter{version: semverMid}, + StaticGetter{version: semverHigh}, + }, + expectResult: semverMid, + expectErr: require.NoError, + }, + { + name: "first getter failure", + getters: []Getter{ + StaticGetter{err: trace.LimitExceeded("got rate-limited")}, + StaticGetter{version: semverHigh}, + }, + expectResult: "", + expectErr: checkTraceError(trace.IsLimitExceeded), + }, + { + name: "first getter skipped, second getter success", + getters: []Getter{ + StaticGetter{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + StaticGetter{version: semverHigh}, + }, + expectResult: semverHigh, + expectErr: require.NoError, + }, + { + name: "first getter skipped, second getter failure", + getters: []Getter{ + StaticGetter{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + StaticGetter{err: trace.LimitExceeded("got rate-limited")}, + }, + expectResult: "", + expectErr: checkTraceError(trace.IsLimitExceeded), + }, + { + name: "first getter skipped, second getter skipped", + getters: []Getter{ + StaticGetter{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + StaticGetter{err: trace.NotImplemented("proxy does not seem to implement RFD-184")}, + }, + expectResult: "", + expectErr: checkTraceError(trace.IsNotFound), + }, + } + for _, tt := range tests { + t.Run( + tt.name, func(t *testing.T) { + // Test execution + getter := FailoverGetter(tt.getters) + result, err := getter.GetVersion(ctx) + require.Equal(t, tt.expectResult, result) + tt.expectErr(t, err) + }, + ) } } diff --git a/lib/autoupdate/rolloutcontroller/client.go b/lib/autoupdate/rollout/client.go similarity index 89% rename from lib/autoupdate/rolloutcontroller/client.go rename to lib/autoupdate/rollout/client.go index 4dead0f9dee19..bde2267d095de 100644 --- a/lib/autoupdate/rolloutcontroller/client.go +++ b/lib/autoupdate/rollout/client.go @@ -16,12 +16,13 @@ * along with this program. If not, see . */ -package rolloutcontroller +package rollout import ( "context" autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + "github.com/gravitational/teleport/api/types" ) // Client is the subset of the Teleport client RPCs the controller needs. @@ -43,4 +44,7 @@ type Client interface { // DeleteAutoUpdateAgentRollout deletes the AutoUpdateAgentRollout singleton resource. DeleteAutoUpdateAgentRollout(ctx context.Context) error + + // GetClusterMaintenanceConfig loads the current maintenance config singleton. + GetClusterMaintenanceConfig(ctx context.Context) (types.ClusterMaintenanceConfig, error) } diff --git a/lib/autoupdate/rolloutcontroller/client_test.go b/lib/autoupdate/rollout/client_test.go similarity index 80% rename from lib/autoupdate/rolloutcontroller/client_test.go rename to lib/autoupdate/rollout/client_test.go index ba204ffb77db3..782251a562025 100644 --- a/lib/autoupdate/rolloutcontroller/client_test.go +++ b/lib/autoupdate/rollout/client_test.go @@ -16,7 +16,7 @@ * along with this program. If not, see . 
 */
-package rolloutcontroller
+package rollout
 import (
 "context"
@@ -24,12 +24,15 @@ import (
 "github.com/stretchr/testify/require"
 "google.golang.org/protobuf/proto"
+ "google.golang.org/protobuf/protoadapt"
 "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+ "github.com/gravitational/teleport/api/types"
+ apiutils "github.com/gravitational/teleport/api/utils"
)
// mockClient is a mock implementation if the Client interface for testing purposes.
-// This is used to precisely check which calls are made by the Reconciler during tests.
+// This is used to precisely check which calls are made by the reconciler during tests.
// Use newMockClient to create one from stubs. Once the test is over, you must call
// mockClient.checkIfEmpty to validate all expected calls were made.
type mockClient struct {
@@ -39,6 +42,7 @@ type mockClient struct {
 createAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
 updateAutoUpdateAgentRollout *createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]
 deleteAutoUpdateAgentRollout *deleteHandler
+ getClusterMaintenanceConfig *legacyGetHandler[*types.ClusterMaintenanceConfigV1]
}
func (m mockClient) GetAutoUpdateConfig(ctx context.Context) (*autoupdate.AutoUpdateConfig, error) {
@@ -65,6 +69,10 @@ func (m mockClient) DeleteAutoUpdateAgentRollout(ctx context.Context) error {
 return m.deleteAutoUpdateAgentRollout.handle(ctx)
}
+func (m mockClient) GetClusterMaintenanceConfig(ctx context.Context) (types.ClusterMaintenanceConfig, error) {
+ return m.getClusterMaintenanceConfig.handle(ctx)
+}
+
func (m mockClient) checkIfEmpty(t *testing.T) {
 require.True(t, m.getAutoUpdateConfig.isEmpty(), "Get autoupdate_config mock not empty")
 require.True(t, m.getAutoUpdateVersion.isEmpty(), "Get autoupdate_version mock not empty")
@@ -72,6 +80,7 @@ func (m mockClient) checkIfEmpty(t *testing.T) {
 require.True(t, m.createAutoUpdateAgentRollout.isEmpty(), "Create autoupdate_agent_rollout mock not empty")
 require.True(t, m.updateAutoUpdateAgentRollout.isEmpty(), "Update autoupdate_agent_rollout mock not empty")
 require.True(t, m.deleteAutoUpdateAgentRollout.isEmpty(), "Delete autoupdate_agent_rollout mock not empty")
+ require.True(t, m.getClusterMaintenanceConfig.isEmpty(), "Get cluster_maintenance config mock not empty")
}
func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
@@ -86,6 +95,7 @@ func newMockClient(t *testing.T, stubs mockClientStubs) *mockClient {
 createAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.createRolloutExpects, stubs.createRolloutAnswers},
 updateAutoUpdateAgentRollout: &createUpdateHandler[*autoupdate.AutoUpdateAgentRollout]{t, stubs.updateRolloutExpects, stubs.updateRolloutAnswers},
 deleteAutoUpdateAgentRollout: &deleteHandler{t, stubs.deleteRolloutAnswers},
+ getClusterMaintenanceConfig: &legacyGetHandler[*types.ClusterMaintenanceConfigV1]{t, stubs.cmcAnswers},
 }
}
@@ -98,6 +108,7 @@ type mockClientStubs struct {
 updateRolloutAnswers []callAnswer[*autoupdate.AutoUpdateAgentRollout]
 updateRolloutExpects []require.ValueAssertionFunc
 deleteRolloutAnswers []error
+ cmcAnswers []callAnswer[*types.ClusterMaintenanceConfigV1]
}
type callAnswer[T any] struct {
@@ -131,6 +142,35 @@ func (h *getHandler[T]) isEmpty() bool {
 return len(h.answers) == 0
}
+// legacyGetHandler is a getHandler for legacy teleport types (gogo proto-based)
+// A first iteration was trying to be smart and reuse the getHandler logic
+// by converting fixtures first to 
protoadapt.MessageV2, and converting back to +// protoadapt.MessageV1 before returning. The resulting code was hard to read and +// duplicating the logic seems more maintainable. +type legacyGetHandler[T protoadapt.MessageV1] struct { + t *testing.T + answers []callAnswer[T] +} + +func (h *legacyGetHandler[T]) handle(_ context.Context) (T, error) { + if len(h.answers) == 0 { + require.Fail(h.t, "no answers left") + } + + entry := h.answers[0] + h.answers = h.answers[1:] + + // We need to deep copy because the reconciler might do updates in place. + // We don't want the original resource to be edited as this would mess with other tests. + result := apiutils.CloneProtoMsg(entry.result) + return result, entry.err +} + +// isEmpty returns true only if all stubs were consumed +func (h *legacyGetHandler[T]) isEmpty() bool { + return len(h.answers) == 0 +} + // createUpdateHandler is used in a mock client to answer create or update resource requests during tests (any request whose arity is 2). // It first validates the input using the provided validation function, then it returns the predefined answer and error. // If there are no stubs left it fails the test. diff --git a/lib/autoupdate/rollout/controller.go b/lib/autoupdate/rollout/controller.go new file mode 100644 index 0000000000000..0d9ee97d23f98 --- /dev/null +++ b/lib/autoupdate/rollout/controller.go @@ -0,0 +1,155 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rollout + +import ( + "context" + "log/slog" + "time" + + "github.com/gravitational/trace" + "github.com/jonboulle/clockwork" + "github.com/prometheus/client_golang/prometheus" + + "github.com/gravitational/teleport" + "github.com/gravitational/teleport/api/utils/retryutils" + "github.com/gravitational/teleport/lib/utils/interval" +) + +const ( + defaultReconcilerPeriod = time.Minute +) + +// Controller wakes up every minute to reconcile the autoupdate_agent_rollout resource. +// See the reconciler godoc for more details about the reconciliation process. +// We currently wake up every minute, in the future we might decide to also watch for events +// (from autoupdate_config and autoupdate_version changefeed) to react faster. +type Controller struct { + // TODO(hugoShaka) add prometheus metrics describing the reconciliation status + reconciler reconciler + clock clockwork.Clock + log *slog.Logger + period time.Duration + metrics *metrics +} + +// NewController creates a new Controller for the autoupdate_agent_rollout kind. +// The period can be specified to control the sync frequency. This is mainly +// used to speed up tests or for demo purposes. When empty, the controller picks +// a sane default value. 
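+//
+// A minimal construction and run sketch, assuming clt implements this
+// package's Client interface (error handling elided):
+//
+//	ctrl, err := NewController(clt, slog.Default(), clockwork.NewRealClock(), 0, prometheus.NewRegistry())
+//	if err != nil {
+//		return trace.Wrap(err)
+//	}
+//	go ctrl.Run(ctx)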
+func NewController(client Client, log *slog.Logger, clock clockwork.Clock, period time.Duration, reg prometheus.Registerer) (*Controller, error) {
+ if client == nil {
+ return nil, trace.BadParameter("missing client")
+ }
+ if log == nil {
+ return nil, trace.BadParameter("missing log")
+ }
+ if clock == nil {
+ return nil, trace.BadParameter("missing clock")
+ }
+ if reg == nil {
+ return nil, trace.BadParameter("missing prometheus.Registerer")
+ }
+
+ if period <= 0 {
+ period = defaultReconcilerPeriod
+ }
+
+ log = log.With(teleport.ComponentLabel, teleport.ComponentRolloutController)
+
+ haltOnError, err := newHaltOnErrorStrategy(log)
+ if err != nil {
+ return nil, trace.Wrap(err, "failed to initialize halt-on-error strategy")
+ }
+ timeBased, err := newTimeBasedStrategy(log)
+ if err != nil {
+ return nil, trace.Wrap(err, "failed to initialize time-based strategy")
+ }
+
+ m, err := newMetrics(reg)
+ if err != nil {
+ return nil, trace.Wrap(err, "failed to initialize metrics")
+ }
+
+ return &Controller{
+ metrics: m,
+ clock: clock,
+ log: log,
+ reconciler: reconciler{
+ clt: client,
+ log: log,
+ clock: clock,
+ metrics: m,
+ rolloutStrategies: []rolloutStrategy{
+ timeBased,
+ haltOnError,
+ },
+ },
+ period: period,
+ }, nil
+}
+
+// Run the autoupdate_agent_rollout controller. This function returns only when its context is canceled.
+func (c *Controller) Run(ctx context.Context) error {
+ config := interval.Config{
+ Duration: c.period,
+ FirstDuration: c.period,
+ Jitter: retryutils.SeventhJitter,
+ Clock: c.clock,
+ }
+ ticker := interval.New(config)
+ defer ticker.Stop()
+
+ c.log.InfoContext(ctx, "Starting autoupdate_agent_rollout controller", "period", c.period)
+ for {
+ select {
+ case <-ctx.Done():
+ c.log.InfoContext(ctx, "Stopping autoupdate_agent_rollout controller", "reason", ctx.Err())
+ return ctx.Err()
+ case <-ticker.Next():
+ c.log.DebugContext(ctx, "Reconciling autoupdate_agent_rollout")
+ if err := c.tryAndCatch(ctx); err != nil {
+ c.log.ErrorContext(ctx, "Failed to reconcile autoupdate_agent_rollout", "error", err)
+ }
+ }
+ }
+}
+
+// tryAndCatch tries to run the controller reconciliation logic and recovers from potential panics by converting them
+// into errors. This ensures that a critical bug in the reconciler cannot bring down the whole Teleport cluster.
+func (c *Controller) tryAndCatch(ctx context.Context) (err error) {
+ startTime := c.clock.Now()
+ // If something terribly bad happens during the reconciliation, we recover and return an error
+ defer func() {
+ if r := recover(); r != nil {
+ c.log.ErrorContext(ctx, "Recovered from panic in the autoupdate_agent_rollout controller", "panic", r)
+ err = trace.NewAggregate(err, trace.Errorf("Panic recovered during reconciliation: %v", r))
+ c.metrics.observeReconciliation(metricsReconciliationResultLabelValuePanic, c.clock.Now().Sub(startTime))
+ }
+ }()
+
+ err = trace.Wrap(c.reconciler.reconcile(ctx))
+ endTime := c.clock.Now()
+ result := metricsReconciliationResultLabelValueSuccess
+ if err != nil {
+ result = metricsReconciliationResultLabelValueFail
+ }
+ c.metrics.observeReconciliation(result, endTime.Sub(startTime))
+ return
+}
diff --git a/lib/autoupdate/rollout/metrics.go b/lib/autoupdate/rollout/metrics.go
new file mode 100644
index 0000000000000..0f68ef756510e
--- /dev/null
+++ b/lib/autoupdate/rollout/metrics.go
@@ -0,0 +1,360 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc. 
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see .
+ */
+
+package rollout
+
+import (
+ "fmt"
+ "maps"
+ "slices"
+ "strconv"
+ "strings"
+ "sync"
+ "time"
+
+ "github.com/gravitational/trace"
+ "github.com/prometheus/client_golang/prometheus"
+ "golang.org/x/exp/constraints"
+
+ "github.com/gravitational/teleport"
+ autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+ "github.com/gravitational/teleport/api/types/autoupdate"
+)
+
+const (
+ metricsSubsystem = "agent_autoupdates"
+ metricVersionLabelRetention = 24 * time.Hour
+)
+
+type metrics struct {
+ // lock protects previousVersions and groupCount.
+ // It should only be acquired by setVersionMetric and setGroupStates.
+ lock sync.Mutex
+
+ // previousVersions is a list of the versions we exported metrics for.
+ // We track those so we can zero every old version when metric labels contain the version.
+ previousVersions map[string]time.Time
+ groupCount int
+
+ // controller metrics
+ reconciliations *prometheus.CounterVec
+ reconciliationDuration *prometheus.HistogramVec
+ reconciliationTries *prometheus.CounterVec
+ reconciliationTryDuration *prometheus.HistogramVec
+
+ // resource spec metrics
+ versionPresent prometheus.Gauge
+ versionStart *prometheus.GaugeVec
+ versionTarget *prometheus.GaugeVec
+ versionMode prometheus.Gauge
+
+ configPresent prometheus.Gauge
+ configMode prometheus.Gauge
+
+ rolloutPresent prometheus.Gauge
+ rolloutStart *prometheus.GaugeVec
+ rolloutTarget *prometheus.GaugeVec
+ rolloutMode prometheus.Gauge
+ rolloutStrategy *prometheus.GaugeVec
+
+ // rollout status metrics
+ rolloutTimeOverride prometheus.Gauge
+ rolloutState prometheus.Gauge
+ rolloutGroupState *prometheus.GaugeVec
+}
+
+const (
+ metricsReconciliationResultLabelName = "result"
+ metricsReconciliationResultLabelValueFail = "fail"
+ metricsReconciliationResultLabelValuePanic = "panic"
+ metricsReconciliationResultLabelValueRetry = "retry"
+ metricsReconciliationResultLabelValueSuccess = "success"
+
+ metricsGroupNumberLabelName = "group_number"
+ metricsVersionLabelName = "version"
+
+ metricsStrategyLabelName = "strategy"
+)
+
+func newMetrics(reg prometheus.Registerer) (*metrics, error) {
+ m := metrics{
+ previousVersions: make(map[string]time.Time),
+ reconciliations: prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "reconciliations_total",
+ Help: "Count the rollout reconciliations triggered by the controller, and their result (success, failure, panic). One reconciliation might imply several tries in case of conflict.",
+ }, []string{metricsReconciliationResultLabelName}),
+ reconciliationDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "reconciliation_duration_seconds",
+ Help: "Time spent reconciling the autoupdate_agent_rollout resource. 
One reconciliation might imply several tries in case of conflict.",
+ }, []string{metricsReconciliationResultLabelName}),
+ reconciliationTries: prometheus.NewCounterVec(prometheus.CounterOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "reconciliation_tries_total",
+ Help: "Count the rollout reconciliations tried by the controller, and their result (success, failure, conflict).",
+ }, []string{metricsReconciliationResultLabelName}),
+ reconciliationTryDuration: prometheus.NewHistogramVec(prometheus.HistogramOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "reconciliation_try_duration_seconds",
+ Help: "Time spent trying to reconcile the autoupdate_agent_rollout resource.",
+ }, []string{metricsReconciliationResultLabelName}),
+
+ versionPresent: prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "version_present",
+ Help: "Boolean describing if an autoupdate_version resource exists in Teleport and its 'spec.agents' field is not nil.",
+ }),
+ versionTarget: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "version_target",
+ Help: "Metric describing the agent target version from the autoupdate_version resource.",
+ }, []string{metricsVersionLabelName}),
+ versionStart: prometheus.NewGaugeVec(prometheus.GaugeOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "version_start",
+ Help: "Metric describing the agent start version from the autoupdate_version resource.",
+ }, []string{metricsVersionLabelName}),
+ versionMode: prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "version_mode",
+ Help: fmt.Sprintf("Metric describing the agent update mode from the autoupdate_version resource. %s", valuesHelpString(codeToAgentMode)),
+ }),
+
+ configPresent: prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "config_present",
+ Help: "Boolean describing if an autoupdate_config resource exists in Teleport and its 'spec.agents' field is not nil.",
+ }),
+ configMode: prometheus.NewGauge(prometheus.GaugeOpts{
+ Namespace: teleport.MetricNamespace,
+ Subsystem: metricsSubsystem,
+ Name: "config_mode",
+ Help: fmt.Sprintf("Metric describing the agent update mode from the autoupdate_config resource. 
%s", valuesHelpString(codeToAgentMode)), + }), + + rolloutPresent: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_present", + Help: "Boolean describing if an autoupdate_agent_rollout resource exists in Teleport.", + }), + rolloutTarget: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_target", + Help: "Metric describing the agent target version from the autoupdate_gent_rollout resource.", + }, []string{metricsVersionLabelName}), + rolloutStart: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_start", + Help: "Metric describing the agent start version from the autoupdate_agent_rollout resource.", + }, []string{metricsVersionLabelName}), + rolloutMode: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_mode", + Help: fmt.Sprintf("Metric describing the agent update mode from the autoupdate_agent_rollout resource. %s", valuesHelpString(codeToAgentMode)), + }), + rolloutStrategy: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_strategy", + Help: "Metric describing the strategy of the autoupdate_agent_rollout resource.", + }, []string{metricsStrategyLabelName}), + rolloutTimeOverride: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_time_override_timestamp_seconds", + Help: "Describes the autoupdate_agent_rollout time override if set in (seconds since epoch). Zero means no time override.", + }), + rolloutState: prometheus.NewGauge(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_state", + Help: fmt.Sprintf("Describes the autoupdate_agent_rollout state. %s", valuesHelpString(autoupdatepb.AutoUpdateAgentRolloutState_name)), + }), + rolloutGroupState: prometheus.NewGaugeVec(prometheus.GaugeOpts{ + Namespace: teleport.MetricNamespace, + Subsystem: metricsSubsystem, + Name: "rollout_group_state", + Help: fmt.Sprintf("Describes the autoupdate_agent_rollout state for each group. Groups are identified by their position in the schedule. %s", valuesHelpString(autoupdatepb.AutoUpdateAgentGroupState_name)), + }, []string{metricsGroupNumberLabelName}), + } + + errs := trace.NewAggregate( + reg.Register(m.reconciliations), + reg.Register(m.reconciliationDuration), + reg.Register(m.reconciliationTries), + reg.Register(m.reconciliationTryDuration), + + reg.Register(m.versionPresent), + reg.Register(m.versionTarget), + reg.Register(m.versionStart), + reg.Register(m.versionMode), + reg.Register(m.configPresent), + reg.Register(m.configMode), + reg.Register(m.rolloutPresent), + reg.Register(m.rolloutTarget), + reg.Register(m.rolloutStart), + reg.Register(m.rolloutMode), + reg.Register(m.rolloutStrategy), + + reg.Register(m.rolloutTimeOverride), + reg.Register(m.rolloutState), + reg.Register(m.rolloutGroupState), + ) + + return &m, errs +} + +func valuesHelpString[K constraints.Integer](possibleValues map[K]string) string { + sb := strings.Builder{} + sb.WriteString("Possible values are") + + // maps are nor ordered, so we must sort keys to consistently generate the help message. 
+ keys := maps.Keys(possibleValues)
+ for _, k := range slices.Sorted(keys) {
+ sb.WriteString(fmt.Sprintf(" %d:%s", k, possibleValues[k]))
+ }
+
+ sb.WriteRune('.')
+ return sb.String()
+}
+
+func (m *metrics) setVersionMetric(version string, metric *prometheus.GaugeVec, now time.Time) {
+ m.lock.Lock()
+ defer m.lock.Unlock()
+
+ // for every version we've seen
+ for v, ts := range m.previousVersions {
+ labels := prometheus.Labels{metricsVersionLabelName: v}
+ // if the version is too old, we forget about it to limit cardinality
+ if now.After(ts.Add(metricVersionLabelRetention)) {
+ metric.Delete(labels)
+ delete(m.previousVersions, v)
+ } else {
+ // Else we just mark the version as not set anymore
+ metric.With(labels).Set(0)
+ }
+ }
+ // We set the new version
+ metric.With(prometheus.Labels{metricsVersionLabelName: version}).Set(1)
+ m.previousVersions[version] = now
+}
+
+func (m *metrics) observeReconciliation(result string, duration time.Duration) {
+ m.reconciliations.With(prometheus.Labels{metricsReconciliationResultLabelName: result}).Inc()
+ m.reconciliationDuration.With(prometheus.Labels{metricsReconciliationResultLabelName: result}).Observe(duration.Seconds())
+}
+
+func (m *metrics) observeReconciliationTry(result string, duration time.Duration) {
+ m.reconciliationTries.With(prometheus.Labels{metricsReconciliationResultLabelName: result}).Inc()
+ m.reconciliationTryDuration.With(prometheus.Labels{metricsReconciliationResultLabelName: result}).Observe(duration.Seconds())
+}
+
+func (m *metrics) observeConfig(config *autoupdatepb.AutoUpdateConfig) {
+ if config.GetSpec().GetAgents() == nil {
+ m.configPresent.Set(0)
+ m.configMode.Set(float64(agentModeCode[defaultConfigMode]))
+ return
+ }
+ m.configPresent.Set(1)
+ m.configMode.Set(float64(agentModeCode[config.GetSpec().GetAgents().GetMode()]))
+}
+
+func (m *metrics) observeVersion(version *autoupdatepb.AutoUpdateVersion, now time.Time) {
+ if version.GetSpec().GetAgents() == nil {
+ m.versionPresent.Set(0)
+ m.versionMode.Set(float64(agentModeCode[defaultConfigMode]))
+ return
+ }
+ m.versionPresent.Set(1)
+ m.versionMode.Set(float64(agentModeCode[version.GetSpec().GetAgents().GetMode()]))
+ m.setVersionMetric(version.GetSpec().GetAgents().GetStartVersion(), m.versionStart, now)
+ m.setVersionMetric(version.GetSpec().GetAgents().GetTargetVersion(), m.versionTarget, now)
+}
+
+func (m *metrics) setGroupStates(groups []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup) {
+ m.lock.Lock()
+ defer m.lock.Unlock()
+
+ // Set the state for the groups specified in the rollout.
+ for i, group := range groups {
+ labels := prometheus.Labels{metricsGroupNumberLabelName: strconv.Itoa(i)}
+ m.rolloutGroupState.With(labels).Set(float64(group.State))
+ }
+
+ // If we have as many or more groups than before, no cleanup to do.
+ if len(groups) >= m.groupCount {
+ m.groupCount = len(groups)
+ return
+ }
+
+ // If we have fewer groups than before, we must unset the metrics for higher group numbers. 
+ for i := len(groups); i < m.groupCount; i++ {
+ labels := prometheus.Labels{metricsGroupNumberLabelName: strconv.Itoa(i)}
+ m.rolloutGroupState.With(labels).Set(float64(0))
+ }
+ m.groupCount = len(groups)
+}
+
+func (m *metrics) observeRollout(rollout *autoupdatepb.AutoUpdateAgentRollout, now time.Time) {
+ if rollout.GetSpec() == nil {
+ m.rolloutPresent.Set(0)
+ m.rolloutMode.Set(0)
+ } else {
+ m.rolloutPresent.Set(1)
+ m.rolloutMode.Set(float64(agentModeCode[rollout.GetSpec().GetAutoupdateMode()]))
+ m.setVersionMetric(rollout.GetSpec().GetStartVersion(), m.rolloutStart, now)
+ m.setVersionMetric(rollout.GetSpec().GetTargetVersion(), m.rolloutTarget, now)
+ }
+
+ m.setStrategyMetric(rollout.GetSpec().GetStrategy(), m.rolloutStrategy)
+
+ if to := rollout.GetStatus().GetTimeOverride().AsTime(); !(to.IsZero() || to.Unix() == 0) {
+ m.rolloutTimeOverride.Set(float64(to.Unix()))
+ } else {
+ m.rolloutTimeOverride.Set(0)
+ }
+
+ m.rolloutState.Set(float64(rollout.GetStatus().GetState()))
+ m.setGroupStates(rollout.GetStatus().GetGroups())
+}
+
+var strategies = []string{autoupdate.AgentsStrategyHaltOnError, autoupdate.AgentsStrategyTimeBased}
+
+func (m *metrics) setStrategyMetric(strategy string, metric *prometheus.GaugeVec) {
+ for _, s := range strategies {
+ if s == strategy {
+ metric.With(prometheus.Labels{metricsStrategyLabelName: s}).Set(1)
+ } else {
+ metric.With(prometheus.Labels{metricsStrategyLabelName: s}).Set(0)
+ }
+ }
+}
diff --git a/lib/autoupdate/rollout/metrics_test.go b/lib/autoupdate/rollout/metrics_test.go
new file mode 100644
index 0000000000000..948df1aca51e6
--- /dev/null
+++ b/lib/autoupdate/rollout/metrics_test.go
@@ -0,0 +1,291 @@
+/*
+ * Teleport
+ * Copyright (C) 2025 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see . 
+ */
+
+package rollout
+
+import (
+	"testing"
+	"time"
+
+	"github.com/jonboulle/clockwork"
+	"github.com/prometheus/client_golang/prometheus"
+	dto "github.com/prometheus/client_model/go"
+	"github.com/stretchr/testify/require"
+
+	autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+)
+
+func newMetricsForTest(t *testing.T) *metrics {
+	reg := prometheus.NewRegistry()
+	m, err := newMetrics(reg)
+	require.NoError(t, err)
+	return m
+}
+
+func Test_setVersionMetric(t *testing.T) {
+	now := clockwork.NewFakeClock().Now()
+	aMinuteAgo := now.Add(-time.Minute)
+	aWeekAgo := now.Add(-time.Hour * 24 * 7)
+	testVersion := "1.2.3-alpha.1"
+	previousVersion := "1.2.1"
+	testMetricLabels := []string{metricsVersionLabelName}
+	tests := []struct {
+		name             string
+		previousVersions map[string]time.Time
+		previousMetrics  map[string]float64
+		expectedVersions map[string]time.Time
+		expectedMetrics  map[string]float64
+	}{
+		{
+			name:             "no versions",
+			previousVersions: map[string]time.Time{},
+			previousMetrics:  map[string]float64{},
+			expectedVersions: map[string]time.Time{
+				testVersion: now,
+			},
+			expectedMetrics: map[string]float64{
+				testVersion: 1,
+			},
+		},
+		{
+			name: "same version, not expired",
+			previousVersions: map[string]time.Time{
+				testVersion: aMinuteAgo,
+			},
+			previousMetrics: map[string]float64{
+				testVersion: 1,
+			},
+			expectedVersions: map[string]time.Time{
+				testVersion: now,
+			},
+			expectedMetrics: map[string]float64{
+				testVersion: 1,
+			},
+		},
+		{
+			name: "same version, expired",
+			previousVersions: map[string]time.Time{
+				testVersion: aWeekAgo,
+			},
+			previousMetrics: map[string]float64{
+				testVersion: 1,
+			},
+			expectedVersions: map[string]time.Time{
+				testVersion: now,
+			},
+			expectedMetrics: map[string]float64{
+				testVersion: 1,
+			},
+		},
+		{
+			name: "old non-expired versions",
+			previousVersions: map[string]time.Time{
+				previousVersion: aMinuteAgo,
+			},
+			previousMetrics: map[string]float64{
+				previousVersion: 1,
+			},
+			expectedVersions: map[string]time.Time{
+				previousVersion: aMinuteAgo,
+				testVersion:     now,
+			},
+			expectedMetrics: map[string]float64{
+				previousVersion: 0,
+				testVersion:     1,
+			},
+		},
+		{
+			name: "old expired versions",
+			previousVersions: map[string]time.Time{
+				previousVersion: aWeekAgo,
+			},
+			previousMetrics: map[string]float64{
+				previousVersion: 1,
+			},
+			expectedVersions: map[string]time.Time{
+				testVersion: now,
+			},
+			expectedMetrics: map[string]float64{
+				testVersion: 1,
+			},
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+			// Test setup: create metrics and load previous metrics.
+			m := metrics{
+				previousVersions: test.previousVersions,
+			}
+
+			testGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, testMetricLabels)
+			for k, v := range test.previousMetrics {
+				testGauge.With(prometheus.Labels{testMetricLabels[0]: k}).Set(v)
+			}
+
+			// Test execution: set the version metric.
+			m.setVersionMetric(testVersion, testGauge, now)
+
+			// Test validation: collect the metrics and check that the states match what we expect.
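+			// The internal bookkeeping map is checked first, then the gauge is
+			// drained through Collect() to verify the exported label/value pairs.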
+ require.Equal(t, test.expectedVersions, m.previousVersions) + metricsChan := make(chan prometheus.Metric, 100) + testGauge.Collect(metricsChan) + close(metricsChan) + metricsResult := collectMetricsByLabel(t, metricsChan, testMetricLabels[0]) + require.Equal(t, test.expectedMetrics, metricsResult) + }) + } +} + +func Test_setGroupStates(t *testing.T) { + testMetricLabels := []string{metricsGroupNumberLabelName} + testGroups := []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{ + {State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE}, + {State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE}, + {State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + } + tests := []struct { + name string + previousGroupCount int + previousMetrics map[string]float64 + expectedGroupCount int + expectedMetrics map[string]float64 + }{ + { + name: "no groups", + previousGroupCount: 0, + previousMetrics: map[string]float64{}, + expectedGroupCount: len(testGroups), + expectedMetrics: map[string]float64{ + "0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE), + "1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE), + "2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED), + }, + }, + { + name: "same groups, same states", + previousGroupCount: len(testGroups), + previousMetrics: map[string]float64{ + "0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE), + "1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE), + "2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED), + }, + expectedGroupCount: len(testGroups), + expectedMetrics: map[string]float64{ + "0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE), + "1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE), + "2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED), + }, + }, + { + name: "same groups, different states", + previousGroupCount: len(testGroups), + previousMetrics: map[string]float64{ + "0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE), + "1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED), + "2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED), + }, + expectedGroupCount: len(testGroups), + expectedMetrics: map[string]float64{ + "0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE), + "1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE), + "2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED), + }, + }, + { + name: "less groups", + previousGroupCount: 1, + previousMetrics: map[string]float64{ + "0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE), + }, + expectedGroupCount: len(testGroups), + expectedMetrics: map[string]float64{ + "0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE), + "1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE), + "2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED), + }, + }, + { + name: "more groups", + previousGroupCount: 5, + previousMetrics: 
map[string]float64{
+				"0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE),
+				"1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED),
+				"2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED),
+				"3": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED),
+				"4": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED),
+			},
+			expectedGroupCount: len(testGroups),
+			expectedMetrics: map[string]float64{
+				"0": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE),
+				"1": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE),
+				"2": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED),
+				"3": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSPECIFIED),
+				"4": float64(autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSPECIFIED),
+			},
+		},
+	}
+	for _, test := range tests {
+		t.Run(test.name, func(t *testing.T) {
+			t.Parallel()
+
+			testGauge := prometheus.NewGaugeVec(prometheus.GaugeOpts{}, testMetricLabels)
+			for k, v := range test.previousMetrics {
+				testGauge.With(prometheus.Labels{testMetricLabels[0]: k}).Set(v)
+			}
+
+			// Test setup: create metrics and load previous metrics.
+			m := metrics{
+				groupCount:        test.previousGroupCount,
+				rolloutGroupState: testGauge,
+			}
+
+			// Test execution: set the group states.
+			m.setGroupStates(testGroups)
+
+			// Test validation: collect the metrics and check that the states match what we expect.
+			require.Equal(t, test.expectedGroupCount, m.groupCount)
+			metricsChan := make(chan prometheus.Metric, 100)
+			m.rolloutGroupState.Collect(metricsChan)
+			close(metricsChan)
+			metricsResult := collectMetricsByLabel(t, metricsChan, testMetricLabels[0])
+			require.Equal(t, test.expectedMetrics, metricsResult)
+		})
+	}
+}
+
+func collectMetricsByLabel(t *testing.T, ch <-chan prometheus.Metric, labelName string) map[string]float64 {
+	t.Helper()
+	result := make(map[string]float64)
+
+	var protoMetric dto.Metric
+	for {
+		m, ok := <-ch
+		if !ok {
+			return result
+		}
+		require.NoError(t, m.Write(&protoMetric))
+		ll := protoMetric.GetLabel()
+		require.Len(t, ll, 1)
+		require.Equal(t, labelName, ll[0].GetName())
+		gg := protoMetric.GetGauge()
+		require.NotNil(t, gg)
+		result[ll[0].GetValue()] = gg.GetValue()
+	}
+}
diff --git a/lib/autoupdate/rollout/reconciler.go b/lib/autoupdate/rollout/reconciler.go
new file mode 100644
index 0000000000000..96ebf8791f257
--- /dev/null
+++ b/lib/autoupdate/rollout/reconciler.go
@@ -0,0 +1,440 @@
+/*
+ * Teleport
+ * Copyright (C) 2024 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package rollout
+
+import (
+	"context"
+	"log/slog"
+	"sync"
+	"time"
+
+	"github.com/gravitational/trace"
+	"github.com/jonboulle/clockwork"
+	"google.golang.org/protobuf/proto"
+	"google.golang.org/protobuf/types/known/timestamppb"
+
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	"github.com/gravitational/teleport/api/types"
+	update "github.com/gravitational/teleport/api/types/autoupdate"
+	"github.com/gravitational/teleport/api/utils"
+)
+
+const (
+	reconciliationTimeout = 30 * time.Second
+	defaultConfigMode     = update.AgentsUpdateModeEnabled
+	defaultStrategy       = update.AgentsStrategyHaltOnError
+	maxConflictRetry      = 3
+
+	defaultGroupName    = "default"
+	defaultCMCGroupName = defaultGroupName + "-cmc"
+	defaultStartHour    = 12
+)
+
+var (
+	// defaultUpdateDays is the default list of days when groups can be updated.
+	defaultUpdateDays = []string{"Mon", "Tue", "Wed", "Thu"}
+)
+
+// reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and
+// AutoUpdateConfig singletons. This reconciler is not based on the services.GenericReconciler because:
+// - we reconcile two input resources into a single output resource
+// - both input and output are singletons, so we need neither the multi-resource logic nor stream/paginated APIs
+type reconciler struct {
+	clt     Client
+	log     *slog.Logger
+	clock   clockwork.Clock
+	metrics *metrics
+
+	rolloutStrategies []rolloutStrategy
+
+	// mutex ensures we only run one reconciliation at a time
+	mutex sync.Mutex
+}
+
+// reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths
+// are racing); in this case we retry the reconciliation immediately.
+func (r *reconciler) reconcile(ctx context.Context) error {
+	r.mutex.Lock()
+	defer r.mutex.Unlock()
+
+	ctx, cancel := context.WithTimeout(ctx, reconciliationTimeout)
+	defer cancel()
+
+	var startTime time.Time
+	tries := 0
+	var err error
+	for tries < maxConflictRetry {
+		tries++
+		select {
+		case <-ctx.Done():
+			return ctx.Err()
+		default:
+			startTime = r.clock.Now()
+			err = r.tryReconcile(ctx)
+			duration := r.clock.Since(startTime)
+			switch {
+			case err == nil:
+				r.metrics.observeReconciliationTry(metricsReconciliationResultLabelValueSuccess, duration)
+				return nil
+			case trace.IsCompareFailed(err), trace.IsNotFound(err):
+				// The resource changed since we last saw it.
+				// We must have raced against another auth.
+				// Let's retry the reconciliation.
+				r.log.DebugContext(ctx, "retrying reconciliation", "error", err)
+				r.metrics.observeReconciliationTry(metricsReconciliationResultLabelValueRetry, duration)
+			default:
+				// error is non-nil and non-retryable
+				r.metrics.observeReconciliationTry(metricsReconciliationResultLabelValueFail, duration)
+				return trace.Wrap(err, "failed to reconcile rollout")
+			}
+		}
+	}
+	return trace.CompareFailed("compare failed, tried %d times, last error: %s", tries, err)
+}
+
+// tryReconcile tries to reconcile the AutoUpdateAgentRollout singleton.
+// This function should be idempotent: if the AutoUpdateAgentRollout is already up-to-date, it is a no-op.
+// The creation/update/deletion can fail with a trace.CompareFailedError or trace.NotFoundError
+// if the resource changed while we were computing it.
+// The caller must handle those errors and retry the reconciliation.
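+// Conflict detection relies on optimistic concurrency: updates are conditional
+// on the revision previously read, so a concurrent write from another auth
+// instance surfaces as CompareFailed (or NotFound if the resource was deleted
+// under us) rather than silently overwriting it.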
+func (r *reconciler) tryReconcile(ctx context.Context) (err error) {
+	// get autoupdate_config
+	var config *autoupdate.AutoUpdateConfig
+	if c, err := r.clt.GetAutoUpdateConfig(ctx); err == nil {
+		config = c
+	} else if !trace.IsNotFound(err) {
+		return trace.Wrap(err, "getting autoupdate_config")
+	}
+	r.metrics.observeConfig(config)
+
+	// get autoupdate_version
+	var version *autoupdate.AutoUpdateVersion
+	if v, err := r.clt.GetAutoUpdateVersion(ctx); err == nil {
+		version = v
+	} else if !trace.IsNotFound(err) {
+		return trace.Wrap(err, "getting autoupdate_version")
+	}
+	r.metrics.observeVersion(version, r.clock.Now())
+
+	// get autoupdate_agent_rollout
+	rolloutExists := true
+	rollout, err := r.clt.GetAutoUpdateAgentRollout(ctx)
+	if err != nil && !trace.IsNotFound(err) {
+		return trace.Wrap(err, "getting autoupdate_agent_rollout")
+	}
+	if trace.IsNotFound(err) {
+		// rollout doesn't exist yet, we'll need to call Create instead of Update.
+		rolloutExists = false
+	}
+
+	// We observe the current rollout.
+	r.metrics.observeRollout(rollout, r.clock.Now())
+	// If the reconciliation succeeded, we observe the rollout again to reflect its new values.
+	defer func() {
+		if err != nil {
+			return
+		}
+		r.metrics.observeRollout(rollout, r.clock.Now())
+	}()
+
+	// if autoupdate_version does not exist or does not contain spec.agents, we should not configure a rollout
+	if version.GetSpec().GetAgents() == nil {
+		if !rolloutExists {
+			// the rollout doesn't exist, nothing to do
+			return nil
+		}
+		// the rollout exists, we must delete it. We also clear the rollout object for metrics purposes.
+		rollout = nil
+		return r.clt.DeleteAutoUpdateAgentRollout(ctx)
+	}
+
+	// compute what the spec should look like
+	newSpec, err := r.buildRolloutSpec(config.GetSpec().GetAgents(), version.GetSpec().GetAgents())
+	if err != nil {
+		return trace.Wrap(err, "mutating rollout")
+	}
+	newStatus, err := r.computeStatus(ctx, rollout, newSpec, config.GetSpec().GetAgents().GetSchedules())
+	if err != nil {
+		return trace.Wrap(err, "computing rollout status")
+	}
+
+	// We compute if something changed.
+	specChanged := !proto.Equal(rollout.GetSpec(), newSpec)
+	statusChanged := !proto.Equal(rollout.GetStatus(), newStatus)
+	rolloutChanged := specChanged || statusChanged
+
+	// if nothing changed, no need to update the resource
+	if !rolloutChanged {
+		r.log.DebugContext(ctx, "rollout unchanged")
+		return nil
+	}
+
+	// if there is no existing rollout, we create a new one and set the status
+	if !rolloutExists {
+		r.log.DebugContext(ctx, "creating rollout")
+		rollout, err = update.NewAutoUpdateAgentRollout(newSpec)
+		if err != nil {
+			return trace.Wrap(err, "validating new rollout")
+		}
+		rollout.Status = newStatus
+		rollout, err = r.clt.CreateAutoUpdateAgentRollout(ctx, rollout)
+		return trace.Wrap(err, "creating rollout")
+	}
+
+	r.log.DebugContext(ctx, "updating rollout")
+	// If there was a previous rollout, we update its spec and status in place.
+	// We don't create a new resource, so we keep the metadata containing the revision ID.
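+	// Note: validation runs on the fully mutated resource, so inconsistencies
+	// between the new spec and the new status are caught before we hit the backend.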
+	rollout.Spec = newSpec
+	rollout.Status = newStatus
+	err = update.ValidateAutoUpdateAgentRollout(rollout)
+	if err != nil {
+		return trace.Wrap(err, "validating mutated rollout")
+	}
+	rollout, err = r.clt.UpdateAutoUpdateAgentRollout(ctx, rollout)
+	return trace.Wrap(err, "updating rollout")
+}
+
+func (r *reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) {
+	// reconcile mode
+	mode, err := getMode(config.GetMode(), version.GetMode())
+	if err != nil {
+		return nil, trace.Wrap(err, "computing agent update mode")
+	}
+
+	strategy := config.GetStrategy()
+	if strategy == "" {
+		strategy = defaultStrategy
+	}
+
+	return &autoupdate.AutoUpdateAgentRolloutSpec{
+		StartVersion:              version.GetStartVersion(),
+		TargetVersion:             version.GetTargetVersion(),
+		Schedule:                  version.GetSchedule(),
+		AutoupdateMode:            mode,
+		Strategy:                  strategy,
+		MaintenanceWindowDuration: config.GetMaintenanceWindowDuration(),
+	}, nil
+}
+
+// agentModeCode maps agent modes to integers.
+// When config and version modes don't match, the lowest integer takes precedence.
+var (
+	agentModeCode = map[string]int{
+		update.AgentsUpdateModeDisabled:  1,
+		update.AgentsUpdateModeSuspended: 2,
+		update.AgentsUpdateModeEnabled:   3,
+	}
+	codeToAgentMode = map[int]string{
+		1: update.AgentsUpdateModeDisabled,
+		2: update.AgentsUpdateModeSuspended,
+		3: update.AgentsUpdateModeEnabled,
+	}
+)
+
+// getMode merges the agent modes coming from the version and config resources into a single mode.
+// "disabled" takes precedence over "suspended", which takes precedence over "enabled".
+func getMode(configMode, versionMode string) (string, error) {
+	if configMode == "" {
+		configMode = defaultConfigMode
+	}
+	if versionMode == "" {
+		return "", trace.BadParameter("version mode empty")
+	}
+
+	configCode, ok := agentModeCode[configMode]
+	if !ok {
+		return "", trace.BadParameter("unsupported agent config mode: %v", configMode)
+	}
+	versionCode, ok := agentModeCode[versionMode]
+	if !ok {
+		return "", trace.BadParameter("unsupported agent version mode: %v", versionMode)
+	}
+
+	// The lowest code takes precedence.
+	if configCode <= versionCode {
+		return codeToAgentMode[configCode], nil
+	}
+	return codeToAgentMode[versionCode], nil
+}
+
+// computeStatus computes the new rollout status based on the existing rollout,
+// the new rollout spec, and the autoupdate_config. existingRollout might be nil
+// if this is a new rollout.
+// Even when the returned status is derived from the existing rollout status,
+// it is a new, deep-cloned structure.
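+// The status is reset (and the rollout restarts from the first group) when
+// the start version, target version, schedule, or strategy change.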
+func (r *reconciler) computeStatus(
+	ctx context.Context,
+	existingRollout *autoupdate.AutoUpdateAgentRollout,
+	newSpec *autoupdate.AutoUpdateAgentRolloutSpec,
+	configSchedules *autoupdate.AgentAutoUpdateSchedules,
+) (*autoupdate.AutoUpdateAgentRolloutStatus, error) {
+
+	var status *autoupdate.AutoUpdateAgentRolloutStatus
+
+	// First, we check if a major spec change happened and we should reset the rollout status.
+	shouldResetRollout := existingRollout.GetSpec().GetStartVersion() != newSpec.GetStartVersion() ||
+		existingRollout.GetSpec().GetTargetVersion() != newSpec.GetTargetVersion() ||
+		existingRollout.GetSpec().GetSchedule() != newSpec.GetSchedule() ||
+		existingRollout.GetSpec().GetStrategy() != newSpec.GetStrategy()
+
+	// We create a new status if the rollout should be reset or the previous status was nil.
+	if shouldResetRollout || existingRollout.GetStatus() == nil {
+		status = new(autoupdate.AutoUpdateAgentRolloutStatus)
+		// We set the start time if this is a new rollout.
+		status.StartTime = timestamppb.New(r.clock.Now())
+	} else {
+		status = utils.CloneProtoMsg(existingRollout.GetStatus())
+	}
+
+	// Then, we check if the selected schedule uses groups.
+	switch newSpec.GetSchedule() {
+	case update.AgentsScheduleImmediate:
+		// There are no groups with the immediate schedule, we must clean them.
+		status.Groups = nil
+		return status, nil
+	case update.AgentsScheduleRegular:
+		// The regular schedule has groups; we compute them below.
+	default:
+		return nil, trace.BadParameter("unsupported agent schedule type %q", newSpec.GetSchedule())
+	}
+
+	// capture the current time to put it in the status update timestamps and to
+	// compute the group state changes
+	now := r.clock.Now()
+
+	// If timeOverride is set to a non-zero value (we have two potential zeros: Go time's zero and timestamppb's zero),
+	// we use it instead of the clock's time.
+	if timeOverride := status.GetTimeOverride().AsTime(); !(timeOverride.IsZero() || timeOverride.Unix() == 0) {
+		r.log.DebugContext(ctx, "reconciling with synthetic time instead of real time",
+			"time_override", timeOverride,
+			"real_time", now,
+		)
+		now = timeOverride
+	}
+
+	// If this is a new rollout or the rollout has been reset, we create groups from the config.
+	groups := status.GetGroups()
+	var err error
+	if len(groups) == 0 {
+		groups, err = r.makeGroupsStatus(ctx, configSchedules, now)
+		if err != nil {
+			return nil, trace.Wrap(err, "creating groups status")
+		}
+	}
+	status.Groups = groups
+
+	err = r.progressRollout(ctx, newSpec, status, now)
+	// Failing to progress the update is not a hard failure.
+	// We want to update the status even if something went wrong to surface the failed reconciliation and potential errors to the user.
+	if err != nil {
+		r.log.ErrorContext(ctx, "Errors encountered during rollout progress. Some groups might not get updated properly.",
+			"error", err)
+	}
+
+	status.State = computeRolloutState(groups)
+	return status, nil
+}
+
+// progressRollout picks the right rollout strategy and updates groups to progress the rollout.
+// Groups are updated in place.
+// If an error is returned, the groups should still be upserted: depending on the strategy,
+// failing to update a group might not be fatal (other groups can still progress independently).
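+// Strategies are matched by name, so every strategy a rollout spec can
+// reference must be registered on the reconciler at construction time.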
+func (r *reconciler) progressRollout(ctx context.Context, spec *autoupdate.AutoUpdateAgentRolloutSpec, status *autoupdate.AutoUpdateAgentRolloutStatus, now time.Time) error {
+	for _, strategy := range r.rolloutStrategies {
+		if strategy.name() == spec.GetStrategy() {
+			return strategy.progressRollout(ctx, spec, status, now)
+		}
+	}
+	return trace.NotImplemented("rollout strategy %q not implemented", spec.GetStrategy())
+}
+
+// makeGroupsStatus creates the autoupdate_agent_rollout.status.groups based on the autoupdate_config.
+// This should be called if the status groups have not been initialized or must be reset.
+func (r *reconciler) makeGroupsStatus(ctx context.Context, schedules *autoupdate.AgentAutoUpdateSchedules, now time.Time) ([]*autoupdate.AutoUpdateAgentRolloutStatusGroup, error) {
+	configGroups := schedules.GetRegular()
+	if len(configGroups) == 0 {
+		defaultGroup, err := r.defaultConfigGroup(ctx)
+		if err != nil {
+			return nil, trace.Wrap(err, "retrieving default group")
+		}
+		configGroups = []*autoupdate.AgentAutoUpdateGroup{defaultGroup}
+	}
+
+	groups := make([]*autoupdate.AutoUpdateAgentRolloutStatusGroup, len(configGroups))
+	for i, group := range configGroups {
+		groups[i] = &autoupdate.AutoUpdateAgentRolloutStatusGroup{
+			Name:             group.Name,
+			StartTime:        nil,
+			State:            autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+			LastUpdateTime:   timestamppb.New(now),
+			LastUpdateReason: updateReasonCreated,
+			ConfigDays:       group.Days,
+			ConfigStartHour:  group.StartHour,
+			ConfigWaitHours:  group.WaitHours,
+		}
+	}
+	return groups, nil
+}
+
+// defaultConfigGroup returns the default group in case of a missing autoupdate_config resource.
+// This is a function and not a variable because we will need to add more logic here in the future
+// to look up maintenance information from RFD 109's cluster_maintenance_config.
+func (r *reconciler) defaultConfigGroup(ctx context.Context) (*autoupdate.AgentAutoUpdateGroup, error) {
+	cmc, err := r.clt.GetClusterMaintenanceConfig(ctx)
+	if err != nil {
+		if trace.IsNotFound(err) {
+			// There's no CMC, we return the default group.
+			return defaultGroup(), nil
+		}
+
+		// If we got any other error (not a NotFound), we stop.
+		return nil, trace.Wrap(err, "retrieving the cluster maintenance config")
+	}
+	// We got a CMC, we generate the default group from it.
+	upgradeWindow, ok := cmc.GetAgentUpgradeWindow()
+	if !ok {
+		// The CMC is here but does not contain an upgrade window.
+		return defaultGroup(), nil
+	}
+
+	weekdays := upgradeWindow.Weekdays
+	// A CMC upgrade window not specifying weekdays should update every day.
+	if len(weekdays) == 0 {
+		weekdays = []string{types.Wildcard}
+	}
+
+	return &autoupdate.AgentAutoUpdateGroup{
+		Name:      defaultCMCGroupName,
+		Days:      weekdays,
+		StartHour: int32(upgradeWindow.UTCStartHour),
+		WaitHours: 0,
+	}, nil
+}
+
+func defaultGroup() *autoupdate.AgentAutoUpdateGroup {
+	return &autoupdate.AgentAutoUpdateGroup{
+		Name:      defaultGroupName,
+		Days:      defaultUpdateDays,
+		StartHour: defaultStartHour,
+		WaitHours: 0,
+	}
+}
diff --git a/lib/autoupdate/rolloutcontroller/reconciler_test.go b/lib/autoupdate/rollout/reconciler_test.go
similarity index 57%
rename from lib/autoupdate/rolloutcontroller/reconciler_test.go
rename to lib/autoupdate/rollout/reconciler_test.go
index 340451d8da46d..ea867fb01a742
--- a/lib/autoupdate/rolloutcontroller/reconciler_test.go
+++ b/lib/autoupdate/rollout/reconciler_test.go
@@ -16,19 +16,23 @@
  * along with this program. If not, see <https://www.gnu.org/licenses/>.
 */
 
-package rolloutcontroller
+package rollout
 
 import (
 	"context"
 	"testing"
+	"time"
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/uuid"
 	"github.com/gravitational/trace"
+	"github.com/jonboulle/clockwork"
 	"github.com/stretchr/testify/require"
 	"google.golang.org/protobuf/testing/protocmp"
+	"google.golang.org/protobuf/types/known/timestamppb"
 
 	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	"github.com/gravitational/teleport/api/types"
 	update "github.com/gravitational/teleport/api/types/autoupdate"
 	apiutils "github.com/gravitational/teleport/api/utils"
 	"github.com/gravitational/teleport/lib/backend"
@@ -39,7 +43,7 @@ import (
 // The comparison does not take into account the proto internal state.
 func rolloutEquals(expected *autoupdate.AutoUpdateAgentRollout) require.ValueAssertionFunc {
 	return func(t require.TestingT, i interface{}, _ ...interface{}) {
-		require.IsType(t, &autoupdate.AutoUpdateAgentRollout{}, i)
+		require.IsType(t, &autoupdate.AutoUpdateAgentRollout{}, i, "resource should be an autoupdate_agent_rollout")
 		actual := i.(*autoupdate.AutoUpdateAgentRollout)
 		require.Empty(t, cmp.Diff(expected, actual, protocmp.Transform()))
 	}
@@ -134,6 +138,8 @@ func TestTryReconcile(t *testing.T) {
 	t.Parallel()
 	log := utils.NewSlogLoggerForTests()
 	ctx := context.Background()
+	clock := clockwork.NewFakeClock()
+
 	// Test setup: creating fixtures
 	configOK, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
 		Tools: &autoupdate.AutoUpdateConfigSpecTools{
@@ -181,6 +187,7 @@
 		Strategy:      update.AgentsStrategyHaltOnError,
 	})
 	require.NoError(t, err)
+	upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())}
 
 	outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
 		StartVersion:  "1.2.2",
@@ -190,6 +197,7 @@
 		Strategy:      update.AgentsStrategyHaltOnError,
 	})
 	require.NoError(t, err)
+	outOfDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{}
 
 	tests := []struct {
 		name     string
@@ -307,9 +315,11 @@ func TestTryReconcile(t *testing.T) {
 
 			// Test execution: Running the reconciliation
 
-			reconciler := &Reconciler{
-				clt: client,
-				log: log,
+			reconciler := &reconciler{
+				clt:     client,
+				log:     log,
+				clock:   clock,
+				metrics: newMetricsForTest(t),
 			}
 
 			require.NoError(t, reconciler.tryReconcile(ctx))
@@ -323,6 +333,7 @@
 func TestReconciler_Reconcile(t *testing.T) {
 	log := utils.NewSlogLoggerForTests()
 	ctx := context.Background()
+	clock := clockwork.NewFakeClock()
 	// Test setup: creating fixtures
 	config, err := update.NewAutoUpdateConfig(&autoupdate.AutoUpdateConfigSpec{
 		Tools: &autoupdate.AutoUpdateConfigSpecTools{
@@ -354,6 +365,7 @@
 		Strategy:      update.AgentsStrategyHaltOnError,
 	})
 	require.NoError(t, err)
+	upToDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{StartTime: timestamppb.New(clock.Now())}
 
 	outOfDateRollout, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{
 		StartVersion:  "1.2.2",
@@ -363,6 +375,7 @@
 		Strategy:      update.AgentsStrategyHaltOnError,
 	})
 	require.NoError(t, err)
+	outOfDateRollout.Status = &autoupdate.AutoUpdateAgentRolloutStatus{}
 
 	// Those tests are not written in table format because the fixture setup is too complex and this would harm
 	// readability.
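
The hunks below repeat one mechanical change: each test now constructs the lowercase `reconciler` with a fake clock and a per-test metrics registry. All of them go through the package's stub-based mock client, whose helpers (`newMockClient`, `mockClientStubs`, `callAnswer`, `checkIfEmpty`) are not part of this diff. A minimal sketch of that pattern, reconstructed from the call sites (the queue type and its method names here are assumptions, not the actual helper code):

	package rollout

	import (
		"testing"

		"github.com/gravitational/trace"
		"github.com/stretchr/testify/require"
	)

	// callAnswer pairs the value and the error returned by one stubbed call.
	// The generic parameter lets a single type cover every resource kind.
	type callAnswer[T any] struct {
		result T
		err    error
	}

	// answerQueue pops pre-recorded answers in order and fails when a call
	// arrives that the test did not declare (hypothetical helper name).
	type answerQueue[T any] struct {
		answers []callAnswer[T]
	}

	func (q *answerQueue[T]) next(name string) (T, error) {
		if len(q.answers) == 0 {
			var zero T
			return zero, trace.BadParameter("unexpected %s call", name)
		}
		answer := q.answers[0]
		q.answers = q.answers[1:]
		return answer.result, answer.err
	}

	// checkIfEmpty asserts that the test consumed every answer it declared,
	// catching expected calls that never happened.
	func (q *answerQueue[T]) checkIfEmpty(t *testing.T) {
		t.Helper()
		require.Empty(t, q.answers, "expected stubbed calls were not made")
	}

Consuming answers in strict order is what lets `checkIfEmpty` detect both missing and extra calls, which the tests below rely on via `client.checkIfEmpty(t)`.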
@@ -375,13 +388,15 @@ func TestReconciler_Reconcile(t *testing.T) { } client := newMockClient(t, stubs) - reconciler := &Reconciler{ - clt: client, - log: log, + reconciler := &reconciler{ + clt: client, + log: log, + clock: clock, + metrics: newMetricsForTest(t), } // Test execution: run the reconciliation loop - require.NoError(t, reconciler.Reconcile(ctx)) + require.NoError(t, reconciler.reconcile(ctx)) // Test validation: check that all the expected calls were received client.checkIfEmpty(t) @@ -397,13 +412,15 @@ func TestReconciler_Reconcile(t *testing.T) { } client := newMockClient(t, stubs) - reconciler := &Reconciler{ - clt: client, - log: log, + reconciler := &reconciler{ + clt: client, + log: log, + clock: clock, + metrics: newMetricsForTest(t), } // Test execution: run the reconciliation loop - require.NoError(t, reconciler.Reconcile(ctx)) + require.NoError(t, reconciler.reconcile(ctx)) // Test validation: check that all the expected calls were received client.checkIfEmpty(t) @@ -421,13 +438,15 @@ func TestReconciler_Reconcile(t *testing.T) { } client := newMockClient(t, stubs) - reconciler := &Reconciler{ - clt: client, - log: log, + reconciler := &reconciler{ + clt: client, + log: log, + clock: clock, + metrics: newMetricsForTest(t), } // Test execution: run the reconciliation loop - require.NoError(t, reconciler.Reconcile(ctx)) + require.NoError(t, reconciler.reconcile(ctx)) // Test validation: check that all the expected calls were received client.checkIfEmpty(t) @@ -461,13 +480,15 @@ func TestReconciler_Reconcile(t *testing.T) { } client := newMockClient(t, stubs) - reconciler := &Reconciler{ - clt: client, - log: log, + reconciler := &reconciler{ + clt: client, + log: log, + clock: clock, + metrics: newMetricsForTest(t), } // Test execution: run the reconciliation loop - require.NoError(t, reconciler.Reconcile(ctx)) + require.NoError(t, reconciler.reconcile(ctx)) // Test validation: check that all the expected calls were received client.checkIfEmpty(t) @@ -499,13 +520,15 @@ func TestReconciler_Reconcile(t *testing.T) { } client := newMockClient(t, stubs) - reconciler := &Reconciler{ - clt: client, - log: log, + reconciler := &reconciler{ + clt: client, + log: log, + clock: clock, + metrics: newMetricsForTest(t), } // Test execution: run the reconciliation loop - require.NoError(t, reconciler.Reconcile(ctx)) + require.NoError(t, reconciler.reconcile(ctx)) // Test validation: check that all the expected calls were received client.checkIfEmpty(t) @@ -523,13 +546,15 @@ func TestReconciler_Reconcile(t *testing.T) { } client := newMockClient(t, stubs) - reconciler := &Reconciler{ - clt: client, - log: log, + reconciler := &reconciler{ + clt: client, + log: log, + clock: clock, + metrics: newMetricsForTest(t), } // Test execution: run the reconciliation loop - require.ErrorContains(t, reconciler.Reconcile(ctx), "the DB fell on the floor") + require.ErrorContains(t, reconciler.reconcile(ctx), "the DB fell on the floor") // Test validation: check that all the expected calls were received client.checkIfEmpty(t) @@ -553,15 +578,410 @@ func TestReconciler_Reconcile(t *testing.T) { } client := newMockClient(t, stubs) - reconciler := &Reconciler{ - clt: client, - log: log, + reconciler := &reconciler{ + clt: client, + log: log, + clock: clock, + metrics: newMetricsForTest(t), } // Test execution: run the reconciliation loop - require.ErrorContains(t, reconciler.Reconcile(cancelableCtx), "canceled") + require.ErrorIs(t, reconciler.reconcile(cancelableCtx), context.Canceled) // Test 
validation: check that all the expected calls were received client.checkIfEmpty(t) }) } + +func Test_makeGroupsStatus(t *testing.T) { + now := time.Now() + ctx := context.Background() + + tests := []struct { + name string + schedules *autoupdate.AgentAutoUpdateSchedules + expected []*autoupdate.AutoUpdateAgentRolloutStatusGroup + }{ + { + name: "nil schedules", + schedules: nil, + expected: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: defaultGroupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(now), + LastUpdateReason: updateReasonCreated, + ConfigDays: defaultUpdateDays, + ConfigStartHour: defaultStartHour, + }, + }, + }, + { + name: "no groups in schedule", + schedules: &autoupdate.AgentAutoUpdateSchedules{Regular: make([]*autoupdate.AgentAutoUpdateGroup, 0)}, + expected: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: defaultGroupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(now), + LastUpdateReason: updateReasonCreated, + ConfigDays: defaultUpdateDays, + ConfigStartHour: defaultStartHour, + }, + }, + }, + { + name: "one group in schedule", + schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + { + Name: "group1", + Days: everyWeekday, + StartHour: matchingStartHour, + }, + }, + }, + expected: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: "group1", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(now), + LastUpdateReason: updateReasonCreated, + ConfigDays: everyWeekday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "multiple groups in schedule", + schedules: &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + { + Name: "group1", + Days: everyWeekday, + StartHour: matchingStartHour, + }, + { + Name: "group2", + Days: everyWeekdayButSunday, + StartHour: nonMatchingStartHour, + WaitHours: 1, + }, + }, + }, + expected: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: "group1", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(now), + LastUpdateReason: updateReasonCreated, + ConfigDays: everyWeekday, + ConfigStartHour: matchingStartHour, + }, + { + Name: "group2", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(now), + LastUpdateReason: updateReasonCreated, + ConfigDays: everyWeekdayButSunday, + ConfigStartHour: nonMatchingStartHour, + ConfigWaitHours: 1, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // We craft a mock client always answering there's no cmc. + // It's not the point of this test to check the cmc client usage so we don't count the number of calls here. + // CMC-specific tests happen in TestDefaultConfigGroup(). 
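+			// Only the nil/empty schedule cases actually reach the CMC lookup:
+			// makeGroupsStatus falls back to defaultConfigGroup only when no
+			// groups are configured.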
+ clt := newMockClient(t, mockClientStubs{cmcAnswers: []callAnswer[*types.ClusterMaintenanceConfigV1]{{ + result: nil, + err: trace.NotFound("no cmc"), + }}}) + r := reconciler{clt: clt} + result, err := r.makeGroupsStatus(ctx, tt.schedules, now) + require.NoError(t, err) + require.Equal(t, tt.expected, result) + }) + } +} + +const fakeRolloutStrategyName = "fake" + +type fakeRolloutStrategy struct { + strategyName string + // calls counts how many times the fake rollout strategy was called. + // This is not thread safe. + calls int +} + +func (f *fakeRolloutStrategy) name() string { + return f.strategyName +} + +func (f *fakeRolloutStrategy) progressRollout(ctx context.Context, spec *autoupdate.AutoUpdateAgentRolloutSpec, status *autoupdate.AutoUpdateAgentRolloutStatus, now time.Time) error { + f.calls++ + return nil +} + +func Test_reconciler_computeStatus(t *testing.T) { + log := utils.NewSlogLoggerForTests() + clock := clockwork.NewFakeClock() + ctx := context.Background() + + oldStatus := &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: "old group", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + }, + }, + State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED, + } + oldSpec := &autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleRegular, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: fakeRolloutStrategyName, + } + schedules := &autoupdate.AgentAutoUpdateSchedules{ + Regular: []*autoupdate.AgentAutoUpdateGroup{ + { + Name: "new group", + Days: everyWeekday, + }, + }, + } + r := reconciler{} + newGroups, err := r.makeGroupsStatus(ctx, schedules, clock.Now()) + require.NoError(t, err) + newStatus := &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: newGroups, + State: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED, + StartTime: timestamppb.New(clock.Now()), + } + + tests := []struct { + name string + existingRollout *autoupdate.AutoUpdateAgentRollout + newSpec *autoupdate.AutoUpdateAgentRolloutSpec + expectedStatus *autoupdate.AutoUpdateAgentRolloutStatus + expectedStrategyCalls int + }{ + { + name: "status is reset if start version changes", + existingRollout: &autoupdate.AutoUpdateAgentRollout{ + Spec: oldSpec, + Status: oldStatus, + }, + newSpec: &autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.2", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleRegular, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: fakeRolloutStrategyName, + }, + // status should have been reset and is now the new status + expectedStatus: newStatus, + expectedStrategyCalls: 1, + }, + { + name: "status is reset if target version changes", + existingRollout: &autoupdate.AutoUpdateAgentRollout{ + Spec: oldSpec, + Status: oldStatus, + }, + newSpec: &autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.5", + Schedule: update.AgentsScheduleRegular, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: fakeRolloutStrategyName, + }, + // status should have been reset and is now the new status + expectedStatus: newStatus, + expectedStrategyCalls: 1, + }, + { + name: "status is reset if strategy changes", + existingRollout: &autoupdate.AutoUpdateAgentRollout{ + Spec: oldSpec, + Status: oldStatus, + }, + newSpec: &autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: 
"1.2.4", + Schedule: update.AgentsScheduleRegular, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: fakeRolloutStrategyName + "2", + }, + // status should have been reset and is now the new status + expectedStatus: newStatus, + expectedStrategyCalls: 1, + }, + { + name: "status is not reset if mode changes", + existingRollout: &autoupdate.AutoUpdateAgentRollout{ + Spec: oldSpec, + Status: oldStatus, + }, + newSpec: &autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleRegular, + AutoupdateMode: update.AgentsUpdateModeSuspended, + Strategy: fakeRolloutStrategyName, + }, + // status should NOT have been reset and still contain the old groups + expectedStatus: oldStatus, + expectedStrategyCalls: 1, + }, + { + name: "groups are unset if schedule is immediate", + existingRollout: &autoupdate.AutoUpdateAgentRollout{ + Spec: oldSpec, + Status: oldStatus, + }, + newSpec: &autoupdate.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: update.AgentsScheduleImmediate, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: fakeRolloutStrategyName, + }, + // groups should be unset + expectedStatus: &autoupdate.AutoUpdateAgentRolloutStatus{ + StartTime: timestamppb.New(clock.Now()), + }, + expectedStrategyCalls: 0, + }, + { + name: "new groups are populated if previous ones were empty", + existingRollout: &autoupdate.AutoUpdateAgentRollout{ + Spec: oldSpec, + // old groups were empty + Status: &autoupdate.AutoUpdateAgentRolloutStatus{ + StartTime: timestamppb.New(clock.Now()), + }, + }, + // no spec change + newSpec: oldSpec, + // still, we have the new groups set + expectedStatus: newStatus, + expectedStrategyCalls: 1, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + strategy := &fakeRolloutStrategy{strategyName: tt.newSpec.Strategy} + r := &reconciler{ + log: log, + clock: clock, + rolloutStrategies: []rolloutStrategy{strategy}, + metrics: newMetricsForTest(t), + } + result, err := r.computeStatus(ctx, tt.existingRollout, tt.newSpec, schedules) + require.NoError(t, err) + require.Empty(t, cmp.Diff(tt.expectedStatus, result, protocmp.Transform())) + require.Equal(t, tt.expectedStrategyCalls, strategy.calls) + }) + } +} + +func TestDefaultConfigGroup(t *testing.T) { + ctx := context.Background() + testStartHour := 16 + + tests := []struct { + name string + cmcAnswer callAnswer[*types.ClusterMaintenanceConfigV1] + expectedResult *autoupdate.AgentAutoUpdateGroup + expectError require.ErrorAssertionFunc + }{ + { + name: "no CMC", + cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{ + nil, trace.NotFound("no cmc"), + }, + expectedResult: defaultGroup(), + expectError: require.NoError, + }, + { + name: "CMC with no upgrade window", + cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{ + &types.ClusterMaintenanceConfigV1{ + Spec: types.ClusterMaintenanceConfigSpecV1{ + AgentUpgrades: nil, + }, + }, nil, + }, + expectedResult: defaultGroup(), + expectError: require.NoError, + }, + { + name: "CMC with no weekdays", + cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{ + &types.ClusterMaintenanceConfigV1{ + Spec: types.ClusterMaintenanceConfigSpecV1{ + AgentUpgrades: &types.AgentUpgradeWindow{ + UTCStartHour: uint32(testStartHour), + Weekdays: nil, + }, + }, + }, nil, + }, + expectedResult: &autoupdate.AgentAutoUpdateGroup{ + Name: defaultCMCGroupName, + Days: []string{"*"}, + StartHour: int32(testStartHour), + WaitHours: 0, + }, + 
expectError: require.NoError, + }, + { + name: "CMC with weekdays", + cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{ + &types.ClusterMaintenanceConfigV1{ + Spec: types.ClusterMaintenanceConfigSpecV1{ + AgentUpgrades: &types.AgentUpgradeWindow{ + UTCStartHour: uint32(testStartHour), + Weekdays: everyWeekdayButSunday, + }, + }, + }, nil, + }, + expectedResult: &autoupdate.AgentAutoUpdateGroup{ + Name: defaultCMCGroupName, + Days: everyWeekdayButSunday, + StartHour: int32(testStartHour), + WaitHours: 0, + }, + expectError: require.NoError, + }, + { + name: "unexpected error getting CMC", + cmcAnswer: callAnswer[*types.ClusterMaintenanceConfigV1]{ + nil, trace.ConnectionProblem(trace.Errorf("oh no"), "connection failed"), + }, + expectedResult: nil, + expectError: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test setup: loading fixtures. + clt := newMockClient(t, mockClientStubs{cmcAnswers: []callAnswer[*types.ClusterMaintenanceConfigV1]{tt.cmcAnswer}}) + r := &reconciler{clt: clt} + // Test execution. + result, err := r.defaultConfigGroup(ctx) + tt.expectError(t, err) + require.Equal(t, tt.expectedResult, result) + // Test validation: the mock client should be empty. + clt.checkIfEmpty(t) + }) + } +} diff --git a/lib/autoupdate/rollout/strategy.go b/lib/autoupdate/rollout/strategy.go new file mode 100644 index 0000000000000..d5b8236ce8f90 --- /dev/null +++ b/lib/autoupdate/rollout/strategy.go @@ -0,0 +1,153 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rollout + +import ( + "context" + "time" + + "github.com/gravitational/trace" + "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + "github.com/gravitational/teleport/api/types" +) + +const ( + // Common update reasons + updateReasonCreated = "created" + updateReasonReconcilerError = "reconciler_error" + updateReasonRolloutChanged = "rollout_changed_during_window" +) + +// rolloutStrategy is responsible for rolling out the update across groups. +// This interface allows us to inject dummy strategies for simpler testing. +type rolloutStrategy interface { + name() string + // progressRollout takes the new rollout spec, existing rollout status and current time. + // It updates the status resource in-place to progress the rollout to the next step if possible/needed. + progressRollout(context.Context, *autoupdate.AutoUpdateAgentRolloutSpec, *autoupdate.AutoUpdateAgentRolloutStatus, time.Time) error +} + +// inWindow checks if the time is in the group's maintenance window. +// The maintenance window is the semi-open interval: [windowStart, windowEnd). 
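+// The window start is derived from the group's configured start hour on the
+// current UTC day; the caller provides the window duration, which can differ
+// between strategies.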
+func inWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time, duration time.Duration) (bool, error) { + dayOK, err := canUpdateToday(group.ConfigDays, now) + if err != nil { + return false, trace.Wrap(err, "checking the day of the week") + } + if !dayOK { + return false, nil + } + + // We compute the theoretical window start and end + windowStart := now.Truncate(24 * time.Hour).Add(time.Duration(group.ConfigStartHour) * time.Hour) + windowEnd := windowStart.Add(duration) + + return !now.Before(windowStart) && now.Before(windowEnd), nil +} + +// rolloutChangedInWindow checks if the rollout got created after the theoretical group start time +func rolloutChangedInWindow(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now, rolloutStart time.Time, duration time.Duration) (bool, error) { + // If the rollout is older than 24h, we know it did not change during the window + if now.Sub(rolloutStart) > 24*time.Hour { + return false, nil + } + // Else we check if the rollout happened in the group window. + return inWindow(group, rolloutStart, duration) +} + +func canUpdateToday(allowedDays []string, now time.Time) (bool, error) { + for _, allowedDay := range allowedDays { + if allowedDay == types.Wildcard { + return true, nil + } + weekday, ok := types.ParseWeekday(allowedDay) + if !ok { + return false, trace.BadParameter("failed to parse weekday %q", allowedDay) + } + if weekday == now.Weekday() { + return true, nil + } + } + return false, nil +} + +func setGroupState(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, newState autoupdate.AutoUpdateAgentGroupState, reason string, now time.Time) { + changed := false + previousState := group.State + + // Check if there is a state transition + if previousState != newState { + group.State = newState + changed = true + // If we just started the group, also update the start time + if newState == autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE { + group.StartTime = timestamppb.New(now) + } + } + + // Check if there is a reason change. Even if the state did not change, we + // might want to explain why. + if group.LastUpdateReason != reason { + group.LastUpdateReason = reason + changed = true + } + + if changed { + group.LastUpdateTime = timestamppb.New(now) + } +} + +func computeRolloutState(groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup) autoupdate.AutoUpdateAgentRolloutState { + groupCount := len(groups) + + if groupCount == 0 { + return autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED + } + + var doneGroups, unstartedGroups int + + for _, group := range groups { + switch group.State { + // If one or more groups have been rolled back, we consider the rollout rolledback + case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: + return autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK + + case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED: + unstartedGroups++ + + case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE: + doneGroups++ + } + } + + // If every group is done, the rollout is done. + if doneGroups == groupCount { + return autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE + } + + // If every group is unstarted, the rollout is unstarted. 
+ if unstartedGroups == groupCount { + return autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED + } + + // Else at least one group is active or done, but not everything is finished. We consider the rollout active. + return autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE +} diff --git a/lib/autoupdate/rollout/strategy_haltonerror.go b/lib/autoupdate/rollout/strategy_haltonerror.go new file mode 100644 index 0000000000000..fafc5d5ae30d3 --- /dev/null +++ b/lib/autoupdate/rollout/strategy_haltonerror.go @@ -0,0 +1,164 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rollout + +import ( + "context" + "log/slog" + "time" + + "github.com/gravitational/trace" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + update "github.com/gravitational/teleport/api/types/autoupdate" +) + +const ( + updateReasonCanStart = "can_start" + updateReasonCannotStart = "cannot_start" + updateReasonPreviousGroupsNotDone = "previous_groups_not_done" + updateReasonUpdateComplete = "update_complete" + updateReasonUpdateInProgress = "update_in_progress" + haltOnErrorWindowDuration = time.Hour +) + +type haltOnErrorStrategy struct { + log *slog.Logger +} + +func (h *haltOnErrorStrategy) name() string { + return update.AgentsStrategyHaltOnError +} + +func newHaltOnErrorStrategy(log *slog.Logger) (rolloutStrategy, error) { + if log == nil { + return nil, trace.BadParameter("missing log") + } + return &haltOnErrorStrategy{ + log: log.With("strategy", update.AgentsStrategyHaltOnError), + }, nil +} + +func (h *haltOnErrorStrategy) progressRollout(ctx context.Context, _ *autoupdate.AutoUpdateAgentRolloutSpec, status *autoupdate.AutoUpdateAgentRolloutStatus, now time.Time) error { + // We process every group in order, all the previous groups must be in the DONE state + // for the next group to become active. Even if some early groups are not DONE, + // later groups might be ACTIVE and need to transition to DONE, so we cannot + // return early and must process every group. + // + // For example, in a dev/staging/prod setup, the "dev" group might get rolled + // back while "staging" is still ACTIVE. We must not start PROD but still need + // to transition "staging" to DONE. + previousGroupsAreDone := true + + for i, group := range status.Groups { + switch group.State { + case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED: + var previousGroup *autoupdate.AutoUpdateAgentRolloutStatusGroup + if i != 0 { + previousGroup = status.Groups[i-1] + } + canStart, err := canStartHaltOnError(group, previousGroup, now) + if err != nil { + // In halt-on-error rollouts, groups are dependent. + // Failing to transition a group should prevent other groups from transitioning. 
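+				// We keep the group state as-is, but record the failure through
+				// the update reason and timestamp so operators can see why the
+				// rollout is not progressing.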
+ setGroupState(group, group.State, updateReasonReconcilerError, now) + return err + } + + // Check if the rollout got created after the theoretical group start time + rolloutChangedDuringWindow, err := rolloutChangedInWindow(group, now, status.StartTime.AsTime(), haltOnErrorWindowDuration) + if err != nil { + setGroupState(group, group.State, updateReasonReconcilerError, now) + return err + } + + switch { + case !previousGroupsAreDone: + // All previous groups are not DONE + setGroupState(group, group.State, updateReasonPreviousGroupsNotDone, now) + case !canStart: + // All previous groups are DONE, but time-related criteria are not met + // This can be because we are outside an update window, or because the + // specified wait_hours doesn't let us update yet. + setGroupState(group, group.State, updateReasonCannotStart, now) + case rolloutChangedDuringWindow: + // All previous groups are DONE and time-related criteria are met. + // However, the rollout changed during the maintenance window. + setGroupState(group, group.State, updateReasonRolloutChanged, now) + default: + // All previous groups are DONE and time-related criteria are met. + // We can start. + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonCanStart, now) + } + previousGroupsAreDone = false + case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: + // The group has been manually rolled back. We don't touch anything and + // don't process the next groups. + previousGroupsAreDone = false + case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE: + // The group has already been updated, we can look at the next group + case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE: + // The group is currently being updated. We check if we can transition it to the done state + done, reason := isDoneHaltOnError(group, now) + + if done { + // We transition to the done state. We continue processing the groups as we might be able to start the next one. + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, reason, now) + } else { + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, reason, now) + } + previousGroupsAreDone = false + + default: + return trace.BadParameter("unknown autoupdate group state: %v", group.State) + } + } + return nil +} + +func canStartHaltOnError(group, previousGroup *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time) (bool, error) { + // check wait hours + if group.ConfigWaitHours != 0 { + if previousGroup == nil { + return false, trace.BadParameter("the first group cannot have non-zero wait hours") + } + + previousStart := previousGroup.StartTime.AsTime() + if previousStart.IsZero() || previousStart.Unix() == 0 { + return false, trace.BadParameter("the previous group doesn't have a start time, cannot check the 'wait_hours' criterion") + } + + // Check if the wait_hours criterion is OK, if we are at least after 'wait_hours' hours since the previous start. + if now.Before(previousGroup.StartTime.AsTime().Add(time.Duration(group.ConfigWaitHours) * time.Hour)) { + return false, nil + } + } + + return inWindow(group, now, haltOnErrorWindowDuration) +} + +func isDoneHaltOnError(group *autoupdate.AutoUpdateAgentRolloutStatusGroup, now time.Time) (bool, string) { + // Currently we don't implement status reporting from groups/agents. 
+ // So we just wait 60 minutes and consider the maintenance done. + // This will change as we introduce agent status report and aggregated agent counts. + if group.StartTime.AsTime().Add(haltOnErrorWindowDuration).Before(now) { + return true, updateReasonUpdateComplete + } + return false, updateReasonUpdateInProgress +} diff --git a/lib/autoupdate/rollout/strategy_haltonerror_test.go b/lib/autoupdate/rollout/strategy_haltonerror_test.go new file mode 100644 index 0000000000000..2f59534ddd7db --- /dev/null +++ b/lib/autoupdate/rollout/strategy_haltonerror_test.go @@ -0,0 +1,512 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rollout + +import ( + "context" + "testing" + "time" + + "github.com/jonboulle/clockwork" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + "github.com/gravitational/teleport/lib/utils" +) + +func Test_canStartHaltOnError(t *testing.T) { + now := testSunday + yesterday := testSaturday + + tests := []struct { + name string + group *autoupdate.AutoUpdateAgentRolloutStatusGroup + previousGroup *autoupdate.AutoUpdateAgentRolloutStatusGroup + want bool + wantErr require.ErrorAssertionFunc + }{ + { + name: "first group, no wait_hours", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "test-group", + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 0, + }, + want: true, + wantErr: require.NoError, + }, + { + name: "first group, wait_days (invalid)", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "test-group", + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 1, + }, + want: false, + wantErr: require.Error, + }, + { + name: "second group, no wait_days", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "test-group", + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 0, + }, + previousGroup: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "previous-group", + StartTime: timestamppb.New(now), + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 0, + }, + want: true, + wantErr: require.NoError, + }, + { + name: "second group, wait_days not over", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "test-group", + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 48, + }, + previousGroup: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "previous-group", + StartTime: timestamppb.New(yesterday), + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 0, + }, + want: 
false, + wantErr: require.NoError, + }, + { + name: "second group, wait_days over", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "test-group", + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 24, + }, + previousGroup: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: "previous-group", + StartTime: timestamppb.New(yesterday), + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + ConfigDays: everyWeekday, + ConfigStartHour: int32(now.Hour()), + ConfigWaitHours: 0, + }, + want: true, + wantErr: require.NoError, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := canStartHaltOnError(tt.group, tt.previousGroup, now) + tt.wantErr(t, err) + require.Equal(t, tt.want, got) + }) + } +} + +func Test_progressGroupsHaltOnError(t *testing.T) { + clock := clockwork.NewFakeClockAt(testSunday) + log := utils.NewSlogLoggerForTests() + strategy, err := newHaltOnErrorStrategy(log) + require.NoError(t, err) + + fewMinutesAgo := clock.Now().Add(-5 * time.Minute) + yesterday := testSaturday + canStartToday := everyWeekday + cannotStartToday := everyWeekdayButSunday + ctx := context.Background() + + group1Name := "group1" + group2Name := "group2" + group3Name := "group3" + + tests := []struct { + name string + initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + rolloutStartTime *timestamppb.Timestamp + expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + }{ + { + name: "single group unstarted -> unstarted", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonCannotStart, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "single group unstarted -> unstarted because rollout changed in window", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + rolloutStartTime: timestamppb.New(clock.Now()), + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonRolloutChanged, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "single group unstarted -> active", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: 
group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(clock.Now()), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonCanStart, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "single group active -> active", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(fewMinutesAgo), + LastUpdateTime: timestamppb.New(fewMinutesAgo), + LastUpdateReason: updateReasonCanStart, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(fewMinutesAgo), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonUpdateInProgress, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "single group active -> done", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonUpdateInProgress, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonUpdateComplete, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "single group done -> done", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonUpdateComplete, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonUpdateComplete, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "single group rolledback -> rolledback", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: "manual_rollback", + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: "manual_rollback", + ConfigDays: canStartToday, + 
ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "first group done, second should activate, third should not progress", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonUpdateComplete, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: group2Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 24, + }, + { + Name: group3Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 0, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonUpdateComplete, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: group2Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(clock.Now()), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonCanStart, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 24, + }, + { + Name: group3Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonPreviousGroupsNotDone, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 0, + }, + }, + }, + { + name: "first group rolledback, second should not start", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: "manual_rollback", + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: group2Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 24, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: "manual_rollback", + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: group2Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonPreviousGroupsNotDone, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 24, + 
}, + }, + }, + { + name: "first group rolledback, second is active and should become done, third should not progress", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: "manual_rollback", + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: group2Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCanStart, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 0, + }, + { + Name: group3Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 0, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: group1Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(yesterday), + LastUpdateReason: "manual_rollback", + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: group2Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: timestamppb.New(yesterday), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonUpdateComplete, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 0, + }, + { + Name: group3Name, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonPreviousGroupsNotDone, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + ConfigWaitHours: 0, + }, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + status := &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: tt.initialState, + State: 0, + StartTime: tt.rolloutStartTime, + } + err := strategy.progressRollout(ctx, nil, status, clock.Now()) + require.NoError(t, err) + // We use require.Equal instead of Elements match because group order matters. + // It's not super important for time-based, but is crucial for halt-on-error. + // So it's better to be more conservative and validate order never changes for + // both strategies. + require.Equal(t, tt.expectedState, tt.initialState) + }) + } +} diff --git a/lib/autoupdate/rollout/strategy_test.go b/lib/autoupdate/rollout/strategy_test.go new file mode 100644 index 0000000000000..0711d4043ae9c --- /dev/null +++ b/lib/autoupdate/rollout/strategy_test.go @@ -0,0 +1,468 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. 
See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rollout + +import ( + "testing" + "time" + + "github.com/jonboulle/clockwork" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" +) + +var ( + // 2024-11-30 is a Saturday + testSaturday = time.Date(2024, 11, 30, 12, 30, 0, 0, time.UTC) + // 2024-12-01 is a Sunday + testSunday = time.Date(2024, 12, 1, 12, 30, 0, 0, time.UTC) + matchingStartHour = int32(12) + nonMatchingStartHour = int32(15) + everyWeekday = []string{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat", "Sun"} + everyWeekdayButSunday = []string{"Mon", "Tue", "Wed", "Thu", "Fri", "Sat"} +) + +func Test_canUpdateToday(t *testing.T) { + tests := []struct { + name string + allowedDays []string + now time.Time + want bool + wantErr require.ErrorAssertionFunc + }{ + { + name: "Empty list", + allowedDays: []string{}, + now: time.Now(), + want: false, + wantErr: require.NoError, + }, + { + name: "Wildcard", + allowedDays: []string{"*"}, + now: time.Now(), + want: true, + wantErr: require.NoError, + }, + { + name: "Matching day", + allowedDays: everyWeekday, + now: testSunday, + want: true, + wantErr: require.NoError, + }, + { + name: "No matching day", + allowedDays: everyWeekdayButSunday, + now: testSunday, + want: false, + wantErr: require.NoError, + }, + { + name: "Malformed day", + allowedDays: []string{"Mon", "Tue", "HelloThereGeneralKenobi"}, + now: testSunday, + want: false, + wantErr: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := canUpdateToday(tt.allowedDays, tt.now) + tt.wantErr(t, err) + require.Equal(t, tt.want, got) + }) + } +} + +func Test_inWindow(t *testing.T) { + tests := []struct { + name string + group *autoupdate.AutoUpdateAgentRolloutStatusGroup + now time.Time + duration time.Duration + want bool + wantErr require.ErrorAssertionFunc + }{ + { + name: "out of window", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + ConfigDays: everyWeekdayButSunday, + ConfigStartHour: matchingStartHour, + }, + now: testSunday, + duration: time.Hour, + want: false, + wantErr: require.NoError, + }, + { + name: "inside window, wrong hour", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + ConfigDays: everyWeekday, + ConfigStartHour: nonMatchingStartHour, + }, + now: testSunday, + duration: time.Hour, + want: false, + wantErr: require.NoError, + }, + { + name: "inside window, correct hour", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + ConfigDays: everyWeekday, + ConfigStartHour: matchingStartHour, + }, + now: testSunday, + duration: time.Hour, + want: true, + wantErr: require.NoError, + }, + { + name: "invalid weekdays", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + ConfigDays: []string{"HelloThereGeneralKenobi"}, + ConfigStartHour: matchingStartHour, + }, + now: testSunday, + duration: time.Hour, + want: false, + wantErr: require.Error, + }, + { + name: "short window", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + ConfigDays: everyWeekday, + ConfigStartHour: matchingStartHour, + }, + now: testSunday, + duration: time.Second, + want: false, + wantErr: require.NoError, + }, + { + name: "window start time is included", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + ConfigDays: everyWeekday, + 
ConfigStartHour: matchingStartHour,
+			},
+			now:      testSunday.Truncate(24 * time.Hour).Add(time.Duration(matchingStartHour) * time.Hour),
+			duration: time.Hour,
+			want:     true,
+			wantErr:  require.NoError,
+		},
+		{
+			name: "window end time is not included",
+			group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{
+				ConfigDays:      everyWeekday,
+				ConfigStartHour: matchingStartHour,
+			},
+			now:      testSunday.Truncate(24 * time.Hour).Add(time.Duration(matchingStartHour+1) * time.Hour),
+			duration: time.Hour,
+			want:     false,
+			wantErr:  require.NoError,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			got, err := inWindow(tt.group, tt.now, tt.duration)
+			tt.wantErr(t, err)
+			require.Equal(t, tt.want, got)
+		})
+	}
+}
+
+func Test_rolloutChangedInWindow(t *testing.T) {
+	// Test setup: creating fixtures.
+	group := &autoupdate.AutoUpdateAgentRolloutStatusGroup{
+		Name:            "test-group",
+		ConfigDays:      everyWeekdayButSunday,
+		ConfigStartHour: 12,
+	}
+	tests := []struct {
+		name         string
+		now          time.Time
+		rolloutStart time.Time
+		want         bool
+	}{
+		{
+			name:         "zero rollout start time",
+			now:          testSaturday,
+			rolloutStart: time.Time{},
+			want:         false,
+		},
+		{
+			name: "epoch rollout start time",
+			now:  testSaturday,
+			// timestamppb counts since the Unix epoch, while Go's zero time is 0001-01-01 00:00:00 UTC
+			rolloutStart: (&timestamppb.Timestamp{}).AsTime(),
+			want:         false,
+		},
+		{
+			name:         "rollout changed a week ago",
+			now:          testSaturday,
+			rolloutStart: testSaturday.Add(-7 * 24 * time.Hour),
+			want:         false,
+		},
+		{
+			name:         "rollout changed the same day, before the window",
+			now:          testSaturday,
+			rolloutStart: testSaturday.Add(-2 * time.Hour),
+			want:         false,
+		},
+		{
+			name:         "rollout changed the same day, during the window",
+			now:          testSaturday,
+			rolloutStart: testSaturday.Add(-2 * time.Minute),
+			want:         true,
+		},
+		{
+			name:         "rollout just changed but we are not in a window",
+			now:          testSunday,
+			rolloutStart: testSunday.Add(-2 * time.Minute),
+			want:         false,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Test execution.
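+			// rolloutChangedInWindow reports whether the rollout was (re)configured inside
+			// the group's current maintenance window; the one-hour duration passed here
+			// mirrors haltOnErrorWindowDuration used by the strategies.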
+ result, err := rolloutChangedInWindow(group, tt.now, tt.rolloutStart, time.Hour) + require.NoError(t, err) + require.Equal(t, tt.want, result) + }) + } +} + +func Test_setGroupState(t *testing.T) { + groupName := "test-group" + + clock := clockwork.NewFakeClock() + // oldUpdateTime is 5 minutes in the past + oldUpdateTime := clock.Now() + clock.Advance(5 * time.Minute) + + tests := []struct { + name string + group *autoupdate.AutoUpdateAgentRolloutStatusGroup + newState autoupdate.AutoUpdateAgentGroupState + reason string + now time.Time + expected *autoupdate.AutoUpdateAgentRolloutStatusGroup + }{ + { + name: "same state, no change", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(oldUpdateTime), + LastUpdateReason: updateReasonCannotStart, + }, + newState: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + reason: updateReasonCannotStart, + now: clock.Now(), + expected: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + // update time has not been bumped as nothing changed + LastUpdateTime: timestamppb.New(oldUpdateTime), + LastUpdateReason: updateReasonCannotStart, + }, + }, + { + name: "same state, reason change", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(oldUpdateTime), + LastUpdateReason: updateReasonCannotStart, + }, + newState: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + reason: updateReasonReconcilerError, + now: clock.Now(), + expected: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + // update time has been bumped because reason changed + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonReconcilerError, + }, + }, + { + name: "new state, no reason change", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(oldUpdateTime), + LastUpdateReason: updateReasonCannotStart, + }, + newState: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + reason: updateReasonCannotStart, + now: clock.Now(), + expected: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + // update time has been bumped because state changed + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonCannotStart, + }, + }, + { + name: "new state, reason change", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(oldUpdateTime), + LastUpdateReason: updateReasonCannotStart, + }, + newState: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + reason: updateReasonReconcilerError, + now: clock.Now(), + expected: 
&autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + // update time has been bumped because state and reason changed + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonReconcilerError, + }, + }, + { + name: "new state, transition to active", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: nil, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(oldUpdateTime), + LastUpdateReason: updateReasonCannotStart, + }, + newState: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + reason: updateReasonCanStart, + now: clock.Now(), + expected: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + // We set start time during the transition + StartTime: timestamppb.New(clock.Now()), + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + // update time has been bumped because state and reason changed + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonCanStart, + }, + }, + { + name: "same state, transition from active to active", + group: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + StartTime: timestamppb.New(oldUpdateTime), + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + LastUpdateTime: timestamppb.New(oldUpdateTime), + LastUpdateReason: updateReasonCanStart, + }, + newState: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + reason: updateReasonReconcilerError, + now: clock.Now(), + expected: &autoupdate.AutoUpdateAgentRolloutStatusGroup{ + Name: groupName, + // As the state was already active, the start time should not be refreshed + StartTime: timestamppb.New(oldUpdateTime), + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + // update time has been bumped because reason changed + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonReconcilerError, + }, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + setGroupState(tt.group, tt.newState, tt.reason, tt.now) + require.Equal(t, tt.expected, tt.group) + }) + } +} + +func Test_computeRolloutState(t *testing.T) { + tests := []struct { + name string + groups []*autoupdate.AutoUpdateAgentRolloutStatusGroup + expectedState autoupdate.AutoUpdateAgentRolloutState + }{ + { + name: "empty groups", + groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{}, + expectedState: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED, + }, + { + name: "all groups unstarted", + groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + }, + expectedState: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSTARTED, + }, + { + name: "one group active", + groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + {State: 
autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + }, + expectedState: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE, + }, + { + name: "one group done", + groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED}, + }, + expectedState: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE, + }, + { + name: "every group done", + groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE}, + }, + expectedState: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_DONE, + }, + { + name: "one group rolledback", + groups: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK}, + {State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE}, + }, + expectedState: autoupdate.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ROLLEDBACK, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + require.Equal(t, tt.expectedState, computeRolloutState(tt.groups)) + }) + } +} diff --git a/lib/autoupdate/rollout/strategy_timebased.go b/lib/autoupdate/rollout/strategy_timebased.go new file mode 100644 index 0000000000000..e4df5c6e23789 --- /dev/null +++ b/lib/autoupdate/rollout/strategy_timebased.go @@ -0,0 +1,122 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */
+
+package rollout
+
+import (
+	"context"
+	"log/slog"
+	"time"
+
+	"github.com/gravitational/trace"
+
+	"github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	update "github.com/gravitational/teleport/api/types/autoupdate"
+)
+
+const (
+	updateReasonInWindow      = "in_window"
+	updateReasonOutsideWindow = "outside_window"
+)
+
+type timeBasedStrategy struct {
+	log *slog.Logger
+}
+
+func (h *timeBasedStrategy) name() string {
+	return update.AgentsStrategyTimeBased
+}
+
+func newTimeBasedStrategy(log *slog.Logger) (rolloutStrategy, error) {
+	if log == nil {
+		return nil, trace.BadParameter("missing log")
+	}
+	return &timeBasedStrategy{
+		log: log.With("strategy", update.AgentsStrategyTimeBased),
+	}, nil
+}
+
+func (h *timeBasedStrategy) progressRollout(ctx context.Context, spec *autoupdate.AutoUpdateAgentRolloutSpec, status *autoupdate.AutoUpdateAgentRolloutStatus, now time.Time) error {
+	windowDuration := spec.GetMaintenanceWindowDuration().AsDuration()
+	// Backward compatibility for resources previously created without duration.
+	if windowDuration == 0 {
+		windowDuration = haltOnErrorWindowDuration
+	}
+
+	// We always process every group regardless of the order.
+	var errs []error
+	for _, group := range status.Groups {
+		switch group.State {
+		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED,
+			autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:
+			// We start any unstarted group that is in its window.
+			// Done groups can transition back to active if they enter their maintenance window again.
+			// Some agents might have missed the previous windows and might be expected to try again.
+			shouldBeActive, err := inWindow(group, now, windowDuration)
+			if err != nil {
+				// In time-based rollouts, groups are not dependent.
+				// Failing to transition a group should not affect other groups.
+				// We reflect that something went wrong in the status and go to the next group.
+				setGroupState(group, group.State, updateReasonReconcilerError, now)
+				errs = append(errs, err)
+				continue
+			}
+
+			// Check if the rollout got created after the theoretical group start time
+			rolloutChangedDuringWindow, err := rolloutChangedInWindow(group, now, status.StartTime.AsTime(), windowDuration)
+			if err != nil {
+				setGroupState(group, group.State, updateReasonReconcilerError, now)
+				errs = append(errs, err)
+				continue
+			}
+
+			switch {
+			case !shouldBeActive:
+				setGroupState(group, group.State, updateReasonOutsideWindow, now)
+			case rolloutChangedDuringWindow:
+				setGroupState(group, group.State, updateReasonRolloutChanged, now)
+			default:
+				setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now)
+			}
+		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK:
+			// We don't touch any group that was manually rolled back.
+			// Something happened and we should not try to update again.
+		case autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:
+			// The group is currently being updated. We check if the maintenance
+			// is over and if we should transition it to the done state.
+			shouldBeActive, err := inWindow(group, now, windowDuration)
+			if err != nil {
+				// In time-based rollouts, groups are not dependent.
+				// Failing to transition a group should not affect other groups.
+				// We reflect that something went wrong in the status and go to the next group.
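+				// Any error collected here is surfaced at the end of the loop via
+				// trace.NewAggregate, so one failing group doesn't mask the others.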
+ setGroupState(group, group.State, updateReasonReconcilerError, now) + errs = append(errs, err) + continue + } + + if shouldBeActive { + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, updateReasonInWindow, now) + } else { + setGroupState(group, autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, updateReasonOutsideWindow, now) + } + default: + return trace.BadParameter("unknown autoupdate group state: %v", group.State) + } + } + return trace.NewAggregate(errs...) +} diff --git a/lib/autoupdate/rollout/strategy_timebased_test.go b/lib/autoupdate/rollout/strategy_timebased_test.go new file mode 100644 index 0000000000000..6fa6245598a15 --- /dev/null +++ b/lib/autoupdate/rollout/strategy_timebased_test.go @@ -0,0 +1,350 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package rollout + +import ( + "context" + "testing" + "time" + + "github.com/jonboulle/clockwork" + "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/timestamppb" + + "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + "github.com/gravitational/teleport/lib/utils" +) + +func Test_progressGroupsTimeBased(t *testing.T) { + clock := clockwork.NewFakeClockAt(testSunday) + log := utils.NewSlogLoggerForTests() + strategy, err := newTimeBasedStrategy(log) + require.NoError(t, err) + + groupName := "test-group" + canStartToday := everyWeekday + cannotStartToday := everyWeekdayButSunday + lastUpdate := timestamppb.New(clock.Now().Add(-5 * time.Minute)) + ctx := context.Background() + + tests := []struct { + name string + initialState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + rolloutStartTime *timestamppb.Timestamp + expectedState []*autoupdate.AutoUpdateAgentRolloutStatusGroup + }{ + { + name: "unstarted -> unstarted", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: lastUpdate, + LastUpdateReason: updateReasonCreated, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonOutsideWindow, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "unstarted -> unstarted because rollout just changed", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: lastUpdate, + LastUpdateReason: 
updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + rolloutStartTime: timestamppb.New(clock.Now()), + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonRolloutChanged, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "unstarted -> active", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: lastUpdate, + LastUpdateReason: updateReasonCreated, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(clock.Now()), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonInWindow, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "done -> done", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + LastUpdateTime: lastUpdate, + LastUpdateReason: updateReasonOutsideWindow, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + LastUpdateTime: lastUpdate, + LastUpdateReason: updateReasonOutsideWindow, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "done -> active", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + LastUpdateTime: lastUpdate, + StartTime: lastUpdate, + LastUpdateReason: updateReasonOutsideWindow, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(clock.Now()), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonInWindow, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "active -> active", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: lastUpdate, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonInWindow, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: lastUpdate, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonInWindow, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "active -> done", + initialState: 
[]*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: lastUpdate, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonInWindow, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName, + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: lastUpdate, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonOutsideWindow, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "rolledback is a dead end", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName + "-in-maintenance", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + LastUpdateTime: lastUpdate, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: groupName + "-out-of-maintenance", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + LastUpdateTime: lastUpdate, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: groupName + "-in-maintenance", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + LastUpdateTime: lastUpdate, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: groupName + "-out-of-maintenance", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + LastUpdateTime: lastUpdate, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + { + name: "mix of everything", + initialState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: "new group should start", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateTime: lastUpdate, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: "done group should start", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + LastUpdateTime: lastUpdate, + StartTime: lastUpdate, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: "rolledback group should do nothing", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + LastUpdateTime: lastUpdate, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: "old group should stop", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + LastUpdateTime: lastUpdate, + StartTime: lastUpdate, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + expectedState: []*autoupdate.AutoUpdateAgentRolloutStatusGroup{ + { + Name: "new group should start", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(clock.Now()), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonInWindow, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: "done group should start", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + StartTime: timestamppb.New(clock.Now()), + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: 
updateReasonInWindow, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: "rolledback group should do nothing", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK, + LastUpdateTime: lastUpdate, + ConfigDays: canStartToday, + ConfigStartHour: matchingStartHour, + }, + { + Name: "old group should stop", + State: autoupdate.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + StartTime: lastUpdate, + LastUpdateTime: timestamppb.New(clock.Now()), + LastUpdateReason: updateReasonOutsideWindow, + ConfigDays: cannotStartToday, + ConfigStartHour: matchingStartHour, + }, + }, + }, + } + + spec := &autoupdate.AutoUpdateAgentRolloutSpec{ + MaintenanceWindowDuration: durationpb.New(time.Hour), + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + status := &autoupdate.AutoUpdateAgentRolloutStatus{ + Groups: tt.initialState, + State: 0, + StartTime: tt.rolloutStartTime, + } + err := strategy.progressRollout(ctx, spec, status, clock.Now()) + require.NoError(t, err) + // We use require.Equal instead of Elements match because group order matters. + // It's not super important for time-based, but is crucial for halt-on-error. + // So it's better to be more conservative and validate order never changes for + // both strategies. + require.Equal(t, tt.expectedState, status.Groups) + }) + } +} diff --git a/lib/autoupdate/rolloutcontroller/reconciler.go b/lib/autoupdate/rolloutcontroller/reconciler.go deleted file mode 100644 index 78989c4ec4a6b..0000000000000 --- a/lib/autoupdate/rolloutcontroller/reconciler.go +++ /dev/null @@ -1,235 +0,0 @@ -/* - * Teleport - * Copyright (C) 2024 Gravitational, Inc. - * - * This program is free software: you can redistribute it and/or modify - * it under the terms of the GNU Affero General Public License as published by - * the Free Software Foundation, either version 3 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU Affero General Public License for more details. - * - * You should have received a copy of the GNU Affero General Public License - * along with this program. If not, see . - */ - -package rolloutcontroller - -import ( - "context" - "log/slog" - "sync" - "time" - - "github.com/gravitational/trace" - - "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" - update "github.com/gravitational/teleport/api/types/autoupdate" -) - -const ( - reconciliationTimeout = 30 * time.Second - defaultConfigMode = update.AgentsUpdateModeEnabled - defaultStrategy = update.AgentsStrategyHaltOnError - maxConflictRetry = 3 -) - -// Reconciler reconciles the AutoUpdateAgentRollout singleton based on the content of the AutoUpdateVersion and -// AutoUpdateConfig singletons. This reconciler is not based on the services.GenericReconciler because: -// - we reconcile 2 resources with one -// - both input and output are singletons, we don't need the multi resource logic nor stream/paginated APIs -type Reconciler struct { - clt Client - log *slog.Logger - - // mutex ensures we only run one reconciliation at a time - mutex sync.Mutex -} - -// Reconcile the AutoUpdateAgentRollout singleton. The reconciliation can fail because of a conflict (multiple auths -// are racing), in this case we retry the reconciliation immediately. 
-func (r *Reconciler) Reconcile(ctx context.Context) error { - r.mutex.Lock() - defer r.mutex.Unlock() - - ctx, cancel := context.WithTimeout(ctx, reconciliationTimeout) - defer cancel() - tries := 0 - var err error - for tries < maxConflictRetry { - tries++ - select { - case <-ctx.Done(): - return ctx.Err() - default: - err = r.tryReconcile(ctx) - switch { - case err == nil: - return nil - case trace.IsCompareFailed(err), trace.IsNotFound(err): - // The resource changed since we last saw it - // We must have raced against another auth - // Let's retry the reconciliation - r.log.DebugContext(ctx, "retrying reconciliation", "error", err) - default: - // error is non-nil and non-retryable - return trace.Wrap(err, "failed to reconcile rollout") - } - } - } - return trace.CompareFailed("compare failed, tried %d times, last error: %s", tries, err) -} - -// tryReconcile tries to reconcile the AutoUpdateAgentRollout singleton. -// This function should be nilpotent if the AutoUpdateAgentRollout is already up-to-date. -// The creation/update/deletion can fail with a trace.CompareFailedError or trace.NotFoundError -// if the resource change while we were computing it. -// The caller must handle those error and retry the reconciliation. -func (r *Reconciler) tryReconcile(ctx context.Context) error { - // get autoupdate_config - var config *autoupdate.AutoUpdateConfig - if c, err := r.clt.GetAutoUpdateConfig(ctx); err == nil { - config = c - } else if !trace.IsNotFound(err) { - return trace.Wrap(err, "getting autoupdate_config") - } - - // get autoupdate_version - var version *autoupdate.AutoUpdateVersion - if v, err := r.clt.GetAutoUpdateVersion(ctx); err == nil { - version = v - } else if !trace.IsNotFound(err) { - return trace.Wrap(err, "getting autoupdate version") - } - - // get autoupdate_agent_rollout - rolloutExists := true - existingRollout, err := r.clt.GetAutoUpdateAgentRollout(ctx) - if err != nil && !trace.IsNotFound(err) { - return trace.Wrap(err, "getting autoupdate_agent_rollout") - } - if trace.IsNotFound(err) { - // rollout doesn't exist yet, we'll need to call Create instead of Update. 
- rolloutExists = false - } - - // if autoupdate_version does not exist or does not contain spec.agents, we should not configure a rollout - if version.GetSpec().GetAgents() == nil { - if !rolloutExists { - // the rollout doesn't exist, nothing to do - return nil - } - // the rollout exists, we must delete it - return r.clt.DeleteAutoUpdateAgentRollout(ctx) - } - - // compute what the spec should look like - newSpec, err := r.buildRolloutSpec(config.GetSpec().GetAgents(), version.GetSpec().GetAgents()) - if err != nil { - return trace.Wrap(err, "mutating rollout") - } - - // if there are no existing rollout, we create a new one - if !rolloutExists { - rollout, err := update.NewAutoUpdateAgentRollout(newSpec) - if err != nil { - return trace.Wrap(err, "validating new rollout") - } - _, err = r.clt.CreateAutoUpdateAgentRollout(ctx, rollout) - return trace.Wrap(err, "creating rollout") - } - - // there was an existing rollout, we must figure if something changed - specChanged := existingRollout.GetSpec().GetStartVersion() != newSpec.GetStartVersion() || - existingRollout.GetSpec().GetTargetVersion() != newSpec.GetTargetVersion() || - existingRollout.GetSpec().GetAutoupdateMode() != newSpec.GetAutoupdateMode() || - existingRollout.GetSpec().GetStrategy() != newSpec.GetStrategy() || - existingRollout.GetSpec().GetSchedule() != newSpec.GetSchedule() - - // TODO: reconcile the status here when we'll add group support. - // Even if the spec does not change, we might still have to update the status: - // - sync groups with the ones from the user config - // - progress the rollout across groups - - // if nothing changed, no need to update the resource - if !specChanged { - r.log.DebugContext(ctx, "rollout unchanged") - return nil - } - - // something changed, we replace the old spec with the new one, validate and update the resource - // we don't create a new resource to keep the revision ID and - existingRollout.Spec = newSpec - err = update.ValidateAutoUpdateAgentRollout(existingRollout) - if err != nil { - return trace.Wrap(err, "validating mutated rollout") - } - _, err = r.clt.UpdateAutoUpdateAgentRollout(ctx, existingRollout) - return trace.Wrap(err, "updating rollout") -} - -func (r *Reconciler) buildRolloutSpec(config *autoupdate.AutoUpdateConfigSpecAgents, version *autoupdate.AutoUpdateVersionSpecAgents) (*autoupdate.AutoUpdateAgentRolloutSpec, error) { - // reconcile mode - mode, err := getMode(config.GetMode(), version.GetMode()) - if err != nil { - return nil, trace.Wrap(err, "computing agent update mode") - } - - strategy := config.GetStrategy() - if strategy == "" { - strategy = defaultStrategy - } - - return &autoupdate.AutoUpdateAgentRolloutSpec{ - StartVersion: version.GetStartVersion(), - TargetVersion: version.GetTargetVersion(), - Schedule: version.GetSchedule(), - AutoupdateMode: mode, - Strategy: strategy, - }, nil - -} - -// agentModeCode maps agents mode to integers. -// When config and version modes don't match, the lowest integer takes precedence. -var ( - agentModeCode = map[string]int{ - update.AgentsUpdateModeDisabled: 0, - update.AgentsUpdateModeSuspended: 1, - update.AgentsUpdateModeEnabled: 2, - } - codeToAgentMode = map[int]string{ - 0: update.AgentsUpdateModeDisabled, - 1: update.AgentsUpdateModeSuspended, - 2: update.AgentsUpdateModeEnabled, - } -) - -// getMode merges the agent modes coming from the version and config resources into a single mode. -// "disabled" takes precedence over "suspended", which takes precedence over "enabled". 
-func getMode(configMode, versionMode string) (string, error) { - if configMode == "" { - configMode = defaultConfigMode - } - if versionMode == "" { - return "", trace.BadParameter("version mode empty") - } - - configCode, ok := agentModeCode[configMode] - if !ok { - return "", trace.BadParameter("unsupported agent config mode: %v", configMode) - } - versionCode, ok := agentModeCode[versionMode] - if !ok { - return "", trace.BadParameter("unsupported agent version mode: %v", versionMode) - } - - // The lowest code takes precedence - if configCode <= versionCode { - return codeToAgentMode[configCode], nil - } - return codeToAgentMode[versionCode], nil -} diff --git a/lib/cache/cache_test.go b/lib/cache/cache_test.go index 2c49f974ee84e..2990af386f059 100644 --- a/lib/cache/cache_test.go +++ b/lib/cache/cache_test.go @@ -4145,9 +4145,9 @@ func newAutoUpdateAgentRollout(t *testing.T) *autoupdate.AutoUpdateAgentRollout r, err := update.NewAutoUpdateAgentRollout(&autoupdate.AutoUpdateAgentRolloutSpec{ StartVersion: "1.2.3", TargetVersion: "2.3.4", - Schedule: "regular", - AutoupdateMode: "enabled", - Strategy: "time-based", + Schedule: update.AgentsScheduleImmediate, + AutoupdateMode: update.AgentsUpdateModeEnabled, + Strategy: update.AgentsStrategyTimeBased, }) require.NoError(t, err) return r diff --git a/lib/config/configuration.go b/lib/config/configuration.go index 8e553f463c995..95d7bf80cdedf 100644 --- a/lib/config/configuration.go +++ b/lib/config/configuration.go @@ -2770,6 +2770,14 @@ func Configure(clf *CommandLineFlags, cfg *servicecfg.Config, legacyAppFlags boo cfg.Proxy.QUICProxyPeering = true } + if rawPeriod := os.Getenv("TELEPORT_UNSTABLE_AGENT_ROLLOUT_SYNC_PERIOD"); rawPeriod != "" { + period, err := time.ParseDuration(rawPeriod) + if err != nil { + return trace.Wrap(err, "invalid agent rollout period %q", rawPeriod) + } + cfg.Auth.AgentRolloutControllerSyncPeriod = period + } + return nil } diff --git a/lib/kubernetestoken/token_source.go b/lib/kube/token/source.go similarity index 98% rename from lib/kubernetestoken/token_source.go rename to lib/kube/token/source.go index 55a506937cc89..8a10c442088fa 100644 --- a/lib/kubernetestoken/token_source.go +++ b/lib/kube/token/source.go @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -package kubernetestoken +package token import ( "strings" diff --git a/lib/kubernetestoken/token_source_test.go b/lib/kube/token/source_test.go similarity index 99% rename from lib/kubernetestoken/token_source_test.go rename to lib/kube/token/source_test.go index 4089017378278..9d3e5fd5a4092 100644 --- a/lib/kubernetestoken/token_source_test.go +++ b/lib/kube/token/source_test.go @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -package kubernetestoken +package token import ( "io/fs" diff --git a/lib/kubernetestoken/token_validator.go b/lib/kube/token/validator.go similarity index 99% rename from lib/kubernetestoken/token_validator.go rename to lib/kube/token/validator.go index c600bfc63c75b..0d88af8d46735 100644 --- a/lib/kubernetestoken/token_validator.go +++ b/lib/kube/token/validator.go @@ -16,7 +16,7 @@ * along with this program. If not, see . 
*/ -package kubernetestoken +package token import ( "context" diff --git a/lib/kubernetestoken/token_validator_test.go b/lib/kube/token/validator_test.go similarity index 99% rename from lib/kubernetestoken/token_validator_test.go rename to lib/kube/token/validator_test.go index 38c6bd77e631e..70d68fddb766d 100644 --- a/lib/kubernetestoken/token_validator_test.go +++ b/lib/kube/token/validator_test.go @@ -16,7 +16,7 @@ * along with this program. If not, see . */ -package kubernetestoken +package token import ( "context" diff --git a/lib/kube/utils/utils.go b/lib/kube/utils/utils.go index f300ff12c2c20..0806453e356f1 100644 --- a/lib/kube/utils/utils.go +++ b/lib/kube/utils/utils.go @@ -32,7 +32,6 @@ import ( "github.com/gravitational/teleport/api/client" "github.com/gravitational/teleport/api/client/proto" "github.com/gravitational/teleport/api/types" - "github.com/gravitational/teleport/lib/automaticupgrades" "github.com/gravitational/teleport/lib/automaticupgrades/version" ) @@ -184,7 +183,7 @@ type Pinger interface { // GetKubeAgentVersion returns a version of the Kube agent appropriate for this Teleport cluster. Used for example when deciding version // for enrolling EKS clusters. -func GetKubeAgentVersion(ctx context.Context, pinger Pinger, clusterFeatures proto.Features, releaseChannels automaticupgrades.Channels) (string, error) { +func GetKubeAgentVersion(ctx context.Context, pinger Pinger, clusterFeatures proto.Features, versionGetter version.Getter) (string, error) { pingResponse, err := pinger.Ping(ctx) if err != nil { return "", trace.Wrap(err) @@ -192,7 +191,7 @@ func GetKubeAgentVersion(ctx context.Context, pinger Pinger, clusterFeatures pro agentVersion := pingResponse.ServerVersion if clusterFeatures.GetAutomaticUpgrades() && clusterFeatures.GetCloud() { - defaultVersion, err := releaseChannels.DefaultVersion(ctx) + defaultVersion, err := versionGetter.GetVersion(ctx) if err == nil { agentVersion = defaultVersion } else if !errors.Is(err, &version.NoNewVersionError{}) { diff --git a/lib/kube/utils/utils_test.go b/lib/kube/utils/utils_test.go index b963c4277c0c4..b7df332c51c07 100644 --- a/lib/kube/utils/utils_test.go +++ b/lib/kube/utils/utils_test.go @@ -80,12 +80,11 @@ func TestGetAgentVersion(t *testing.T) { err := channel.CheckAndSetDefaults() require.NoError(t, err) } - releaseChannels := automaticupgrades.Channels{automaticupgrades.DefaultChannelName: channel} - version, err := GetKubeAgentVersion(ctx, p, tt.clusterFeatures, releaseChannels) + result, err := GetKubeAgentVersion(ctx, p, tt.clusterFeatures, channel) tt.errorAssert(t, err) - require.Equal(t, tt.expectedVersion, version) + require.Equal(t, tt.expectedVersion, result) }) } } diff --git a/lib/service/service.go b/lib/service/service.go index ff7054fc88edf..a7b76ae337c90 100644 --- a/lib/service/service.go +++ b/lib/service/service.go @@ -96,6 +96,7 @@ import ( "github.com/gravitational/teleport/lib/auth/storage" "github.com/gravitational/teleport/lib/authz" "github.com/gravitational/teleport/lib/automaticupgrades" + "github.com/gravitational/teleport/lib/autoupdate/rollout" "github.com/gravitational/teleport/lib/backend" "github.com/gravitational/teleport/lib/backend/dynamo" _ "github.com/gravitational/teleport/lib/backend/etcdbk" @@ -2483,6 +2484,14 @@ func (process *TeleportProcess) initAuthService() error { return trace.Wrap(spiffeFedSyncer.Run(process.GracefulExitContext()), "running SPIFFEFederation Syncer") }) + agentRolloutController, err := rollout.NewController(authServer, logger, process.Clock, 
cfg.Auth.AgentRolloutControllerSyncPeriod, process.metricsRegistry) + if err != nil { + return trace.Wrap(err, "creating the rollout controller") + } + process.RegisterFunc("auth.autoupdate_agent_rollout_controller", func() error { + return trace.Wrap(agentRolloutController.Run(process.GracefulExitContext()), "running autoupdate_agent_rollout controller") + }) + process.RegisterFunc("auth.server_info", func() error { return trace.Wrap(auth.ReconcileServerInfos(process.GracefulExitContext(), authServer)) }) diff --git a/lib/service/service_test.go b/lib/service/service_test.go index 07f75a004e3d2..52ed5be8af8a4 100644 --- a/lib/service/service_test.go +++ b/lib/service/service_test.go @@ -52,7 +52,9 @@ import ( "github.com/gravitational/teleport" "github.com/gravitational/teleport/api/breaker" + autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" "github.com/gravitational/teleport/api/types" + autoupdate "github.com/gravitational/teleport/api/types/autoupdate" apiutils "github.com/gravitational/teleport/api/utils" "github.com/gravitational/teleport/entitlements" "github.com/gravitational/teleport/lib" @@ -2075,3 +2077,75 @@ func makeTempDir(t *testing.T) string { t.Cleanup(func() { os.RemoveAll(tempDir) }) return tempDir } + +// TestAgentRolloutController validates that the agent rollout controller is started +// when we run the Auth Service. It does so by creating a dummy autoupdate_version resource +// and checking that the corresponding autoupdate_agent_rollout resource is created by the auth. +// If you want to test the reconciliation logic, add tests to the rolloutcontroller package instead. +func TestAgentRolloutController(t *testing.T) { + t.Parallel() + + dataDir := makeTempDir(t) + + cfg := servicecfg.MakeDefaultConfig() + // We use a real clock because too many services are using the clock and it's not possible to accurately wait for + // each one of them to reach the point where they wait for the clock to advance. If we add a WaitUntil(X waiters) + // check, this will break the next time we add a new waiter. + cfg.Clock = clockwork.NewRealClock() + cfg.DataDir = dataDir + cfg.SetAuthServerAddress(utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"}) + cfg.Auth.Enabled = true + cfg.Proxy.Enabled = false + cfg.SSH.Enabled = false + cfg.DebugService.Enabled = false + cfg.Auth.StorageConfig.Params["path"] = dataDir + cfg.Auth.ListenAddr = utils.NetAddr{AddrNetwork: "tcp", Addr: "127.0.0.1:0"} + // Speed up the reconciliation period for testing purposes. + cfg.Auth.AgentRolloutControllerSyncPeriod = 200 * time.Millisecond + cfg.CircuitBreakerConfig = breaker.NoopBreakerConfig() + + process, err := NewTeleport(cfg) + require.NoError(t, err) + + // Test setup: start the Teleport auth and wait for it to become ready + require.NoError(t, process.Start()) + + // Test setup: wait for every service to start + ctx, cancel := context.WithTimeout(process.ExitContext(), 30*time.Second) + defer cancel() + for _, eventName := range []string{AuthTLSReady, InstanceReady} { + _, err := process.WaitForEvent(ctx, eventName) + require.NoError(t, err) + } + + // Test cleanup: close the Teleport process and wait for every service to exit before returning. + // This ensures that a service will not make the test fail by writing a file to the temporary directory while it's + // being removed.
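+ // Close() asks every registered service to stop, and Wait() blocks until they have all returned.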
+ t.Cleanup(func() { + require.NoError(t, process.Close()) + require.NoError(t, process.Wait()) + }) + + // Test execution: create the autoupdate_version resource + authServer := process.GetAuthServer() + version, err := autoupdate.NewAutoUpdateVersion(&autoupdatepb.AutoUpdateVersionSpec{ + Agents: &autoupdatepb.AutoUpdateVersionSpecAgents{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: autoupdate.AgentsScheduleImmediate, + Mode: autoupdate.AgentsUpdateModeEnabled, + }, + }) + require.NoError(t, err) + version, err = authServer.CreateAutoUpdateVersion(ctx, version) + require.NoError(t, err) + + // Test validation: check that a new autoupdate_agent_rollout config was created + require.Eventually(t, func() bool { + rollout, err := authServer.GetAutoUpdateAgentRollout(ctx) + if err != nil { + return false + } + return rollout.Spec.GetTargetVersion() == version.Spec.GetAgents().GetTargetVersion() + }, 5*time.Second, 10*time.Millisecond) +} diff --git a/lib/service/servicecfg/auth.go b/lib/service/servicecfg/auth.go index 1b09042215aed..a8fcfcd57b8f2 100644 --- a/lib/service/servicecfg/auth.go +++ b/lib/service/servicecfg/auth.go @@ -20,6 +20,7 @@ package servicecfg import ( "slices" + "time" "github.com/dustin/go-humanize" "github.com/gravitational/trace" @@ -116,6 +117,12 @@ type AuthConfig struct { // AccessMonitoring configures access monitoring. AccessMonitoring *AccessMonitoringOptions + + // AgentRolloutControllerSyncPeriod controls the period between two + // reconciliations of the agent rollout controller. This value is jittered. + // Empty value means the controller uses its default. + // Used in tests. + AgentRolloutControllerSyncPeriod time.Duration } // AccessMonitoringOptions configures access monitoring. diff --git a/lib/services/presets.go b/lib/services/presets.go index e7ea741754228..4c3a3854e4bc4 100644 --- a/lib/services/presets.go +++ b/lib/services/presets.go @@ -209,6 +209,7 @@ func NewPresetEditorRole() types.Role { types.NewRule(types.KindAutoUpdateConfig, RW()), types.NewRule(types.KindGitServer, RW()), types.NewRule(types.KindWorkloadIdentityX509Revocation, RW()), + types.NewRule(types.KindAutoUpdateAgentRollout, RO()), }, }, }, diff --git a/lib/srv/discovery/kube_integration_watcher.go b/lib/srv/discovery/kube_integration_watcher.go index 74a913bd61c9e..fa599da8cd4c4 100644 --- a/lib/srv/discovery/kube_integration_watcher.go +++ b/lib/srv/discovery/kube_integration_watcher.go @@ -20,8 +20,9 @@ package discovery import ( "context" - "fmt" "maps" + "net/url" + "path" "slices" "strings" "sync" @@ -30,10 +31,13 @@ import ( "github.com/gravitational/trace" "google.golang.org/protobuf/types/known/timestamppb" + "github.com/gravitational/teleport" + "github.com/gravitational/teleport/api/client/webclient" integrationv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/integration/v1" usertasksv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/usertasks/v1" "github.com/gravitational/teleport/api/types" "github.com/gravitational/teleport/lib/automaticupgrades" + "github.com/gravitational/teleport/lib/automaticupgrades/version" kubeutils "github.com/gravitational/teleport/lib/kube/utils" "github.com/gravitational/teleport/lib/srv/discovery/common" libslices "github.com/gravitational/teleport/lib/utils/slices" @@ -60,10 +64,24 @@ func (s *Server) startKubeIntegrationWatchers() error { } proxyPublicAddr := pingResponse.GetProxyPublicAddr() - releaseChannels := automaticupgrades.Channels{automaticupgrades.DefaultChannelName: 
&automaticupgrades.Channel{ - ForwardURL: fmt.Sprintf("https://%s/webapi/automaticupgrades/channel/%s", proxyPublicAddr, automaticupgrades.DefaultChannelName)}} - if err := releaseChannels.CheckAndSetDefaults(); err != nil { - return trace.Wrap(err) + var versionGetter version.Getter + if proxyPublicAddr == "" { + // If there are no proxy services running, we might fail to get the proxy URL and build a client. + // In this case we "gracefully" fall back to our own version. + // This is not supposed to happen outside of tests as the discovery service must join via a proxy. + s.Log.WarnContext(s.ctx, + "Failed to determine proxy public address, agents will install our own Teleport version instead of the one advertised by the proxy.", + "version", teleport.Version) + versionGetter = version.NewStaticGetter(teleport.Version, nil) + } else { + versionGetter, err = versionGetterForProxy(s.ctx, proxyPublicAddr) + if err != nil { + s.Log.WarnContext(s.ctx, + "Failed to build a version client, falling back to Discovery service Teleport version.", + "error", err, + "version", teleport.Version) + versionGetter = version.NewStaticGetter(teleport.Version, nil) + } } watcher, err := common.NewWatcher(s.ctx, common.WatcherConfig{ @@ -108,7 +126,7 @@ func (s *Server) startKubeIntegrationWatchers() error { continue } - agentVersion, err := s.getKubeAgentVersion(releaseChannels) + agentVersion, err := s.getKubeAgentVersion(versionGetter) if err != nil { s.Log.WarnContext(s.ctx, "Could not get agent version to enroll EKS clusters", "error", err) continue @@ -305,8 +323,8 @@ func (s *Server) enrollEKSClusters(region, integration, discoveryConfigName stri } } -func (s *Server) getKubeAgentVersion(releaseChannels automaticupgrades.Channels) (string, error) { - return kubeutils.GetKubeAgentVersion(s.ctx, s.AccessPoint, s.ClusterFeatures(), releaseChannels) +func (s *Server) getKubeAgentVersion(versionGetter version.Getter) (string, error) { + return kubeutils.GetKubeAgentVersion(s.ctx, s.AccessPoint, s.ClusterFeatures(), versionGetter) } type IntegrationFetcher interface { @@ -366,3 +384,29 @@ func (s *Server) getKubeIntegrationFetchers() []common.Fetcher { func (s *Server) getKubeNonIntegrationFetchers() []common.Fetcher { return s.getKubeFetchers(false) } + +func versionGetterForProxy(ctx context.Context, proxyPublicAddr string) (version.Getter, error) { + proxyClt, err := webclient.NewReusableClient(&webclient.Config{ + Context: ctx, + ProxyAddr: proxyPublicAddr, + }) + if err != nil { + return nil, trace.Wrap(err, "failed to build proxy client") + } + + baseURL := &url.URL{ + Scheme: "https", + Host: proxyPublicAddr, + Path: path.Join("/webapi/automaticupgrades/channel", automaticupgrades.DefaultChannelName), + } + + return version.FailoverGetter{ + // We try getting the version via the new webapi + version.NewProxyVersionGetter(proxyClt), + // If this is not implemented, we fall back to the release channels + version.NewBasicHTTPVersionGetter(baseURL), + }, nil +} diff --git a/lib/srv/server/installer/autodiscover.go b/lib/srv/server/installer/autodiscover.go index 744acbd737ec4..aa79d796391eb 100644 --- a/lib/srv/server/installer/autodiscover.go +++ b/lib/srv/server/installer/autodiscover.go @@ -226,6 +226,8 @@ func (ani *AutoDiscoverNodeInstaller) Install(ctx context.Context) error { ani.Logger.InfoContext(ctx, "Detected cloud provider", "cloud", imdsClient.GetType()) // Check if teleport is already 
installed and install it, if it's absent. + // In the new autoupdate install flow, teleport-update should have already + // taken care of installing teleport. if _, err := os.Stat(ani.binariesLocation.Teleport); err != nil { ani.Logger.InfoContext(ctx, "Installing teleport") if err := ani.installTeleportFromRepo(ctx); err != nil { diff --git a/lib/srv/server/installer/defaultinstallers.go b/lib/srv/server/installer/defaultinstallers.go index c5c2642903bb8..d153248e224ed 100644 --- a/lib/srv/server/installer/defaultinstallers.go +++ b/lib/srv/server/installer/defaultinstallers.go @@ -26,11 +26,46 @@ import ( "github.com/gravitational/teleport/lib/web/scripts/oneoff" ) -// DefaultInstaller represents the default installer script provided by teleport. -var DefaultInstaller = oneoffScriptToDefaultInstaller() +const ( + scriptShebangAndSetOptions = `#!/usr/bin/env sh +set -euo pipefail` + execGenericInstallScript = ` +INSTALL_SCRIPT_URL="https://{{.PublicProxyAddr}}/scripts/install.sh" -func oneoffScriptToDefaultInstaller() *types.InstallerV1 { - argsList := []string{ +echo "Offloading the installation part to the generic Teleport install script hosted at: $INSTALL_SCRIPT_URL" + +TEMP_INSTALLER_SCRIPT="$(mktemp)" +curl -sSf "$INSTALL_SCRIPT_URL" -o "$TEMP_INSTALLER_SCRIPT" + +chmod +x "$TEMP_INSTALLER_SCRIPT" + +sudo "$TEMP_INSTALLER_SCRIPT" || (echo "The install script ($TEMP_INSTALLER_SCRIPT) returned a non-zero exit code" && exit 1) +rm "$TEMP_INSTALLER_SCRIPT"` +) + +// Default installer scripts provided by Teleport. +var ( + // LegacyDefaultInstaller uses oneoff.sh to download the Teleport tarball and run `teleport install`. + // The Teleport install command handles both Teleport installation and agent configuration. + LegacyDefaultInstaller = oneoffScriptToDefaultInstaller() + + // NewDefaultInstaller installs Teleport by calling the standard "/scripts/install.sh" route on the proxy. + // After successfully installing Teleport, it will invoke the same `teleport install` + // command as the LegacyDefaultInstaller which will only take care of configuring Teleport. + NewDefaultInstaller = types.MustNewInstallerV1( + installers.InstallerScriptName, + strings.Join( + []string{scriptShebangAndSetOptions, execGenericInstallScript, configureTeleport}, + "\n\n", + ), + ) + configureTeleport = ` +echo "Configuring the Teleport agent" + +set +x +sudo teleport ` + strings.Join(argsList, " ") + + argsList = []string{ "install", "autodiscover-node", "--public-proxy-addr={{.PublicProxyAddr}}", "--teleport-package={{.TeleportPackage}}", @@ -38,9 +73,11 @@ func oneoffScriptToDefaultInstaller() *types.InstallerV1 { "--auto-upgrade={{.AutomaticUpgrades}}", "--azure-client-id={{.AzureClientID}}", } +) +func oneoffScriptToDefaultInstaller() *types.InstallerV1 { script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{ - TeleportArgs: strings.Join(argsList, " "), + EntrypointArgs: strings.Join(argsList, " "), SuccessMessage: "Teleport is installed and running.", TeleportCommandPrefix: oneoff.PrefixSUDO, }) diff --git a/lib/srv/server/installer/defaultinstallers_test.go b/lib/srv/server/installer/defaultinstallers_test.go new file mode 100644 index 0000000000000..8fee1109cac17 --- /dev/null +++ b/lib/srv/server/installer/defaultinstallers_test.go @@ -0,0 +1,77 @@ +/* + * Teleport + * Copyright (C) 2025 Gravitational, Inc. 
+ * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package installer_test + +import ( + "bytes" + "testing" + "text/template" + + "github.com/stretchr/testify/require" + + "github.com/gravitational/teleport/api/types/installers" + "github.com/gravitational/teleport/lib/srv/server/installer" +) + +const defaultInstallerSnapshot = `#!/usr/bin/env sh +set -euo pipefail + + +INSTALL_SCRIPT_URL="https://teleport.example.com:443/scripts/install.sh" + +echo "Offloading the installation part to the generic Teleport install script hosted at: $INSTALL_SCRIPT_URL" + +TEMP_INSTALLER_SCRIPT="$(mktemp)" +curl -sSf "$INSTALL_SCRIPT_URL" -o "$TEMP_INSTALLER_SCRIPT" + +chmod +x "$TEMP_INSTALLER_SCRIPT" + +sudo "$TEMP_INSTALLER_SCRIPT" || (echo "The install script ($TEMP_INSTALLER_SCRIPT) returned a non-zero exit code" && exit 1) +rm "$TEMP_INSTALLER_SCRIPT" + + +echo "Configuring the Teleport agent" + +set +x +sudo teleport install autodiscover-node --public-proxy-addr=teleport.example.com:443 --teleport-package=teleport-ent --repo-channel=stable/cloud --auto-upgrade=true --azure-client-id=` + +// TestNewDefaultInstaller is a minimal snapshot test: it renders the NewDefaultInstaller +// template and compares the rendered script against the snapshot above. +func TestNewDefaultInstaller(t *testing.T) { + // Test setup. + inputs := installers.Template{ + PublicProxyAddr: "teleport.example.com:443", + MajorVersion: "v16", + TeleportPackage: "teleport-ent", + RepoChannel: "stable/cloud", + AutomaticUpgrades: "true", + AzureClientID: "", + } + + // Test execution: check that the template can be parsed. + script := installer.NewDefaultInstaller.GetScript() + installationTemplate, err := template.New("").Parse(script) + require.NoError(t, err) + + // Test execution: render template. + buf := &bytes.Buffer{} + require.NoError(t, installationTemplate.Execute(buf, inputs)) + + // Test validation: rendered template must look like the snapshot. + require.Equal(t, defaultInstallerSnapshot, buf.String()) +} diff --git a/lib/utils/teleportassets/teleportassets.go b/lib/utils/teleportassets/teleportassets.go index e396798a5477b..302977a8bd9ef 100644 --- a/lib/utils/teleportassets/teleportassets.go +++ b/lib/utils/teleportassets/teleportassets.go @@ -28,9 +28,9 @@ import ( ) const ( - // teleportReleaseCDN is the Teleport CDN URL for release builds. + // TeleportReleaseCDN is the Teleport CDN URL for release builds. // This can be used to download the Teleport binary for release builds. - teleportReleaseCDN = "https://cdn.teleport.dev" + TeleportReleaseCDN = "https://cdn.teleport.dev" // teleportPreReleaseCDN is the Teleport CDN URL for pre-release builds. // This can be used to download the Teleport binary for pre-release builds. 
teleportPreReleaseCDN = "https://cdn.cloud.gravitational.io" @@ -48,7 +48,22 @@ func cdnBaseURL(version semver.Version) string { if version.PreRelease != "" { return teleportPreReleaseCDN } - return teleportReleaseCDN + return TeleportReleaseCDN +} + +// CDNBaseURLForVersion returns the CDN base URL for a given artifact version. +// This function ensures that a Teleport production build cannot download from +// the pre-release CDN, while Teleport pre-release builds can download from both +// the production and pre-release CDNs. +func CDNBaseURLForVersion(artifactVersion *semver.Version) string { + return cdnBaseURLForVersion(artifactVersion, teleport.SemVersion) +} + +func cdnBaseURLForVersion(artifactVersion, teleportVersion *semver.Version) string { + if teleportVersion.PreRelease != "" && artifactVersion.PreRelease != "" { + return teleportPreReleaseCDN + } + return TeleportReleaseCDN } const ( diff --git a/lib/utils/teleportassets/teleportassets_test.go b/lib/utils/teleportassets/teleportassets_test.go index 22dee4c13061c..ffc3a9471891c 100644 --- a/lib/utils/teleportassets/teleportassets_test.go +++ b/lib/utils/teleportassets/teleportassets_test.go @@ -80,3 +80,50 @@ func TestDistrolessTeleportImageRepo(t *testing.T) { }) } } + +func Test_cdnBaseURLForVersion(t *testing.T) { + t.Parallel() + tests := []struct { + name string + artifactVersion string + teleportVersion string + want string + }{ + { + name: "both official releases", + artifactVersion: "16.3.2", + teleportVersion: "16.1.0", + want: TeleportReleaseCDN, + }, + { + name: "both pre-releases", + artifactVersion: "16.3.2-dev.1", + teleportVersion: "16.1.0-foo.25", + want: teleportPreReleaseCDN, + }, + { + name: "official teleport should not be able to install pre-release artifacts", + artifactVersion: "16.3.2-dev.1", + teleportVersion: "16.1.0", + want: TeleportReleaseCDN, + }, + { + name: "pre-release teleport should be able to install official artifacts", + artifactVersion: "16.3.2", + teleportVersion: "16.1.0-dev.1", + want: TeleportReleaseCDN, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test setup: parse version. + av, err := semver.NewVersion(tt.artifactVersion) + require.NoError(t, err) + tv, err := semver.NewVersion(tt.teleportVersion) + require.NoError(t, err) + + // Test execution and validation. 
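+ // cdnBaseURLForVersion must only ever route to the pre-release CDN when both versions are pre-releases.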
+ require.Equal(t, tt.want, cdnBaseURLForVersion(av, tv)) + }) + } +} diff --git a/lib/web/apiserver.go b/lib/web/apiserver.go index 899c35e6441d1..c3533a49c5e66 100644 --- a/lib/web/apiserver.go +++ b/lib/web/apiserver.go @@ -60,19 +60,16 @@ import ( "google.golang.org/protobuf/types/known/timestamppb" "github.com/gravitational/teleport" - "github.com/gravitational/teleport/api" apiclient "github.com/gravitational/teleport/api/client" "github.com/gravitational/teleport/api/client/proto" "github.com/gravitational/teleport/api/client/webclient" "github.com/gravitational/teleport/api/constants" apidefaults "github.com/gravitational/teleport/api/defaults" - autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" mfav1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/mfa/v1" notificationsv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/notifications/v1" "github.com/gravitational/teleport/api/mfa" apitracing "github.com/gravitational/teleport/api/observability/tracing" "github.com/gravitational/teleport/api/types" - "github.com/gravitational/teleport/api/types/autoupdate" apievents "github.com/gravitational/teleport/api/types/events" "github.com/gravitational/teleport/api/types/installers" "github.com/gravitational/teleport/api/utils/keys" @@ -139,6 +136,8 @@ const ( // This cache is here to protect against accidental or intentional DDoS, the TTL must be low to quickly reflect // cluster configuration changes. findEndpointCacheTTL = 10 * time.Second + // DefaultAgentUpdateJitterSeconds is the default jitter agents should wait before updating. + DefaultAgentUpdateJitterSeconds = 60 ) // healthCheckAppServerFunc defines a function used to perform a health check @@ -187,6 +186,9 @@ type Handler struct { // rate-limits, each call must cause minimal work. The cached answer can be modulated after, for example if the // caller specified its Automatic Updates UUID or group. findEndpointCache *utils.FnCache + + // clusterMaintenanceConfigCache caches the cluster maintenance config fetched from the Auth Service. + clusterMaintenanceConfigCache *utils.FnCache } // HandlerOption is a functional argument - an option that can be passed @@ -504,6 +506,18 @@ func NewHandler(cfg Config, opts ...HandlerOption) (*APIHandler, error) { } h.findEndpointCache = findCache + // We create the cache after applying the options to make sure we use the fake clock if it was passed. + cmcCache, err := utils.NewFnCache(utils.FnCacheConfig{ + TTL: findEndpointCacheTTL, + Clock: h.clock, + Context: cfg.Context, + ReloadOnErr: false, + }) + if err != nil { + return nil, trace.Wrap(err, "creating cluster maintenance config cache") + } + h.clusterMaintenanceConfigCache = cmcCache + sessionLingeringThreshold := cachedSessionLingeringThreshold if cfg.CachedSessionLingeringThreshold != nil { sessionLingeringThreshold = *cfg.CachedSessionLingeringThreshold } @@ -899,6 +913,11 @@ func (h *Handler) bindDefaultEndpoints() { h.GET("/webapi/tokens", h.WithAuth(h.getTokens)) h.DELETE("/webapi/tokens", h.WithAuth(h.deleteToken)) + // install script, the ':token' wildcard is a hack to make the router happy and support + // the token-less route "/scripts/install.sh". + // h.installScriptHandle will reject any unknown sub-route. 
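+ // For example, a request for "/scripts/install.sh" matches this route with ':token' set to "install.sh" and is served the generic install script.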
+ h.GET("/scripts/:token", h.WithHighLimiter(h.installScriptHandle)) + // join scripts h.GET("/scripts/:token/install-node.sh", h.WithLimiter(h.getNodeJoinScriptHandle)) h.GET("/scripts/:token/install-app.sh", h.WithLimiter(h.getAppJoinScriptHandle)) @@ -1125,7 +1144,7 @@ func (h *Handler) bindDefaultEndpoints() { // Implements the agent version server. // Channel can contain "/", hence the use of a catch-all parameter - h.GET("/webapi/automaticupgrades/channel/*request", h.WithUnauthenticatedHighLimiter(h.automaticUpgrades)) + h.GET("/webapi/automaticupgrades/channel/*request", h.WithUnauthenticatedHighLimiter(h.automaticUpgrades109)) // GET Machine ID bot by name h.GET("/webapi/sites/:site/machine-id/bot/:name", h.WithClusterAuth(h.getBot)) @@ -1606,6 +1625,8 @@ func (h *Handler) ping(w http.ResponseWriter, r *http.Request, p httprouter.Para return nil, trace.Wrap(err) } + group := r.URL.Query().Get(webclient.AgentUpdateGroupParameter) + return webclient.PingResponse{ Auth: authSettings, Proxy: *proxyConfig, @@ -1613,13 +1634,21 @@ func (h *Handler) ping(w http.ResponseWriter, r *http.Request, p httprouter.Para MinClientVersion: teleport.MinClientVersion, ClusterName: h.auth.clusterName, AutomaticUpgrades: pr.ServerFeatures.GetAutomaticUpgrades(), - AutoUpdate: h.automaticUpdateSettings(r.Context()), + AutoUpdate: h.automaticUpdateSettings184(r.Context(), group, "" /* updater UUID */), + Edition: modules.GetModules().BuildType(), + FIPS: modules.IsBoringBinary(), }, nil } func (h *Handler) find(w http.ResponseWriter, r *http.Request, p httprouter.Params) (interface{}, error) { + group := r.URL.Query().Get(webclient.AgentUpdateGroupParameter) + cacheKey := "find" + if group != "" { + cacheKey += "-" + group + } + // cache the generic answer to avoid doing work for each request - resp, err := utils.FnCacheGet[*webclient.PingResponse](r.Context(), h.findEndpointCache, "find", func(ctx context.Context) (*webclient.PingResponse, error) { + resp, err := utils.FnCacheGet[*webclient.PingResponse](r.Context(), h.findEndpointCache, cacheKey, func(ctx context.Context) (*webclient.PingResponse, error) { proxyConfig, err := h.cfg.ProxySettings.GetProxySettings(ctx) if err != nil { return nil, trace.Wrap(err) @@ -1636,33 +1665,12 @@ func (h *Handler) find(w http.ResponseWriter, r *http.Request, p httprouter.Para ServerVersion: teleport.Version, MinClientVersion: teleport.MinClientVersion, ClusterName: h.auth.clusterName, - AutoUpdate: h.automaticUpdateSettings(ctx), + Edition: modules.GetModules().BuildType(), + FIPS: modules.IsBoringBinary(), + AutoUpdate: h.automaticUpdateSettings184(ctx, group, "" /* updater UUID */), }, nil }) - if err != nil { - return nil, trace.Wrap(err) - } - return resp, nil -} - -// TODO: add the request as a parameter when we'll need to modulate the content based on the UUID and group -func (h *Handler) automaticUpdateSettings(ctx context.Context) webclient.AutoUpdateSettings { - autoUpdateConfig, err := h.cfg.AccessPoint.GetAutoUpdateConfig(ctx) - // TODO(vapopov) DELETE IN v18.0.0 check of IsNotImplemented, must be backported to all latest supported versions. - if err != nil && !trace.IsNotFound(err) && !trace.IsNotImplemented(err) { - h.logger.ErrorContext(ctx, "failed to receive AutoUpdateConfig", "error", err) - } - - autoUpdateVersion, err := h.cfg.AccessPoint.GetAutoUpdateVersion(ctx) - // TODO(vapopov) DELETE IN v18.0.0 check of IsNotImplemented, must be backported to all latest supported versions. 
- if err != nil && !trace.IsNotFound(err) && !trace.IsNotImplemented(err) { - h.logger.ErrorContext(ctx, "failed to receive AutoUpdateVersion", "error", err) - } - - return webclient.AutoUpdateSettings{ - ToolsAutoUpdate: getToolsAutoUpdate(autoUpdateConfig), - ToolsVersion: getToolsVersion(autoUpdateVersion), - } + return resp, err } func (h *Handler) pingWithConnector(w http.ResponseWriter, r *http.Request, p httprouter.Params) (interface{}, error) { @@ -1863,9 +1871,13 @@ func (h *Handler) getWebConfig(w http.ResponseWriter, r *http.Request, p httprou automaticUpgradesEnabled := clusterFeatures.GetAutomaticUpgrades() var automaticUpgradesTargetVersion string if automaticUpgradesEnabled { - automaticUpgradesTargetVersion, err = h.cfg.AutomaticUpgradesChannels.DefaultVersion(r.Context()) + const group, updaterUUID = "", "" + agentVersion, err := h.autoUpdateAgentVersion(r.Context(), group, updaterUUID) if err != nil { - h.log.WithError(err).Error("Cannot read target version") + h.logger.ErrorContext(r.Context(), "Cannot read autoupdate target version", "error", err) + } else { + // agentVersion doesn't have the leading "v" which is expected here. + automaticUpgradesTargetVersion = fmt.Sprintf("v%s", agentVersion) } } @@ -2260,7 +2272,7 @@ func (h *Handler) installer(w http.ResponseWriter, r *http.Request, p httprouter // https://updates.releases.teleport.dev/v1/stable/cloud/version installUpdater := automaticUpgrades(*ping.ServerFeatures) if installUpdater { - repoChannel = stableCloudChannelRepo + repoChannel = automaticupgrades.DefaultCloudChannelName } azureClientID := r.URL.Query().Get("azure-client-id") @@ -5269,23 +5281,3 @@ func readEtagFromAppHash(fs http.FileSystem) (string, error) { return etag, nil } - -func getToolsAutoUpdate(config *autoupdatepb.AutoUpdateConfig) bool { - // If we can't get the AU config or if AUs are not configured, we default to "disabled". - // This ensures we fail open and don't accidentally update agents if something is going wrong. - // If we want to enable AUs by default, it would be better to create a default "autoupdate_config" resource - // than changing this logic. - if config.GetSpec().GetTools() != nil { - return config.GetSpec().GetTools().GetMode() == autoupdate.ToolsUpdateModeEnabled - } - return false -} - -func getToolsVersion(version *autoupdatepb.AutoUpdateVersion) string { - // If we can't get the AU version or tools AU version is not specified, we default to the current proxy version. - // This ensures we always advertise a version compatible with the cluster. 
- if version.GetSpec().GetTools() == nil { - return api.Version - } - return version.GetSpec().GetTools().GetTargetVersion() -} diff --git a/lib/web/apiserver_ping_test.go b/lib/web/apiserver_ping_test.go index bee504fe7b20f..84e073ca7ae87 100644 --- a/lib/web/apiserver_ping_test.go +++ b/lib/web/apiserver_ping_test.go @@ -299,26 +299,69 @@ func TestPing_autoUpdateResources(t *testing.T) { name string config *autoupdatev1pb.AutoUpdateConfigSpec version *autoupdatev1pb.AutoUpdateVersionSpec + rollout *autoupdatev1pb.AutoUpdateAgentRolloutSpec cleanup bool expected webclient.AutoUpdateSettings }{ { name: "resources not defined", expected: webclient.AutoUpdateSettings{ - ToolsVersion: api.Version, - ToolsAutoUpdate: false, + ToolsVersion: api.Version, + ToolsAutoUpdate: false, + AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: false, + AgentVersion: api.Version, }, }, { - name: "enable auto update", + name: "enable tools auto update", config: &autoupdatev1pb.AutoUpdateConfigSpec{ Tools: &autoupdatev1pb.AutoUpdateConfigSpecTools{ Mode: autoupdate.ToolsUpdateModeEnabled, }, }, expected: webclient.AutoUpdateSettings{ - ToolsAutoUpdate: true, - ToolsVersion: api.Version, + ToolsAutoUpdate: true, + ToolsVersion: api.Version, + AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: false, + AgentVersion: api.Version, + }, + cleanup: true, + }, + { + name: "enable agent auto update, immediate schedule", + rollout: &autoupdatev1pb.AutoUpdateAgentRolloutSpec{ + AutoupdateMode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedule: autoupdate.AgentsScheduleImmediate, + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + }, + expected: webclient.AutoUpdateSettings{ + ToolsVersion: api.Version, + ToolsAutoUpdate: false, + AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: true, + AgentVersion: "1.2.4", + }, + cleanup: true, + }, + { + name: "agent rollout present but AU mode is disabled", + rollout: &autoupdatev1pb.AutoUpdateAgentRolloutSpec{ + AutoupdateMode: autoupdate.AgentsUpdateModeDisabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + Schedule: autoupdate.AgentsScheduleImmediate, + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + }, + expected: webclient.AutoUpdateSettings{ + ToolsVersion: api.Version, + ToolsAutoUpdate: false, + AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: false, + AgentVersion: "1.2.4", }, cleanup: true, }, @@ -327,8 +370,11 @@ func TestPing_autoUpdateResources(t *testing.T) { config: &autoupdatev1pb.AutoUpdateConfigSpec{}, version: &autoupdatev1pb.AutoUpdateVersionSpec{}, expected: webclient.AutoUpdateSettings{ - ToolsVersion: api.Version, - ToolsAutoUpdate: false, + ToolsVersion: api.Version, + ToolsAutoUpdate: false, + AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: false, + AgentVersion: api.Version, }, cleanup: true, }, @@ -340,8 +386,11 @@ func TestPing_autoUpdateResources(t *testing.T) { }, }, expected: webclient.AutoUpdateSettings{ - ToolsVersion: "1.2.3", - ToolsAutoUpdate: false, + ToolsVersion: "1.2.3", + ToolsAutoUpdate: false, + AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: false, + AgentVersion: api.Version, }, cleanup: true, }, @@ -358,8 +407,11 @@ func TestPing_autoUpdateResources(t *testing.T) { }, }, expected: webclient.AutoUpdateSettings{ - ToolsAutoUpdate: true, - ToolsVersion: "1.2.3", + ToolsAutoUpdate: true, + ToolsVersion: "1.2.3", + 
AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: false, + AgentVersion: api.Version, }, }, { @@ -375,8 +427,11 @@ func TestPing_autoUpdateResources(t *testing.T) { }, }, expected: webclient.AutoUpdateSettings{ - ToolsAutoUpdate: false, - ToolsVersion: "3.2.1", + ToolsAutoUpdate: false, + ToolsVersion: "3.2.1", + AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds, + AgentAutoUpdate: false, + AgentVersion: api.Version, }, }, } @@ -394,6 +449,12 @@ func TestPing_autoUpdateResources(t *testing.T) { _, err = env.server.Auth().UpsertAutoUpdateVersion(ctx, version) require.NoError(t, err) } + if tc.rollout != nil { + rollout, err := autoupdate.NewAutoUpdateAgentRollout(tc.rollout) + require.NoError(t, err) + _, err = env.server.Auth().UpsertAutoUpdateAgentRollout(ctx, rollout) + require.NoError(t, err) + } // expire the fn cache to force the next answer to be fresh for _, proxy := range env.proxies { @@ -412,6 +473,7 @@ func TestPing_autoUpdateResources(t *testing.T) { if tc.cleanup { require.NotErrorIs(t, env.server.Auth().DeleteAutoUpdateConfig(ctx), &trace.NotFoundError{}) require.NotErrorIs(t, env.server.Auth().DeleteAutoUpdateVersion(ctx), &trace.NotFoundError{}) + require.NotErrorIs(t, env.server.Auth().DeleteAutoUpdateAgentRollout(ctx), &trace.NotFoundError{}) } }) } diff --git a/lib/web/apiserver_test.go b/lib/web/apiserver_test.go index 175f0488d9907..724730121abb5 100644 --- a/lib/web/apiserver_test.go +++ b/lib/web/apiserver_test.go @@ -3669,6 +3669,7 @@ func TestKnownWebPathsWithAndWithoutV1Prefix(t *testing.T) { func TestInstallDatabaseScriptGeneration(t *testing.T) { const username = "test-user@example.com" + modules.SetTestModules(t, &modules.TestModules{TestBuildType: modules.BuildCommunity}) // Users should be able to create Tokens even if they can't update them roleTokenCRD, err := types.NewRole(services.RoleNameForUser(username), types.RoleSpecV6{ @@ -8571,9 +8572,9 @@ func createProxy(ctx context.Context, t *testing.T, proxyID string, node *regula }, ) handler.handler.cfg.ProxyKubeAddr = utils.FromAddr(kubeProxyAddr) + handler.handler.cfg.PublicProxyAddr = webServer.Listener.Addr().String() url, err := url.Parse("https://" + webServer.Listener.Addr().String()) require.NoError(t, err) - handler.handler.cfg.PublicProxyAddr = url.String() return &testProxy{ clock: clock, diff --git a/lib/web/autoupdate_common.go b/lib/web/autoupdate_common.go new file mode 100644 index 0000000000000..0daaadaec02ce --- /dev/null +++ b/lib/web/autoupdate_common.go @@ -0,0 +1,245 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package web + +import ( + "context" + "fmt" + "strings" + + "github.com/gravitational/trace" + + autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/api/types/autoupdate" + "github.com/gravitational/teleport/lib/automaticupgrades" + "github.com/gravitational/teleport/lib/utils" +) + +// autoUpdateAgentVersion returns the version the agent should install/update to based on +// its group and updater UUID. +// If the cluster contains an autoupdate_agent_rollout resource from RFD184 it should take precedence. +// If the resource is not there, we fall back to RFD109-style updates with channels +// and maintenance window derived from the cluster_maintenance_config resource. +// Version returned follows semver without the leading "v". +func (h *Handler) autoUpdateAgentVersion(ctx context.Context, group, updaterUUID string) (string, error) { + rollout, err := h.cfg.AccessPoint.GetAutoUpdateAgentRollout(ctx) + if err != nil { + // Fall back to channels if there is no autoupdate_agent_rollout. + if trace.IsNotFound(err) || trace.IsNotImplemented(err) { + return getVersionFromChannel(ctx, h.cfg.AutomaticUpgradesChannels, group) + } + // Something is broken, we don't want to fall back to channels, this would be harmful. + return "", trace.Wrap(err, "getting autoupdate_agent_rollout") + } + + return getVersionFromRollout(rollout, group, updaterUUID) +} + +// handlerVersionGetter is a dummy struct implementing version.Getter by wrapping Handler.autoUpdateAgentVersion. +type handlerVersionGetter struct { + *Handler +} + +// GetVersion implements version.Getter. +func (h *handlerVersionGetter) GetVersion(ctx context.Context) (string, error) { + const group, updaterUUID = "", "" + agentVersion, err := h.autoUpdateAgentVersion(ctx, group, updaterUUID) + if err != nil { + return "", trace.Wrap(err) + } + // We add the leading v required by the version.Getter interface. + return fmt.Sprintf("v%s", agentVersion), nil +} + +// autoUpdateAgentShouldUpdate returns whether the agent should update now based on its group +// and updater UUID. +// If the cluster contains an autoupdate_agent_rollout resource from RFD184 it should take precedence. +// If the resource is not there, we fall back to RFD109-style updates with channels +// and maintenance window derived from the cluster_maintenance_config resource. +func (h *Handler) autoUpdateAgentShouldUpdate(ctx context.Context, group, updaterUUID string, windowLookup bool) (bool, error) { + rollout, err := h.cfg.AccessPoint.GetAutoUpdateAgentRollout(ctx) + if err != nil { + // Fall back to channels if there is no autoupdate_agent_rollout. + if trace.IsNotFound(err) || trace.IsNotImplemented(err) { + // Updaters using the RFD184 API are not aware of maintenance windows + // like RFD109 updaters are. To have both updaters adopt the same behavior + // we must do the CMC window lookup for them. + if windowLookup { + return h.getTriggerFromWindowThenChannel(ctx, group) + } + return getTriggerFromChannel(ctx, h.cfg.AutomaticUpgradesChannels, group) + } + // Something is broken, we don't want to fall back to channels, this would be harmful. + return false, trace.Wrap(err, "failed to get auto-update rollout") + } + + return getTriggerFromRollout(rollout, group, updaterUUID) +} + +// getVersionFromRollout returns the version we should serve to the agent based +// on the RFD184 agent rollout, the agent group name, and its UUID. 
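+// For example, under the regular schedule, an agent in an unstarted or rolled-back group is served the +// start version, while an agent in an active or done group is served the target version.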
+// This logic is pretty complex and described in RFD 184. +// The spec is summed up in the following table: +// https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#rollout-status-disabled +// Version returned follows semver without the leading "v". +func getVersionFromRollout( + rollout *autoupdatepb.AutoUpdateAgentRollout, + groupName, updaterUUID string, +) (string, error) { + switch rollout.GetSpec().GetAutoupdateMode() { + case autoupdate.AgentsUpdateModeDisabled: + // If AUs are disabled, we always answer the target version + return rollout.GetSpec().GetTargetVersion(), nil + case autoupdate.AgentsUpdateModeSuspended, autoupdate.AgentsUpdateModeEnabled: + // If AUs are enabled or suspended, we modulate the response based on the schedule and agent group state + default: + return "", trace.BadParameter("unsupported agent update mode %q", rollout.GetSpec().GetAutoupdateMode()) + } + + // If the schedule is immediate, agents always update to the latest version + if rollout.GetSpec().GetSchedule() == autoupdate.AgentsScheduleImmediate { + return rollout.GetSpec().GetTargetVersion(), nil + } + + // Else we follow the regular schedule and answer based on the agent group state + group, err := getGroup(rollout, groupName) + if err != nil { + return "", trace.Wrap(err, "getting group %q", groupName) + } + + switch group.GetState() { + case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: + return rollout.GetSpec().GetStartVersion(), nil + case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE: + return rollout.GetSpec().GetTargetVersion(), nil + default: + return "", trace.NotImplemented("unsupported group state %q", group.GetState()) + } +} + +// getTriggerFromRollout returns whether the agent should update based +// on the RFD184 agent rollout, the agent group name, and its UUID. +// This logic is pretty complex and described in RFD 184. 
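+// For example, with updates enabled under the regular schedule, an agent in a done group is told to +// update only when the strategy is halt-on-error, while an agent in an unstarted group is never told to update.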
+// The spec is summed up in the following table: +// https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#rollout-status-disabled +func getTriggerFromRollout(rollout *autoupdatepb.AutoUpdateAgentRollout, groupName, updaterUUID string) (bool, error) { + // If the mode is "suspended" or "disabled", we never tell the agent to update + switch rollout.GetSpec().GetAutoupdateMode() { + case autoupdate.AgentsUpdateModeDisabled, autoupdate.AgentsUpdateModeSuspended: + // If AUs are disabled or suspended, never tell the agent to update + return false, nil + case autoupdate.AgentsUpdateModeEnabled: + // If AUs are enabled, we modulate the response based on the schedule and agent group state + default: + return false, trace.BadParameter("unsupported agent update mode %q", rollout.GetSpec().GetAutoupdateMode()) + } + + // If the schedule is immediate, agents always update to the latest version + if rollout.GetSpec().GetSchedule() == autoupdate.AgentsScheduleImmediate { + return true, nil + } + + // Else we follow the regular schedule and answer based on the agent group state + group, err := getGroup(rollout, groupName) + if err != nil { + return false, trace.Wrap(err, "getting group %q", groupName) + } + + switch group.GetState() { + case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED: + return false, nil + case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: + return true, nil + case autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE: + return rollout.GetSpec().GetStrategy() == autoupdate.AgentsStrategyHaltOnError, nil + default: + return false, trace.NotImplemented("unsupported group state %q", group.GetState()) + } +} + +// getGroup returns the agent rollout group the requesting agent belongs to. +// If a group matches the agent-provided group name, this group is returned. +// Else the default group is returned. The default group currently is the last +// one. This might change in the future. +func getGroup( + rollout *autoupdatepb.AutoUpdateAgentRollout, + groupName string, +) (*autoupdatepb.AutoUpdateAgentRolloutStatusGroup, error) { + groups := rollout.GetStatus().GetGroups() + if len(groups) == 0 { + return nil, trace.BadParameter("no groups found") + } + + // Try to find a group with our name + for _, group := range groups { + if group.Name == groupName { + return group, nil + } + } + + // Fallback to the default group (currently the last one but this might change). + return groups[len(groups)-1], nil +} + +// getVersionFromChannel gets the target version from the RFD109 channels. +// Version returned follows semver without the leading "v". +func getVersionFromChannel(ctx context.Context, channels automaticupgrades.Channels, groupName string) (version string, err error) { + // RFD109 channels return the version with the 'v' prefix. + // We can't change the internals for backward compatibility, so we must trim the prefix if it's here. + defer func() { + version = strings.TrimPrefix(version, "v") + }() + + if channel, ok := channels[groupName]; ok { + return channel.GetVersion(ctx) + } + return channels.DefaultVersion(ctx) +} + +// getTriggerFromWindowThenChannel gets the update trigger from the RFD109 maintenance window and channels. 
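+// The window comes from the cluster_maintenance_config resource: if the window is currently active the +// trigger fires, otherwise we fall back to the channel criticality.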
+func (h *Handler) getTriggerFromWindowThenChannel(ctx context.Context, groupName string) (bool, error) { + // Caching the CMC for 10 seconds because this resource is cached neither by the auth nor the proxy. + // And this function can be accessed via unauthenticated endpoints. + cmc, err := utils.FnCacheGet[types.ClusterMaintenanceConfig](ctx, h.clusterMaintenanceConfigCache, "cmc", func(ctx context.Context) (types.ClusterMaintenanceConfig, error) { + return h.cfg.ProxyClient.GetClusterMaintenanceConfig(ctx) + }) + + // If we have a CMC, we check if the window is active, else we just check if the update is critical. + if err == nil && cmc.WithinUpgradeWindow(h.clock.Now()) { + return true, nil + } + + return getTriggerFromChannel(ctx, h.cfg.AutomaticUpgradesChannels, groupName) +} + +// getTriggerFromChannel gets the update trigger from the RFD109 channels. +func getTriggerFromChannel(ctx context.Context, channels automaticupgrades.Channels, groupName string) (bool, error) { + if channel, ok := channels[groupName]; ok { + return channel.GetCritical(ctx) + } + defaultChannel, err := channels.DefaultChannel() + if err != nil { + return false, trace.Wrap(err, "creating new default channel") + } + return defaultChannel.GetCritical(ctx) +} diff --git a/lib/web/autoupdate_common_test.go b/lib/web/autoupdate_common_test.go new file mode 100644 index 0000000000000..e0a1a31719586 --- /dev/null +++ b/lib/web/autoupdate_common_test.go @@ -0,0 +1,799 @@ +/* + * Teleport + * Copyright (C) 2024 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package web + +import ( + "context" + "fmt" + "net/http" + "net/http/httptest" + "strings" + "testing" + "time" + + "github.com/gravitational/trace" + "github.com/jonboulle/clockwork" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/api/types/autoupdate" + "github.com/gravitational/teleport/lib/auth/authclient" + "github.com/gravitational/teleport/lib/automaticupgrades" + "github.com/gravitational/teleport/lib/automaticupgrades/constants" + "github.com/gravitational/teleport/lib/utils" +) + +const ( + testVersionHigh = "2.3.4" + testVersionLow = "2.0.4" +) + +// fakeRolloutAccessPoint allows us to mock the ProxyAccessPoint in autoupdate +// tests. +type fakeRolloutAccessPoint struct { + authclient.ProxyAccessPoint + + rollout *autoupdatepb.AutoUpdateAgentRollout + err error +} + +func (ap *fakeRolloutAccessPoint) GetAutoUpdateAgentRollout(_ context.Context) (*autoupdatepb.AutoUpdateAgentRollout, error) { + return ap.rollout, ap.err +} + +// fakeCMCAuthClient allows us to mock the proxy's auth client in autoupdate +// tests. 
+type fakeCMCAuthClient struct { + authclient.ClientI + + cmc types.ClusterMaintenanceConfig + err error +} + +func (c *fakeCMCAuthClient) GetClusterMaintenanceConfig(_ context.Context) (types.ClusterMaintenanceConfig, error) { + return c.cmc, c.err +} + +func TestAutoUpdateAgentVersion(t *testing.T) { + t.Parallel() + groupName := "test-group" + ctx := context.Background() + + // brokenChannelUpstream is a buggy upstream version server. + // This allows us to craft version channels returning errors. + brokenChannelUpstream := httptest.NewServer( + http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusBadRequest) + })) + t.Cleanup(brokenChannelUpstream.Close) + + tests := []struct { + name string + rollout *autoupdatepb.AutoUpdateAgentRollout + rolloutErr error + channel *automaticupgrades.Channel + expectedVersion string + expectError require.ErrorAssertionFunc + }{ + { + name: "version is looked up from rollout if it is here", + rollout: &autoupdatepb.AutoUpdateAgentRollout{ + Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{ + AutoupdateMode: autoupdate.AgentsUpdateModeEnabled, + TargetVersion: testVersionHigh, + Schedule: autoupdate.AgentsScheduleImmediate, + }, + }, + channel: &automaticupgrades.Channel{StaticVersion: testVersionLow}, + expectError: require.NoError, + expectedVersion: testVersionHigh, + }, + { + name: "version is looked up from channel if rollout is not here", + rolloutErr: trace.NotFound("rollout is not here"), + channel: &automaticupgrades.Channel{StaticVersion: testVersionLow}, + expectError: require.NoError, + expectedVersion: testVersionLow, + }, + { + name: "hard error getting rollout should not fallback to version channels", + rolloutErr: trace.AccessDenied("something is very broken"), + channel: &automaticupgrades.Channel{ + StaticVersion: testVersionLow, + }, + expectError: require.Error, + }, + { + name: "no rollout, error checking channel", + rolloutErr: trace.NotFound("rollout is not here"), + channel: &automaticupgrades.Channel{ForwardURL: brokenChannelUpstream.URL}, + expectError: require.Error, + }, + } + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test setup: building the channel, mock client, and handler with test config. + require.NoError(t, tt.channel.CheckAndSetDefaults()) + h := &Handler{ + cfg: Config{ + AccessPoint: &fakeRolloutAccessPoint{ + rollout: tt.rollout, + err: tt.rolloutErr, + }, + AutomaticUpgradesChannels: map[string]*automaticupgrades.Channel{ + groupName: tt.channel, + }, + }, + } + + // Test execution + result, err := h.autoUpdateAgentVersion(ctx, groupName, "") + tt.expectError(t, err) + require.Equal(t, tt.expectedVersion, result) + }) + } +} + +// TestAutoUpdateAgentShouldUpdate also accidentally tests getTriggerFromWindowThenChannel. +func TestAutoUpdateAgentShouldUpdate(t *testing.T) { + t.Parallel() + + groupName := "test-group" + ctx := context.Background() + + // brokenChannelUpstream is a buggy upstream version server. + // This allows us to craft version channels returning errors. 
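+ // Every request to this server fails with HTTP 400, so version and criticality lookups error out.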
+	brokenChannelUpstream := httptest.NewServer(
+		http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+			w.WriteHeader(http.StatusBadRequest)
+		}))
+	t.Cleanup(brokenChannelUpstream.Close)
+
+	cacheClock := clockwork.NewFakeClock()
+	cmcCache, err := utils.NewFnCache(utils.FnCacheConfig{
+		TTL:         findEndpointCacheTTL,
+		Clock:       cacheClock,
+		Context:     ctx,
+		ReloadOnErr: false,
+	})
+	require.NoError(t, err)
+	t.Cleanup(func() {
+		cmcCache.Shutdown(ctx)
+	})
+
+	// We don't use the cache clock because we are advancing it to invalidate
+	// the cmc cache, and this would interfere with the test logic.
+	clock := clockwork.NewFakeClock()
+	activeUpgradeWindow := types.AgentUpgradeWindow{UTCStartHour: uint32(clock.Now().Hour())}
+	inactiveUpgradeWindow := types.AgentUpgradeWindow{UTCStartHour: uint32(clock.Now().Add(2 * time.Hour).Hour())}
+	tests := []struct {
+		name            string
+		rollout         *autoupdatepb.AutoUpdateAgentRollout
+		rolloutErr      error
+		channel         *automaticupgrades.Channel
+		upgradeWindow   types.AgentUpgradeWindow
+		cmcErr          error
+		windowLookup    bool
+		expectedTrigger bool
+		expectError     require.ErrorAssertionFunc
+	}{
+		{
+			name: "trigger is looked up from rollout if it is here, trigger firing",
+			rollout: &autoupdatepb.AutoUpdateAgentRollout{
+				Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{
+					AutoupdateMode: autoupdate.AgentsUpdateModeEnabled,
+					TargetVersion:  testVersionHigh,
+					Schedule:       autoupdate.AgentsScheduleImmediate,
+				},
+			},
+			channel:         &automaticupgrades.Channel{StaticVersion: testVersionLow},
+			expectError:     require.NoError,
+			expectedTrigger: true,
+		},
+		{
+			name: "trigger is looked up from rollout if it is here, trigger not firing",
+			rollout: &autoupdatepb.AutoUpdateAgentRollout{
+				Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{
+					AutoupdateMode: autoupdate.AgentsUpdateModeDisabled,
+					TargetVersion:  testVersionHigh,
+					Schedule:       autoupdate.AgentsScheduleImmediate,
+				},
+			},
+			channel:         &automaticupgrades.Channel{StaticVersion: testVersionLow},
+			expectError:     require.NoError,
+			expectedTrigger: false,
+		},
+		{
+			name:       "trigger is looked up from channel if rollout is not here and window lookup is disabled, trigger not firing",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+				Critical:      false,
+			},
+			expectError:     require.NoError,
+			expectedTrigger: false,
+		},
+		{
+			name:       "trigger is looked up from channel if rollout is not here and window lookup is disabled, trigger firing",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+				Critical:      true,
+			},
+			expectError:     require.NoError,
+			expectedTrigger: true,
+		},
+		{
+			name:       "trigger is looked up from cmc, then channel if rollout is not here and window lookup is enabled, cmc firing",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+				Critical:      false,
+			},
+			upgradeWindow:   activeUpgradeWindow,
+			windowLookup:    true,
+			expectError:     require.NoError,
+			expectedTrigger: true,
+		},
+		{
+			name:       "trigger is looked up from cmc, then channel if rollout is not here and window lookup is enabled, cmc not firing",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+				Critical:      false,
+			},
+			upgradeWindow:   inactiveUpgradeWindow,
+			windowLookup:    true,
+			expectError:     require.NoError,
+			expectedTrigger: false,
+		},
+		{
+			name:       "trigger is looked up from cmc, then channel if rollout is not here and window lookup is enabled, cmc not firing but channel firing",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+				Critical:      true,
+			},
+			upgradeWindow:   inactiveUpgradeWindow,
+			windowLookup:    true,
+			expectError:     require.NoError,
+			expectedTrigger: true,
+		},
+		{
+			name:       "trigger is looked up from cmc, then channel if rollout is not here and window lookup is enabled, no cmc and channel not firing",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+				Critical:      false,
+			},
+			cmcErr:          trace.NotFound("no cmc for this cluster"),
+			windowLookup:    true,
+			expectError:     require.NoError,
+			expectedTrigger: false,
+		},
+		{
+			name:       "trigger is looked up from cmc, then channel if rollout is not here and window lookup is enabled, no cmc and channel firing",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+				Critical:      true,
+			},
+			cmcErr:          trace.NotFound("no cmc for this cluster"),
+			windowLookup:    true,
+			expectError:     require.NoError,
+			expectedTrigger: true,
+		},
+		{
+			name:       "hard error getting rollout should not fallback to RFD109 trigger",
+			rolloutErr: trace.AccessDenied("something is very broken"),
+			channel: &automaticupgrades.Channel{
+				StaticVersion: testVersionLow,
+			},
+			expectError: require.Error,
+		},
+		{
+			name:       "no rollout, error checking channel",
+			rolloutErr: trace.NotFound("rollout is not here"),
+			channel: &automaticupgrades.Channel{
+				ForwardURL: brokenChannelUpstream.URL,
+			},
+			expectError: require.Error,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Test setup: building the channel, mock clients, and handler with test config.
+			cmc := types.NewClusterMaintenanceConfig()
+			cmc.SetAgentUpgradeWindow(tt.upgradeWindow)
+			require.NoError(t, tt.channel.CheckAndSetDefaults())
+			// Advance cache clock to expire cached cmc
+			cacheClock.Advance(2 * findEndpointCacheTTL)
+			h := &Handler{
+				cfg: Config{
+					AccessPoint: &fakeRolloutAccessPoint{
+						rollout: tt.rollout,
+						err:     tt.rolloutErr,
+					},
+					ProxyClient: &fakeCMCAuthClient{
+						cmc: cmc,
+						err: tt.cmcErr,
+					},
+					AutomaticUpgradesChannels: map[string]*automaticupgrades.Channel{
+						groupName: tt.channel,
+					},
+				},
+				clock:                         clock,
+				clusterMaintenanceConfigCache: cmcCache,
+			}
+
+			// Test execution
+			result, err := h.autoUpdateAgentShouldUpdate(ctx, groupName, "", tt.windowLookup)
+			tt.expectError(t, err)
+			require.Equal(t, tt.expectedTrigger, result)
+		})
+	}
+}
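+
+// The cases above pin down the trigger resolution order implemented by
+// autoUpdateAgentShouldUpdate. A minimal sketch of that order (helper and
+// accessor names assumed from the fakes in this file, error handling elided):
+//
+//	rollout, err := accessPoint.GetAutoUpdateAgentRollout(ctx)
+//	switch {
+//	case err == nil: // RFD 184 rollout is authoritative when present.
+//		return getTriggerFromRollout(rollout, group, updaterUUID)
+//	case !trace.IsNotFound(err): // hard errors must not fall back to RFD 109
+//		return false, trace.Wrap(err)
+//	case windowLookup && inCMCMaintenanceWindow: // RFD 109: the CMC window can fire the trigger...
+//		return true, nil
+//	default: // ...and so can the channel criticality.
+//		return getTriggerFromChannel(ctx, channels, group)
+//	}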
+
+func TestGetVersionFromRollout(t *testing.T) {
+	t.Parallel()
+	groupName := "test-group"
+
+	// This test matrix is written based on:
+	// https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#rollout-status-disabled
+	latestAllTheTime := map[autoupdatepb.AutoUpdateAgentGroupState]string{
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:  testVersionHigh,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:       testVersionHigh,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:     testVersionHigh,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: testVersionHigh,
+	}
+
+	activeDoneOnly := map[autoupdatepb.AutoUpdateAgentGroupState]string{
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:  testVersionLow,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:       testVersionHigh,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:     testVersionHigh,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: testVersionLow,
+	}
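+
+	// latestAllTheTime always serves the target version, while activeDoneOnly
+	// serves it only to groups that are actively updating or done: unstarted
+	// and rolled-back groups stay on the start version.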
+
+	tests := map[string]map[string]map[autoupdatepb.AutoUpdateAgentGroupState]string{
+		autoupdate.AgentsUpdateModeDisabled: {
+			autoupdate.AgentsScheduleImmediate: latestAllTheTime,
+			autoupdate.AgentsScheduleRegular:   latestAllTheTime,
+		},
+		autoupdate.AgentsUpdateModeSuspended: {
+			autoupdate.AgentsScheduleImmediate: latestAllTheTime,
+			autoupdate.AgentsScheduleRegular:   activeDoneOnly,
+		},
+		autoupdate.AgentsUpdateModeEnabled: {
+			autoupdate.AgentsScheduleImmediate: latestAllTheTime,
+			autoupdate.AgentsScheduleRegular:   activeDoneOnly,
+		},
+	}
+	for mode, scheduleCases := range tests {
+		for schedule, stateCases := range scheduleCases {
+			for state, expectedVersion := range stateCases {
+				t.Run(fmt.Sprintf("%s/%s/%s", mode, schedule, state), func(t *testing.T) {
+					rollout := &autoupdatepb.AutoUpdateAgentRollout{
+						Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{
+							StartVersion:   testVersionLow,
+							TargetVersion:  testVersionHigh,
+							Schedule:       schedule,
+							AutoupdateMode: mode,
+							// Strategy does not affect which versions are served
+							Strategy: autoupdate.AgentsStrategyTimeBased,
+						},
+						Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{
+							Groups: []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{
+								{
+									Name:  groupName,
+									State: state,
+								},
+							},
+						},
+					}
+					version, err := getVersionFromRollout(rollout, groupName, "")
+					require.NoError(t, err)
+					require.Equal(t, expectedVersion, version)
+				})
+			}
+		}
+	}
+}
+
+func TestGetTriggerFromRollout(t *testing.T) {
+	t.Parallel()
+	groupName := "test-group"
+
+	// This test matrix is written based on:
+	// https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md#rollout-status-disabled
+	neverUpdate := map[autoupdatepb.AutoUpdateAgentGroupState]bool{
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:  false,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:       false,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:     false,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: false,
+	}
+	alwaysUpdate := map[autoupdatepb.AutoUpdateAgentGroupState]bool{
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:  true,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:       true,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:     true,
+		autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: true,
+	}
+
+	tests := map[string]map[string]map[string]map[autoupdatepb.AutoUpdateAgentGroupState]bool{
+		autoupdate.AgentsUpdateModeDisabled: {
+			autoupdate.AgentsStrategyTimeBased: {
+				autoupdate.AgentsScheduleImmediate: neverUpdate,
+				autoupdate.AgentsScheduleRegular:   neverUpdate,
+			},
+			autoupdate.AgentsStrategyHaltOnError: {
+				autoupdate.AgentsScheduleImmediate: neverUpdate,
+				autoupdate.AgentsScheduleRegular:   neverUpdate,
+			},
+		},
+		autoupdate.AgentsUpdateModeSuspended: {
+			autoupdate.AgentsStrategyTimeBased: {
+				autoupdate.AgentsScheduleImmediate: neverUpdate,
+				autoupdate.AgentsScheduleRegular:   neverUpdate,
+			},
+			autoupdate.AgentsStrategyHaltOnError: {
+				autoupdate.AgentsScheduleImmediate: neverUpdate,
+				autoupdate.AgentsScheduleRegular:   neverUpdate,
+			},
+		},
+		autoupdate.AgentsUpdateModeEnabled: {
+			autoupdate.AgentsStrategyTimeBased: {
+				autoupdate.AgentsScheduleImmediate: alwaysUpdate,
+				autoupdate.AgentsScheduleRegular: {
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:  false,
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:       false,
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:     true,
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: true,
+				},
+			},
+			autoupdate.AgentsStrategyHaltOnError: {
+				autoupdate.AgentsScheduleImmediate: alwaysUpdate,
+				autoupdate.AgentsScheduleRegular: {
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED:  false,
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE:       true,
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE:     true,
+					autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ROLLEDBACK: true,
+				},
+			},
+		},
+	}
+	for mode, strategyCases := range tests {
+		for strategy, scheduleCases := range strategyCases {
+			for schedule, stateCases := range scheduleCases {
+				for state, expectedTrigger := range stateCases {
+					t.Run(fmt.Sprintf("%s/%s/%s/%s", mode, strategy, schedule, state), func(t *testing.T) {
+						rollout := &autoupdatepb.AutoUpdateAgentRollout{
+							Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{
+								StartVersion:   testVersionLow,
+								TargetVersion:  testVersionHigh,
+								Schedule:       schedule,
+								AutoupdateMode: mode,
+								Strategy:       strategy,
+							},
+							Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{
+								Groups: []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{
+									{
+										Name:  groupName,
+										State: state,
+									},
+								},
+							},
+						}
+						shouldUpdate, err := getTriggerFromRollout(rollout, groupName, "")
+						require.NoError(t, err)
+						require.Equal(t, expectedTrigger, shouldUpdate)
+					})
+				}
+			}
+		}
+	}
+}
+
+func TestGetGroup(t *testing.T) {
+	groupName := "test-group"
+	t.Parallel()
+	tests := []struct {
+		name           string
+		rollout        *autoupdatepb.AutoUpdateAgentRollout
+		expectedResult *autoupdatepb.AutoUpdateAgentRolloutStatusGroup
+		expectError    require.ErrorAssertionFunc
+	}{
+		{
+			name:        "nil",
+			expectError: require.Error,
+		},
+		{
+			name:        "nil status",
+			rollout:     &autoupdatepb.AutoUpdateAgentRollout{},
+			expectError: require.Error,
+		},
+		{
+			name:        "nil status groups",
+			rollout:     &autoupdatepb.AutoUpdateAgentRollout{Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{}},
+			expectError: require.Error,
+		},
+		{
+			name: "empty status groups",
+			rollout: &autoupdatepb.AutoUpdateAgentRollout{
+				Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{
+					Groups: []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{},
+				},
+			},
+			expectError: require.Error,
+		},
+		{
+			name: "group matching name",
+			rollout: &autoupdatepb.AutoUpdateAgentRollout{
+				Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{
+					Groups: []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{
+						{Name: "foo", State: 1},
+						{Name: "bar", State: 1},
+						{Name: groupName, State: 2},
+						{Name: "baz", State: 1},
+					},
+				},
+			},
+			expectedResult: &autoupdatepb.AutoUpdateAgentRolloutStatusGroup{
+				Name:  groupName,
+				State: 2,
+			},
+			expectError: require.NoError,
+		},
+		{
+			name: "no group matching name, should fallback to default",
+			rollout: &autoupdatepb.AutoUpdateAgentRollout{
+				Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{
+					Groups: []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{
+						{Name: "foo", State: 1},
+						{Name: "bar", State: 1},
+						{Name: "baz", State: 1},
+					},
+				},
+			},
+			expectedResult: &autoupdatepb.AutoUpdateAgentRolloutStatusGroup{
+				Name:  "baz",
+				State: 1,
+			},
+			expectError: require.NoError,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			result, err := getGroup(tt.rollout, groupName)
+			tt.expectError(t, err)
+			require.Equal(t, tt.expectedResult, result)
+		})
+	}
+}
+
+type mockRFD109VersionServer struct {
+	t        *testing.T
+	channels map[string]channelStub
+}
+
+type channelStub struct {
+	// with or without the leading "v"
+	version  string
+	critical bool
+	fail     bool
+}
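+
+// For reference, the RFD 109 channel protocol emulated by this mock is plain
+// text over HTTP: a GET on the path ending in constants.VersionPath returns a
+// bare version string (e.g. "v16.3.3" or "16.3.3"), and a GET on the path
+// ending in constants.MaintenancePath returns "yes" or "no".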
+
+func (m *mockRFD109VersionServer) ServeHTTP(w http.ResponseWriter, r *http.Request) {
+	var path string
+	var writeResp func(w http.ResponseWriter, stub channelStub) error
+
+	switch {
+	case strings.HasSuffix(r.URL.Path, constants.VersionPath):
+		path = strings.Trim(strings.TrimSuffix(r.URL.Path, constants.VersionPath), "/")
+		writeResp = func(w http.ResponseWriter, stub channelStub) error {
+			_, err := w.Write([]byte(stub.version))
+			return err
+		}
+	case strings.HasSuffix(r.URL.Path, constants.MaintenancePath):
+		path = strings.Trim(strings.TrimSuffix(r.URL.Path, constants.MaintenancePath), "/")
+		writeResp = func(w http.ResponseWriter, stub channelStub) error {
+			response := "no"
+			if stub.critical {
+				response = "yes"
+			}
+			_, err := w.Write([]byte(response))
+			return err
+		}
+	default:
+		assert.Fail(m.t, "unsupported path %q", r.URL.Path)
+		w.WriteHeader(http.StatusNotFound)
+		return
+	}
+
+	channel, ok := m.channels[path]
+	if !ok {
+		w.WriteHeader(http.StatusNotFound)
+		assert.Fail(m.t, "channel %q not found", path)
+		return
+	}
+	if channel.fail {
+		w.WriteHeader(http.StatusInternalServerError)
+		return
+	}
+	assert.NoError(m.t, writeResp(w, channel), "failed to write response")
+}
+
+func TestGetVersionFromChannel(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+
+	channelName := "test-channel"
+
+	mock := mockRFD109VersionServer{
+		t: t,
+		channels: map[string]channelStub{
+			"broken":            {fail: true},
+			"with-leading-v":    {version: "v" + testVersionHigh},
+			"without-leading-v": {version: testVersionHigh},
+			"low":               {version: testVersionLow},
+		},
+	}
+	srv := httptest.NewServer(http.HandlerFunc(mock.ServeHTTP))
+	t.Cleanup(srv.Close)
+
+	tests := []struct {
+		name           string
+		channels       automaticupgrades.Channels
+		expectedResult string
+		expectError    require.ErrorAssertionFunc
+	}{
+		{
+			name: "channel with leading v",
+			channels: automaticupgrades.Channels{
+				channelName: {ForwardURL: srv.URL + "/with-leading-v"},
+				"default":   {ForwardURL: srv.URL + "/low"},
+			},
+			expectedResult: testVersionHigh,
+			expectError:    require.NoError,
+		},
+		{
+			name: "channel without leading v",
+			channels: automaticupgrades.Channels{
+				channelName: {ForwardURL: srv.URL + "/without-leading-v"},
+				"default":   {ForwardURL: srv.URL + "/low"},
+			},
+			expectedResult: testVersionHigh,
+			expectError:    require.NoError,
+		},
+		{
+			name: "fallback to default with leading v",
+			channels: automaticupgrades.Channels{
+				"default": {ForwardURL: srv.URL + "/with-leading-v"},
+			},
+			expectedResult: testVersionHigh,
+			expectError:    require.NoError,
+		},
+		{
+			name: "fallback to default without leading v",
+			channels: automaticupgrades.Channels{
+				"default": {ForwardURL: srv.URL + "/without-leading-v"},
+			},
+			expectedResult: testVersionHigh,
+			expectError:    require.NoError,
+		},
+		{
+			name: "broken channel",
+			channels: automaticupgrades.Channels{
+				channelName: {ForwardURL: srv.URL + "/broken"},
+				"default":   {ForwardURL: srv.URL + "/without-leading-v"},
+			},
+			expectError: require.Error,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Test setup
+			require.NoError(t, tt.channels.CheckAndSetDefaults())
+
+			// Test execution
+			result, err := getVersionFromChannel(ctx, tt.channels, channelName)
+			tt.expectError(t, err)
+			require.Equal(t, tt.expectedResult, result)
+		})
+	}
+}
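+
+// Note: both getVersionFromChannel and getTriggerFromChannel fall back to the
+// "default" channel when the requested channel is not configured, which is
+// what the "fallback to default" cases exercise.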
+
+func TestGetTriggerFromChannel(t *testing.T) {
+	t.Parallel()
+	ctx := context.Background()
+
+	channelName := "test-channel"
+
+	mock := mockRFD109VersionServer{
+		t: t,
+		channels: map[string]channelStub{
+			"broken":       {fail: true},
+			"critical":     {critical: true},
+			"non-critical": {critical: false},
+		},
+	}
+	srv := httptest.NewServer(http.HandlerFunc(mock.ServeHTTP))
+	t.Cleanup(srv.Close)
+
+	tests := []struct {
+		name           string
+		channels       automaticupgrades.Channels
+		expectedResult bool
+		expectError    require.ErrorAssertionFunc
+	}{
+		{
+			name: "critical channel",
+			channels: automaticupgrades.Channels{
+				channelName: {ForwardURL: srv.URL + "/critical"},
+				"default":   {ForwardURL: srv.URL + "/non-critical"},
+			},
+			expectedResult: true,
+			expectError:    require.NoError,
+		},
+		{
+			name: "non-critical channel",
+			channels: automaticupgrades.Channels{
+				channelName: {ForwardURL: srv.URL + "/non-critical"},
+				"default":   {ForwardURL: srv.URL + "/critical"},
+			},
+			expectedResult: false,
+			expectError:    require.NoError,
+		},
+		{
+			name: "fallback to default which is critical",
+			channels: automaticupgrades.Channels{
+				"default": {ForwardURL: srv.URL + "/critical"},
+			},
+			expectedResult: true,
+			expectError:    require.NoError,
+		},
+		{
+			name: "fallback to default which is non-critical",
+			channels: automaticupgrades.Channels{
+				"default": {ForwardURL: srv.URL + "/non-critical"},
+			},
+			expectedResult: false,
+			expectError:    require.NoError,
+		},
+		{
+			name: "broken channel",
+			channels: automaticupgrades.Channels{
+				channelName: {ForwardURL: srv.URL + "/broken"},
+				"default":   {ForwardURL: srv.URL + "/critical"},
+			},
+			expectError: require.Error,
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			// Test setup
+			require.NoError(t, tt.channels.CheckAndSetDefaults())
+
+			// Test execution
+			result, err := getTriggerFromChannel(ctx, tt.channels, channelName)
+			tt.expectError(t, err)
+			require.Equal(t, tt.expectedResult, result)
+		})
+	}
+}
diff --git a/lib/web/automaticupgrades.go b/lib/web/autoupdate_rfd109.go
similarity index 70%
rename from lib/web/automaticupgrades.go
rename to lib/web/autoupdate_rfd109.go
index 6b7833dc629e2..d2dd43fdb6f3f 100644
--- a/lib/web/automaticupgrades.go
+++ b/lib/web/autoupdate_rfd109.go
@@ -1,6 +1,6 @@
 /*
  * Teleport
- * Copyright (C) 2023 Gravitational, Inc.
+ * Copyright (C) 2024 Gravitational, Inc.
  *
  * This program is free software: you can redistribute it and/or modify
  * it under the terms of the GNU Affero General Public License as published by
@@ -21,6 +21,7 @@ package web
 import (
 	"context"
 	"errors"
+	"fmt"
 	"net/http"
 	"strings"
 	"time"
@@ -28,17 +29,16 @@ import (
 	"github.com/gravitational/trace"
 	"github.com/julienschmidt/httprouter"
 
-	"github.com/gravitational/teleport/lib/automaticupgrades"
 	"github.com/gravitational/teleport/lib/automaticupgrades/constants"
 	"github.com/gravitational/teleport/lib/automaticupgrades/version"
 )
 
 const defaultChannelTimeout = 5 * time.Second
 
-// automaticUpgrades implements a version server in the Teleport Proxy.
+// automaticUpgrades109 implements a version server in the Teleport Proxy following the RFD 109 spec.
 // It is configured through the Teleport Proxy configuration and tells agent updaters
 // which version they should install.
-func (h *Handler) automaticUpgrades(w http.ResponseWriter, r *http.Request, p httprouter.Params) (interface{}, error) {
+func (h *Handler) automaticUpgrades109(w http.ResponseWriter, r *http.Request, p httprouter.Params) (interface{}, error) {
 	if h.cfg.AutomaticUpgradesChannels == nil {
 		return nil, trace.AccessDenied("This proxy is not configured to serve automatic upgrades channels.")
 	}
@@ -59,31 +59,25 @@ func (h *Handler) automaticUpgrades(w http.ResponseWriter, r *http.Request, p ht
 		return nil, trace.BadParameter("a channel name is required")
 	}
 
-	// We check if the channel is configured
-	channel, ok := h.cfg.AutomaticUpgradesChannels[channelName]
-	if !ok {
-		return nil, trace.NotFound("channel %s not found", channelName)
-	}
-
 	// Finally, we treat the request based on its type
 	switch requestType {
 	case "version":
 		h.log.Debugf("Agent requesting version for channel %s", channelName)
-		return h.automaticUpgradesVersion(w, r, channel)
+		return h.automaticUpgradesVersion109(w, r, channelName)
 	case "critical":
 		h.log.Debugf("Agent requesting criticality for channel %s", channelName)
-		return h.automaticUpgradesCritical(w, r, channel)
+		return h.automaticUpgradesCritical109(w, r, channelName)
 	default:
 		return nil, trace.BadParameter("requestType path must end with 'version' or 'critical'")
 	}
 }
 
-// automaticUpgradesVersion handles version requests from upgraders
-func (h *Handler) automaticUpgradesVersion(w http.ResponseWriter, r *http.Request, channel *automaticupgrades.Channel) (interface{}, error) {
+// automaticUpgradesVersion109 handles version requests from upgraders
+func (h *Handler) automaticUpgradesVersion109(w http.ResponseWriter, r *http.Request, channelName string) (interface{}, error) {
 	ctx, cancel := context.WithTimeout(r.Context(), defaultChannelTimeout)
 	defer cancel()
 
-	targetVersion, err := channel.GetVersion(ctx)
+	targetVersion, err := h.autoUpdateAgentVersion(ctx, channelName, "" /* updater UUID */)
 	if err != nil {
 		// If the error is that the upstream channel has no version
 		// We gracefully handle by serving "none"
@@ -96,16 +90,20 @@ func (h *Handler) automaticUpgradesVersion(w http.ResponseWriter, r *http.Reques
 		return nil, trace.Wrap(err)
 	}
 
-	_, err = w.Write([]byte(targetVersion))
+	// RFD 109 specifies that version from channels must have the leading "v".
+	// As h.autoUpdateAgentVersion doesn't, we must add it.
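+	// e.g. a resolved version of "17.3.0" is served to the updater as "v17.3.0".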
+	_, err = fmt.Fprintf(w, "v%s", targetVersion)
 	return nil, trace.Wrap(err)
 }
 
-// automaticUpgradesCritical handles criticality requests from upgraders
-func (h *Handler) automaticUpgradesCritical(w http.ResponseWriter, r *http.Request, channel *automaticupgrades.Channel) (interface{}, error) {
+// automaticUpgradesCritical109 handles criticality requests from upgraders
+func (h *Handler) automaticUpgradesCritical109(w http.ResponseWriter, r *http.Request, channelName string) (interface{}, error) {
 	ctx, cancel := context.WithTimeout(r.Context(), defaultChannelTimeout)
 	defer cancel()
 
-	critical, err := channel.GetCritical(ctx)
+	// RFD109 agents already retrieve maintenance windows from the CMC, no need to
+	// do a maintenance window lookup for them.
+	critical, err := h.autoUpdateAgentShouldUpdate(ctx, channelName, "" /* updater UUID */, false /* window lookup */)
 	if err != nil {
 		return nil, trace.Wrap(err)
 	}
diff --git a/lib/web/autoupdate_rfd184.go b/lib/web/autoupdate_rfd184.go
new file mode 100644
index 0000000000000..6ac532650cb64
--- /dev/null
+++ b/lib/web/autoupdate_rfd184.go
@@ -0,0 +1,93 @@
+/*
+ * Teleport
+ * Copyright (C) 2024 Gravitational, Inc.
+ *
+ * This program is free software: you can redistribute it and/or modify
+ * it under the terms of the GNU Affero General Public License as published by
+ * the Free Software Foundation, either version 3 of the License, or
+ * (at your option) any later version.
+ *
+ * This program is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
+ * GNU Affero General Public License for more details.
+ *
+ * You should have received a copy of the GNU Affero General Public License
+ * along with this program. If not, see <https://www.gnu.org/licenses/>.
+ */
+
+package web
+
+import (
+	"context"
+
+	"github.com/gravitational/trace"
+
+	"github.com/gravitational/teleport"
+	"github.com/gravitational/teleport/api"
+	"github.com/gravitational/teleport/api/client/webclient"
+	autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
+	"github.com/gravitational/teleport/api/types/autoupdate"
+)
+
+// automaticUpdateSettings184 crafts the automatic updates part of the ping/find response
+// as described in RFD-184 (agents) and RFD-144 (tools).
+func (h *Handler) automaticUpdateSettings184(ctx context.Context, group, updaterUUID string) webclient.AutoUpdateSettings {
+	// Tools auto updates section.
+	autoUpdateConfig, err := h.cfg.AccessPoint.GetAutoUpdateConfig(ctx)
+	// TODO(vapopov) DELETE IN v18.0.0 check of IsNotImplemented, must be backported to all latest supported versions.
+	if err != nil && !trace.IsNotFound(err) && !trace.IsNotImplemented(err) {
+		h.logger.ErrorContext(ctx, "failed to receive AutoUpdateConfig", "error", err)
+	}
+
+	autoUpdateVersion, err := h.cfg.AccessPoint.GetAutoUpdateVersion(ctx)
+	// TODO(vapopov) DELETE IN v18.0.0 check of IsNotImplemented, must be backported to all latest supported versions.
+	if err != nil && !trace.IsNotFound(err) && !trace.IsNotImplemented(err) {
+		h.logger.ErrorContext(ctx, "failed to receive AutoUpdateVersion", "error", err)
+	}
+
+	// Agent auto updates section.
+	agentVersion, err := h.autoUpdateAgentVersion(ctx, group, updaterUUID)
+	if err != nil {
+		h.logger.ErrorContext(ctx, "failed to resolve AgentVersion", "error", err)
+		// Defaulting to current version
+		agentVersion = teleport.Version
+	}
+	// If the source of truth is RFD 109 configuration (channels + CMC) we must emulate the
+	// RFD109 agent maintenance window behavior by looking up the CMC and checking if
+	// we are in a maintenance window.
+	shouldUpdate, err := h.autoUpdateAgentShouldUpdate(ctx, group, updaterUUID, true /* window lookup */)
+	if err != nil {
+		h.logger.ErrorContext(ctx, "failed to resolve AgentAutoUpdate", "error", err)
+		// Fail safe: don't tell agents to update if we can't resolve the trigger.
+		shouldUpdate = false
+	}
+
+	return webclient.AutoUpdateSettings{
+		ToolsAutoUpdate:          getToolsAutoUpdate(autoUpdateConfig),
+		ToolsVersion:             getToolsVersion(autoUpdateVersion),
+		AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds,
+		AgentVersion:             agentVersion,
+		AgentAutoUpdate:          shouldUpdate,
+	}
+}
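+
+// For illustration, a successful resolution above produces settings along the
+// lines of (values hypothetical):
+//
+//	webclient.AutoUpdateSettings{
+//		ToolsAutoUpdate:          true,
+//		ToolsVersion:             "17.3.0",
+//		AgentVersion:             "17.3.0",
+//		AgentAutoUpdate:          false,
+//		AgentUpdateJitterSeconds: DefaultAgentUpdateJitterSeconds,
+//	}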
+
+func getToolsAutoUpdate(config *autoupdatepb.AutoUpdateConfig) bool {
+	// If we can't get the AU config or if AUs are not configured, we default to "disabled".
+	// This ensures we fail safe and don't accidentally update clients if something is going wrong.
+	// If we want to enable AUs by default, it would be better to create a default "autoupdate_config" resource
+	// than changing this logic.
+	if config.GetSpec().GetTools() != nil {
+		return config.GetSpec().GetTools().GetMode() == autoupdate.ToolsUpdateModeEnabled
+	}
+	return false
+}
+
+func getToolsVersion(version *autoupdatepb.AutoUpdateVersion) string {
+	// If we can't get the AU version or tools AU version is not specified, we default to the current proxy version.
+	// This ensures we always advertise a version compatible with the cluster.
+	if version.GetSpec().GetTools() == nil {
+		return api.Version
+	}
+	return version.GetSpec().GetTools().GetTargetVersion()
+}
diff --git a/lib/web/integrations_awsoidc.go b/lib/web/integrations_awsoidc.go
index f9a228b65902e..70c5baeed19ea 100644
--- a/lib/web/integrations_awsoidc.go
+++ b/lib/web/integrations_awsoidc.go
@@ -158,13 +158,16 @@ func (h *Handler) awsOIDCDeployService(w http.ResponseWriter, r *http.Request, p
 	teleportVersionTag := teleport.Version
 	if automaticUpgrades(h.GetClusterFeatures()) {
-		cloudStableVersion, err := h.cfg.AutomaticUpgradesChannels.DefaultVersion(ctx)
+		const group, updaterUUID = "", ""
+		autoUpdateVersion, err := h.autoUpdateAgentVersion(r.Context(), group, updaterUUID)
 		if err != nil {
-			return "", trace.Wrap(err)
+			h.logger.WarnContext(r.Context(),
+				"Cannot read autoupdate target version, falling back to our own version",
+				"error", err,
+				"version", teleport.Version)
+		} else {
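+			// autoUpdateAgentVersion returns the version without the leading
+			// "v", so it can be used as the image tag directly (the old flow
+			// below had to strip the "v" from cloudStableVersion).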
+			teleportVersionTag = autoUpdateVersion
 		}
-
-		// cloudStableVersion has vX.Y.Z format, however the container image tag does not include the `v`.
-		teleportVersionTag = strings.TrimPrefix(cloudStableVersion, "v")
 	}
 
 	deployServiceResp, err := clt.IntegrationAWSOIDCClient().DeployService(ctx, &integrationv1.DeployServiceRequest{
@@ -211,13 +214,17 @@ func (h *Handler) awsOIDCDeployDatabaseServices(w http.ResponseWriter, r *http.R
 	teleportVersionTag := teleport.Version
 	if automaticUpgrades(h.GetClusterFeatures()) {
-		cloudStableVersion, err := h.cfg.AutomaticUpgradesChannels.DefaultVersion(ctx)
+		const group, updaterUUID = "", ""
+		autoUpdateVersion, err := h.autoUpdateAgentVersion(r.Context(), group, updaterUUID)
 		if err != nil {
-			return "", trace.Wrap(err)
+			h.logger.WarnContext(r.Context(),
+				"Cannot read autoupdate target version, falling back to self version.",
+				"error", err,
+				"version", teleport.Version)
+		} else {
+			teleportVersionTag = autoUpdateVersion
 		}
-		// cloudStableVersion has vX.Y.Z format, however the container image tag does not include the `v`.
-		teleportVersionTag = strings.TrimPrefix(cloudStableVersion, "v")
 	}
 
 	iamTokenName := deployserviceconfig.DefaultTeleportIAMTokenName
@@ -593,7 +600,7 @@ func (h *Handler) awsOIDCConfigureDeployServiceIAM(w http.ResponseWriter, r *htt
 		fmt.Sprintf("--aws-account-id=%s", shsprintf.EscapeDefaultContext(awsAccountID)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to complete the database enrollment.",
 	})
 	if err != nil {
@@ -635,7 +642,7 @@ func (h *Handler) awsOIDCConfigureEICEIAM(w http.ResponseWriter, r *http.Request
 		fmt.Sprintf("--aws-account-id=%s", shsprintf.EscapeDefaultContext(awsAccountID)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to complete the EC2 enrollment.",
 	})
 	if err != nil {
@@ -668,7 +675,7 @@ func (h *Handler) awsOIDCConfigureAWSAppAccessIAM(w http.ResponseWriter, r *http
 		fmt.Sprintf("--role=%s", shsprintf.EscapeDefaultContext(role)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to use AWS App Access.",
 	})
 	if err != nil {
@@ -739,7 +746,7 @@ func (h *Handler) awsOIDCConfigureEC2SSMIAM(w http.ResponseWriter, r *http.Reque
 		fmt.Sprintf("--aws-account-id=%s", shsprintf.EscapeDefaultContext(awsAccountID)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to finish the EC2 auto discover set up.",
 	})
 	if err != nil {
@@ -780,7 +787,7 @@ func (h *Handler) awsOIDCConfigureEKSIAM(w http.ResponseWriter, r *http.Request,
 		fmt.Sprintf("--aws-account-id=%s", shsprintf.EscapeDefaultContext(awsAccountID)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to complete the EKS enrollment.",
 	})
 	if err != nil {
@@ -813,7 +820,8 @@ func (h *Handler) awsOIDCEnrollEKSClusters(w http.ResponseWriter, r *http.Reques
 		return nil, trace.BadParameter("an integration name is required")
 	}
 
-	agentVersion, err := kubeutils.GetKubeAgentVersion(ctx, h.cfg.ProxyClient, h.GetClusterFeatures(), h.cfg.AutomaticUpgradesChannels)
+	versionGetter := &handlerVersionGetter{h}
+	agentVersion, err := kubeutils.GetKubeAgentVersion(ctx, h.cfg.ProxyClient, h.GetClusterFeatures(), versionGetter)
 	if err != nil {
 		return nil, trace.Wrap(err)
 	}
@@ -1447,7 +1455,7 @@
 	}
 
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to use the integration with AWS.",
 	})
 	if err != nil {
@@ -1488,7 +1496,7 @@ func (h *Handler) awsOIDCConfigureListDatabasesIAM(w http.ResponseWriter, r *htt
 		fmt.Sprintf("--aws-account-id=%s", shsprintf.EscapeDefaultContext(awsAccountID)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to complete the Database enrollment.",
 	})
 	if err != nil {
@@ -1534,7 +1542,7 @@ func (h *Handler) awsAccessGraphOIDCSync(w http.ResponseWriter, r *http.Request,
 		fmt.Sprintf("--aws-account-id=%s", shsprintf.EscapeDefaultContext(awsAccountID)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to complete the Access Graph AWS Sync enrollment.",
 	})
 	if err != nil {
diff --git a/lib/web/integrations_awsoidc_test.go b/lib/web/integrations_awsoidc_test.go
index 296e99b7aa94e..3184c6458f5cc 100644
--- a/lib/web/integrations_awsoidc_test.go
+++ b/lib/web/integrations_awsoidc_test.go
@@ -173,7 +173,7 @@ func TestBuildDeployServiceConfigureIAMScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
@@ -277,7 +277,7 @@ func TestBuildEICEConfigureIAMScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
@@ -408,7 +408,7 @@ func TestBuildEC2SSMIAMScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
@@ -483,7 +483,7 @@ func TestBuildAWSAppAccessConfigureIAMScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
@@ -586,7 +586,7 @@ func TestBuildEKSConfigureIAMScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
@@ -718,7 +718,7 @@ func TestBuildAWSOIDCIdPConfigureScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
@@ -821,7 +821,7 @@ func TestBuildListDatabasesConfigureIAMScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
diff --git a/lib/web/integrations_azureoidc.go b/lib/web/integrations_azureoidc.go
index 0ce8d624a79f1..3a1dd654550e5 100644
--- a/lib/web/integrations_azureoidc.go
+++ b/lib/web/integrations_azureoidc.go
@@ -66,7 +66,7 @@ func (h *Handler) azureOIDCConfigure(w http.ResponseWriter, r *http.Request, p h
 	}
 
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to use the integration with Azure.",
 	})
 	if err != nil {
diff --git a/lib/web/integrations_azureoidc_test.go b/lib/web/integrations_azureoidc_test.go
index cbdadad4ef433..6e0f72c6d3136 100644
--- a/lib/web/integrations_azureoidc_test.go
+++ b/lib/web/integrations_azureoidc_test.go
@@ -97,7 +97,7 @@ func TestAzureOIDCConfigureScript(t *testing.T) {
 		}
 
 		require.Contains(t, string(resp.Bytes()),
-			fmt.Sprintf("teleportArgs='%s'\n", tc.expectedTeleportArgs),
+			fmt.Sprintf("entrypointArgs='%s'\n", tc.expectedTeleportArgs),
 		)
 	})
 }
diff --git a/lib/web/integrations_samlidp.go b/lib/web/integrations_samlidp.go
index 0ea1e0b1d67d3..eda5dac78a265 100644
--- a/lib/web/integrations_samlidp.go
+++ b/lib/web/integrations_samlidp.go
@@ -56,7 +56,7 @@ func (h *Handler) gcpWorkforceConfigScript(w http.ResponseWriter, r *http.Reques
 		fmt.Sprintf("--idp-metadata-url=%s", shsprintf.EscapeDefaultContext(samlIdPMetadataURL)),
 	}
 	script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{
-		TeleportArgs:   strings.Join(argsList, " "),
+		EntrypointArgs: strings.Join(argsList, " "),
 		SuccessMessage: "Success! You can now go back to the Teleport Web UI to complete enrolling this workforce pool to Teleport SAML Identity Provider.",
 	})
 	if err != nil {
diff --git a/lib/web/join_tokens.go b/lib/web/join_tokens.go
index 00d03014c22f1..e7d66ed0ab1c6 100644
--- a/lib/web/join_tokens.go
+++ b/lib/web/join_tokens.go
@@ -19,7 +19,6 @@
 package web
 
 import (
-	"bytes"
 	"context"
 	"encoding/hex"
 	"fmt"
@@ -27,24 +26,19 @@ import (
 	"net/http"
 	"net/url"
 	"reflect"
-	"regexp"
 	"sort"
-	"strconv"
 	"strings"
 	"time"
 
-	"github.com/google/safetext/shsprintf"
 	"github.com/google/uuid"
 	"github.com/gravitational/trace"
 	"github.com/julienschmidt/httprouter"
-	"k8s.io/apimachinery/pkg/util/validation"
 
 	"github.com/gravitational/teleport/api/client/proto"
 	"github.com/gravitational/teleport/api/types"
 	apiutils "github.com/gravitational/teleport/api/utils"
 	"github.com/gravitational/teleport/lib/defaults"
 	"github.com/gravitational/teleport/lib/httplib"
-	"github.com/gravitational/teleport/lib/modules"
 	"github.com/gravitational/teleport/lib/services"
 	"github.com/gravitational/teleport/lib/tlsca"
 	"github.com/gravitational/teleport/lib/ui"
@@ -54,8 +48,7 @@ import (
 )
 
 const (
-	stableCloudChannelRepo = "stable/cloud"
-	HeaderTokenName        = "X-Teleport-TokenName"
+	HeaderTokenName = "X-Teleport-TokenName"
 )
 
 // nodeJoinToken contains node token fields for the UI.
@@ -79,15 +72,9 @@ type scriptSettings struct {
 	appURI              string
 	joinMethod          string
 	databaseInstallMode bool
-	installUpdater      bool
 
 	discoveryInstallMode bool
 	discoveryGroup       string
-
-	// automaticUpgradesVersion is the target automatic upgrades version.
-	// The version must be valid semver, with the leading 'v'. e.g. v15.0.0-dev
-	// Required when installUpdater is true.
-	automaticUpgradesVersion string
 }
 
 // automaticUpgrades returns whether automaticUpgrades should be enabled.
@@ -376,41 +363,16 @@ func (h *Handler) createTokenForDiscoveryHandle(w http.ResponseWriter, r *http.R
 	}, nil
 }
 
-// getAutoUpgrades checks if automaticUpgrades are enabled and returns the
-// version that should be used according to auto upgrades default channel.
-func (h *Handler) getAutoUpgrades(ctx context.Context) (bool, string, error) {
-	var autoUpgradesVersion string
-	var err error
-	autoUpgrades := automaticUpgrades(h.GetClusterFeatures())
-	if autoUpgrades {
-		autoUpgradesVersion, err = h.cfg.AutomaticUpgradesChannels.DefaultVersion(ctx)
-		if err != nil {
-			log.WithError(err).Info("Failed to get auto upgrades version.")
-			return false, "", trace.Wrap(err)
-		}
-	}
-	return autoUpgrades, autoUpgradesVersion, nil
-
-}
-
 func (h *Handler) getNodeJoinScriptHandle(w http.ResponseWriter, r *http.Request, params httprouter.Params) (interface{}, error) {
 	httplib.SetScriptHeaders(w.Header())
 
-	autoUpgrades, autoUpgradesVersion, err := h.getAutoUpgrades(r.Context())
-	if err != nil {
-		w.Write(scripts.ErrorBashScript)
-		return nil, nil
-	}
-
 	settings := scriptSettings{
-		token:                    params.ByName("token"),
-		appInstallMode:           false,
-		joinMethod:               r.URL.Query().Get("method"),
-		installUpdater:           autoUpgrades,
-		automaticUpgradesVersion: autoUpgradesVersion,
+		token:          params.ByName("token"),
+		appInstallMode: false,
+		joinMethod:     r.URL.Query().Get("method"),
 	}
 
-	script, err := getJoinScript(r.Context(), settings, h.GetProxyClient())
+	script, err := h.getJoinScript(r.Context(), settings)
 	if err != nil {
 		log.WithError(err).Info("Failed to return the node install script.")
 		w.Write(scripts.ErrorBashScript)
@@ -444,22 +406,14 @@ func (h *Handler) getAppJoinScriptHandle(w http.ResponseWriter, r *http.Request,
 		return nil, nil
 	}
 
-	autoUpgrades, autoUpgradesVersion, err := h.getAutoUpgrades(r.Context())
-	if err != nil {
-		w.Write(scripts.ErrorBashScript)
-		return nil, nil
-	}
-
 	settings := scriptSettings{
-		token:                    params.ByName("token"),
-		appInstallMode:           true,
-		appName:                  name,
-		appURI:                   uri,
-		installUpdater:           autoUpgrades,
-		automaticUpgradesVersion: autoUpgradesVersion,
+		token:          params.ByName("token"),
+		appInstallMode: true,
+		appName:        name,
+		appURI:         uri,
 	}
 
-	script, err := getJoinScript(r.Context(), settings, h.GetProxyClient())
+	script, err := h.getJoinScript(r.Context(), settings)
 	if err != nil {
 		log.WithError(err).Info("Failed to return the app install script.")
 		w.Write(scripts.ErrorBashScript)
@@ -478,20 +432,12 @@ func (h *Handler) getAppJoinScriptHandle(w http.ResponseWriter, r *http.Request,
 func (h *Handler) getDatabaseJoinScriptHandle(w http.ResponseWriter, r *http.Request, params httprouter.Params) (interface{}, error) {
 	httplib.SetScriptHeaders(w.Header())
 
-	autoUpgrades, autoUpgradesVersion, err := h.getAutoUpgrades(r.Context())
-	if err != nil {
-		w.Write(scripts.ErrorBashScript)
-		return nil, nil
-	}
-
 	settings := scriptSettings{
-		token:                    params.ByName("token"),
-		databaseInstallMode:      true,
-		installUpdater:           autoUpgrades,
-		automaticUpgradesVersion: autoUpgradesVersion,
+		token:               params.ByName("token"),
+		databaseInstallMode: true,
 	}
 
-	script, err := getJoinScript(r.Context(), settings, h.GetProxyClient())
+	script, err := h.getJoinScript(r.Context(), settings)
 	if err != nil {
 		log.WithError(err).Info("Failed to return the database install script.")
 		w.Write(scripts.ErrorBashScript)
@@ -512,12 +458,6 @@ func (h *Handler) getDiscoveryJoinScriptHandle(w http.ResponseWriter, r *http.Re
 	queryValues := r.URL.Query()
 	const discoveryGroupQueryParam = "discoveryGroup"
 
-	autoUpgrades, autoUpgradesVersion, err := h.getAutoUpgrades(r.Context())
-	if err != nil {
-		w.Write(scripts.ErrorBashScript)
-		return nil, nil
-	}
-
 	discoveryGroup, err := url.QueryUnescape(queryValues.Get(discoveryGroupQueryParam))
 	if err != nil {
 		log.WithField("query-param", discoveryGroupQueryParam).WithError(err).Debug("Failed to return the discovery install script.")
@@ -531,14 +471,12 @@ func (h *Handler) getDiscoveryJoinScriptHandle(w http.ResponseWriter, r *http.Re
 	}
 
 	settings := scriptSettings{
-		token:                    params.ByName("token"),
-		discoveryInstallMode:     true,
-		discoveryGroup:           discoveryGroup,
-		installUpdater:           autoUpgrades,
-		automaticUpgradesVersion: autoUpgradesVersion,
+		token:                params.ByName("token"),
+		discoveryInstallMode: true,
+		discoveryGroup:       discoveryGroup,
 	}
 
-	script, err := getJoinScript(r.Context(), settings, h.GetProxyClient())
+	script, err := h.getJoinScript(r.Context(), settings)
 	if err != nil {
 		log.WithError(err).Info("Failed to return the discovery install script.")
 		w.Write(scripts.ErrorBashScript)
@@ -554,8 +492,9 @@ func (h *Handler) getDiscoveryJoinScriptHandle(w http.ResponseWriter, r *http.Re
 	return nil, nil
 }
 
-func getJoinScript(ctx context.Context, settings scriptSettings, m nodeAPIGetter) (string, error) {
-	switch types.JoinMethod(settings.joinMethod) {
+func (h *Handler) getJoinScript(ctx context.Context, settings scriptSettings) (string, error) {
+	joinMethod := types.JoinMethod(settings.joinMethod)
+	switch joinMethod {
 	case types.JoinMethodUnspecified, types.JoinMethodToken:
 		if err := validateJoinToken(settings.token); err != nil {
 			return "", trace.Wrap(err)
@@ -565,141 +504,55 @@ func getJoinScript(ctx context.Context, settings scriptSettings, m nodeAPIGetter
 		return "", trace.BadParameter("join method %q is not supported via script", settings.joinMethod)
 	}
 
+	clt := h.GetProxyClient()
+
 	// The provided token can be attacker controlled, so we must validate
 	// it with the backend before using it to generate the script.
-	token, err := m.GetToken(ctx, settings.token)
+	token, err := clt.GetToken(ctx, settings.token)
 	if err != nil {
 		return "", trace.BadParameter("invalid token")
 	}
 
-	// Get hostname and port from proxy server address.
-	proxyServers, err := m.GetProxies()
-	if err != nil {
-		return "", trace.Wrap(err)
-	}
-
-	if len(proxyServers) == 0 {
-		return "", trace.NotFound("no proxy servers found")
-	}
-
-	version := proxyServers[0].GetTeleportVersion()
-
-	publicAddr := proxyServers[0].GetPublicAddr()
-	if publicAddr == "" {
-		return "", trace.Errorf("proxy public_addr is not set, you must set proxy_service.public_addr to the publicly reachable address of the proxy before you can generate a node join script")
-	}
-
-	hostname, portStr, err := utils.SplitHostPort(publicAddr)
-	if err != nil {
-		return "", trace.Wrap(err)
-	}
+	// TODO(hugoShaka): hit the local accesspoint which has a cache instead of asking the auth every time.
 
 	// Get the CA pin hashes of the cluster to join.
-	localCAResponse, err := m.GetClusterCACert(ctx)
+	localCAResponse, err := clt.GetClusterCACert(ctx)
 	if err != nil {
 		return "", trace.Wrap(err)
 	}
+
 	caPins, err := tlsca.CalculatePins(localCAResponse.TLSCA)
 	if err != nil {
 		return "", trace.Wrap(err)
 	}
 
-	labelsList := []string{}
-	for labelKey, labelValues := range token.GetSuggestedLabels() {
-		labels := strings.Join(labelValues, " ")
-		labelsList = append(labelsList, fmt.Sprintf("%s=%s", labelKey, labels))
-	}
-
-	var dbServiceResourceLabels []string
-	if settings.databaseInstallMode {
-		suggestedAgentMatcherLabels := token.GetSuggestedAgentMatcherLabels()
-		dbServiceResourceLabels, err = scripts.MarshalLabelsYAML(suggestedAgentMatcherLabels, 6)
-		if err != nil {
-			return "", trace.Wrap(err)
-		}
-	}
-
-	var buf bytes.Buffer
-	var appServerResourceLabels []string
-	// If app install mode is requested but parameters are blank for some reason,
-	// we need to return an error.
-	if settings.appInstallMode {
-		if errs := validation.IsDNS1035Label(settings.appName); len(errs) > 0 {
-			return "", trace.BadParameter("appName %q must be a valid DNS subdomain: https://goteleport.com/docs/enroll-resources/application-access/guides/connecting-apps/#application-name", settings.appName)
-		}
-		if !appURIPattern.MatchString(settings.appURI) {
-			return "", trace.BadParameter("appURI %q contains invalid characters", settings.appURI)
-		}
-
-		suggestedLabels := token.GetSuggestedLabels()
-		appServerResourceLabels, err = scripts.MarshalLabelsYAML(suggestedLabels, 4)
-		if err != nil {
-			return "", trace.Wrap(err)
-		}
-	}
-
-	if settings.discoveryInstallMode {
-		if settings.discoveryGroup == "" {
-			return "", trace.BadParameter("discovery group is required")
-		}
+	installOpts, err := h.installScriptOptions(ctx)
+	if err != nil {
+		return "", trace.Wrap(err, "Building install script options")
 	}
 
-	packageName := types.PackageNameOSS
-	if modules.GetModules().BuildType() == modules.BuildEnterprise {
-		packageName = types.PackageNameEnt
+	nodeInstallOpts := scripts.InstallNodeScriptOptions{
+		InstallOptions: installOpts,
+		Token:          token.GetName(),
+		CAPins:         caPins,
+		// We are using the joinMethod from the script settings instead of the one from the token
+		// to reproduce the previous script behavior. I'm also afraid that using the
+		// join method from the token would provide an oracle for an attacker wanting to discover
+		// the join method.
+		// We might want to change this in the future to look up the join method from the token
+		// to avoid potential mismatch and allow the caller to not care about the join method.
+		JoinMethod:              joinMethod,
+		Labels:                  token.GetSuggestedLabels(),
+		LabelMatchers:           token.GetSuggestedAgentMatcherLabels(),
+		AppServiceEnabled:       settings.appInstallMode,
+		AppName:                 settings.appName,
+		AppURI:                  settings.appURI,
+		DatabaseServiceEnabled:  settings.databaseInstallMode,
+		DiscoveryServiceEnabled: settings.discoveryInstallMode,
+		DiscoveryGroup:          settings.discoveryGroup,
 	}
 
-	// By default, it will use `stable/v<majorVersion>`, eg stable/v12
-	repoChannel := ""
-
-	// The install script will install the updater (teleport-ent-updater) for Cloud customers enrolled in Automatic Upgrades.
-	// The repo channel used must be `stable/cloud` which has the available packages for the Cloud Customer's agents.
-	// It pins the teleport version to the one specified by the default version channel
-	// This ensures the initial installed version is the same as the `teleport-ent-updater` would install.
-	if settings.installUpdater {
-		if settings.automaticUpgradesVersion == "" {
-			return "", trace.Wrap(err, "automatic upgrades version must be set when installUpdater is true")
-		}
-
-		repoChannel = stableCloudChannelRepo
-		// automaticUpgradesVersion has vX.Y.Z format, however the script
-		// expects the version to not include the `v` so we strip it
-		version = strings.TrimPrefix(settings.automaticUpgradesVersion, "v")
-	}
-
-	// This section relies on Go's default zero values to make sure that the settings
-	// are correct when not installing an app.
-	err = scripts.InstallNodeBashScript.Execute(&buf, map[string]interface{}{
-		"token":    settings.token,
-		"hostname": hostname,
-		"port":     portStr,
-		// The install.sh script has some manually generated configs and some
-		// generated by the `teleport config` commands. The old bash
-		// version used space delimited values whereas the teleport command uses
-		// a comma delimeter. The Old version can be removed when the install.sh
-		// file has been completely converted over.
-		"caPinsOld":                  strings.Join(caPins, " "),
-		"caPins":                     strings.Join(caPins, ","),
-		"packageName":                packageName,
-		"repoChannel":                repoChannel,
-		"installUpdater":             strconv.FormatBool(settings.installUpdater),
-		"version":                    shsprintf.EscapeDefaultContext(version),
-		"appInstallMode":             strconv.FormatBool(settings.appInstallMode),
-		"appServerResourceLabels":    appServerResourceLabels,
-		"appName":                    shsprintf.EscapeDefaultContext(settings.appName),
-		"appURI":                     shsprintf.EscapeDefaultContext(settings.appURI),
-		"joinMethod":                 shsprintf.EscapeDefaultContext(settings.joinMethod),
-		"labels":                     strings.Join(labelsList, ","),
-		"databaseInstallMode":        strconv.FormatBool(settings.databaseInstallMode),
-		"db_service_resource_labels": dbServiceResourceLabels,
-		"discoveryInstallMode":       settings.discoveryInstallMode,
-		"discoveryGroup":             shsprintf.EscapeDefaultContext(settings.discoveryGroup),
-	})
-	if err != nil {
-		return "", trace.Wrap(err)
-	}
-
-	return buf.String(), nil
+	return scripts.GetNodeInstallScript(ctx, nodeInstallOpts)
 }
 
 // validateJoinToken validate a join token.
@@ -789,17 +642,3 @@ func isSameAzureRuleSet(r1, r2 []*types.ProvisionTokenSpecV2Azure_Rule) bool {
 	sortAzureRules(r2)
 	return reflect.DeepEqual(r1, r2)
 }
-
-type nodeAPIGetter interface {
-	// GetToken looks up a provisioning token.
-	GetToken(ctx context.Context, token string) (types.ProvisionToken, error)
-
-	// GetClusterCACert returns the CAs for the local cluster without signing keys.
-	GetClusterCACert(ctx context.Context) (*proto.GetClusterCACertResponse, error)
-
-	// GetProxies returns a list of registered proxies.
-	GetProxies() ([]types.Server, error)
-}
-
-// appURIPattern is a regexp excluding invalid characters from application URIs.
-var appURIPattern = regexp.MustCompile(`^[-\w/:. ]+$`)
diff --git a/lib/web/join_tokens_test.go b/lib/web/join_tokens_test.go
index 4e0062b333ef3..95de15dfe0280 100644
--- a/lib/web/join_tokens_test.go
+++ b/lib/web/join_tokens_test.go
@@ -23,27 +23,35 @@ import (
 	"encoding/hex"
 	"encoding/json"
 	"fmt"
+	"math/rand/v2"
 	"net/http"
 	"net/url"
 	"regexp"
+	"strconv"
 	"testing"
 	"time"
 
 	"github.com/google/go-cmp/cmp"
 	"github.com/google/go-cmp/cmp/cmpopts"
 	"github.com/gravitational/trace"
+	"github.com/stretchr/testify/mock"
 	"github.com/stretchr/testify/require"
+	"google.golang.org/protobuf/types/known/durationpb"
 
 	"github.com/gravitational/teleport"
 	"github.com/gravitational/teleport/api/client/proto"
+	autoupdatev1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1"
 	"github.com/gravitational/teleport/api/types"
-	"github.com/gravitational/teleport/api/utils"
+	"github.com/gravitational/teleport/api/types/autoupdate"
+	apiutils "github.com/gravitational/teleport/api/utils"
 	"github.com/gravitational/teleport/lib/auth/authclient"
+	"github.com/gravitational/teleport/lib/automaticupgrades"
 	"github.com/gravitational/teleport/lib/defaults"
 	"github.com/gravitational/teleport/lib/fixtures"
 	"github.com/gravitational/teleport/lib/modules"
 	"github.com/gravitational/teleport/lib/services"
 	libui "github.com/gravitational/teleport/lib/ui"
+	utils "github.com/gravitational/teleport/lib/utils"
 	"github.com/gravitational/teleport/lib/web/ui"
 )
 
@@ -669,41 +677,18 @@ func toHex(s string) string { return hex.EncodeToString([]byte(s)) }
 
 func TestGetNodeJoinScript(t *testing.T) {
 	validToken := "f18da1c9f6630a51e8daf121e7451daa"
+	invalidToken := "f18da1c9f6630a51e8daf121e7451dab"
 	validIAMToken := "valid-iam-token"
 	internalResourceID := "967d38ff-7a61-4f42-bd2d-c61965b44db0"
 
-	m := &mockedNodeAPIGetter{
-		mockGetProxyServers: func() ([]types.Server, error) {
-			var s types.ServerV2
-			s.SetPublicAddrs([]string{"test-host:12345678"})
-
-			return []types.Server{&s}, nil
-		},
-		mockGetClusterCACert: func(context.Context) (*proto.GetClusterCACertResponse, error) {
-			fakeBytes := []byte(fixtures.SigningCertPEM)
-			return &proto.GetClusterCACertResponse{TLSCA: fakeBytes}, nil
-		},
-		mockGetToken: func(_ context.Context, token string) (types.ProvisionToken, error) {
-			if token == validToken || token == validIAMToken {
-				return &types.ProvisionTokenV2{
-					Metadata: types.Metadata{
-						Name: token,
-					},
-					Spec: types.ProvisionTokenSpecV2{
-						SuggestedLabels: types.Labels{
-							types.InternalResourceIDLabel: utils.Strings{internalResourceID},
-						},
-					},
-				}, nil
-			}
-			return nil, trace.NotFound("token does not exist")
-		},
-	}
+	hostname := "proxy.example.com"
+	port := 1234
 
 	for _, test := range []struct {
 		desc            string
 		settings        scriptSettings
 		errAssert       require.ErrorAssertionFunc
+		token           *types.ProvisionTokenV2
 		extraAssertions func(script string)
 	}{
 		{
 			desc:      "short token length",
-			settings:  scriptSettings{token: toHex("f18da1c9f6630a51e8daf121e7451d")},
+			settings:  scriptSettings{token: toHex(validToken[:30])},
 			errAssert: require.Error,
+			token: &types.ProvisionTokenV2{
+				Metadata: types.Metadata{
+					Name: validToken[:30],
+				},
+				Spec: types.ProvisionTokenSpecV2{
+					SuggestedLabels: types.Labels{
+						types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+					},
+				},
+			},
 		},
 		{
 			desc:      "valid length but does not exist",
-			settings:  scriptSettings{token: toHex("xxxxxxx9f6630a51e8daf121exxxxxxx")},
+			settings:  scriptSettings{token: toHex(invalidToken)},
 			errAssert: require.Error,
+			token: &types.ProvisionTokenV2{
+				Metadata: types.Metadata{
+					Name: validToken,
+				},
+				Spec: types.ProvisionTokenSpecV2{
+					SuggestedLabels: types.Labels{
+						types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+					},
+				},
+			},
 		},
 		{
 			desc:      "valid",
 			settings:  scriptSettings{token: validToken},
 			errAssert: require.NoError,
+			token: &types.ProvisionTokenV2{
+				Metadata: types.Metadata{
+					Name: validToken,
+				},
+				Spec: types.ProvisionTokenSpecV2{
+					SuggestedLabels: types.Labels{
+						types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+					},
+				},
+			},
 			extraAssertions: func(script string) {
 				require.Contains(t, script, validToken)
-				require.Contains(t, script, "test-host")
-				require.Contains(t, script, "12345678")
+				require.Contains(t, script, hostname)
+				require.Contains(t, script, strconv.Itoa(port))
 				require.Contains(t, script, "sha256:")
 				require.NotContains(t, script, "JOIN_METHOD='iam'")
 			},
@@ -747,6 +762,16 @@ func TestGetNodeJoinScript(t *testing.T) {
 			settings: scriptSettings{
 				token:      validIAMToken,
 				joinMethod: string(types.JoinMethodIAM),
 			},
+			token: &types.ProvisionTokenV2{
+				Metadata: types.Metadata{
+					Name: validIAMToken,
+				},
+				Spec: types.ProvisionTokenSpecV2{
+					SuggestedLabels: types.Labels{
+						types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+					},
+				},
+			},
 			errAssert: require.NoError,
 			extraAssertions: func(script string) {
 				require.Contains(t, script, "JOIN_METHOD='iam'")
@@ -756,14 +781,34 @@ func TestGetNodeJoinScript(t *testing.T) {
 			desc:      "internal resourceid label",
 			settings:  scriptSettings{token: validToken},
 			errAssert: require.NoError,
+			token: &types.ProvisionTokenV2{
+				Metadata: types.Metadata{
+					Name: validToken,
+				},
+				Spec: types.ProvisionTokenSpecV2{
+					SuggestedLabels: types.Labels{
+						types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+					},
+				},
+			},
 			extraAssertions: func(script string) {
 				require.Contains(t, script, "--labels ")
 				require.Contains(t, script, fmt.Sprintf("%s=%s", types.InternalResourceIDLabel, internalResourceID))
 			},
 		},
 		{
-			desc:     "app server labels",
-			settings: scriptSettings{token: validToken, appInstallMode: true, appName: "app-name", appURI: "app-uri"},
+			desc:     "app server labels",
+			settings: scriptSettings{token: validToken, appInstallMode: true, appName: "app-name", appURI: "app-uri"},
+			token: &types.ProvisionTokenV2{
+				Metadata: types.Metadata{
+					Name: validToken,
+				},
+				Spec: types.ProvisionTokenSpecV2{
+					SuggestedLabels: types.Labels{
+						types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+					},
+				},
+			},
 			errAssert: require.NoError,
 			extraAssertions: func(script string) {
 				require.Contains(t, script, `APP_NAME='app-name'`)
@@ -774,7 +819,12 @@ func TestGetNodeJoinScript(t *testing.T) {
 		},
 	} {
 		t.Run(test.desc, func(t *testing.T) {
-			script, err := getJoinScript(context.Background(), test.settings, m)
+			h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{
+				hostname: hostname,
+				port:     port,
+				token:    test.token,
+			})
+			script, err := h.getJoinScript(context.Background(), test.settings)
 			test.errAssert(t, err)
 			if err != nil {
 				require.Empty(t, script)
@@ -787,28 +837,95 @@ func TestGetNodeJoinScript(t *testing.T) {
 	}
 }
 
-func TestGetAppJoinScript(t *testing.T) {
-	testTokenID := "f18da1c9f6630a51e8daf121e7451daa"
-	m := &mockedNodeAPIGetter{
-		mockGetToken: func(_ context.Context, token string) (types.ProvisionToken, error) {
-			if token == testTokenID {
-				return &types.ProvisionTokenV2{
-					Metadata: types.Metadata{
-						Name: token,
-					},
-				}, nil
-			}
-			return nil, trace.NotFound("token does not exist")
-		},
-		mockGetProxyServers: func() ([]types.Server, error) {
-			var s types.ServerV2
-			s.SetPublicAddrs([]string{"test-host:12345678"})
+type autoupdateAccessPointMock struct {
+	authclient.ProxyAccessPoint
+	mock.Mock
+}
+
+func (a *autoupdateAccessPointMock) GetAutoUpdateAgentRollout(ctx context.Context) (*autoupdatev1pb.AutoUpdateAgentRollout, error) {
+	args := a.Called(ctx)
+	return args.Get(0).(*autoupdatev1pb.AutoUpdateAgentRollout), args.Error(1)
+}
+
+type autoupdateProxyClientMock struct {
+	authclient.ClientI
+	mock.Mock
+}
+
+func (a *autoupdateProxyClientMock) GetToken(ctx context.Context, token string) (types.ProvisionToken, error) {
+	args := a.Called(ctx, token)
+	return args.Get(0).(types.ProvisionToken), args.Error(1)
+}
+
+func (a *autoupdateProxyClientMock) GetClusterCACert(ctx context.Context) (*proto.GetClusterCACertResponse, error) {
+	args := a.Called(ctx)
+	return args.Get(0).(*proto.GetClusterCACertResponse), args.Error(1)
+}
+
+type autoupdateTestHandlerConfig struct {
+	testModules *modules.TestModules
+	hostname    string
+	port        int
+	channels    automaticupgrades.Channels
+	rollout     *autoupdatev1pb.AutoUpdateAgentRollout
+	token       *types.ProvisionTokenV2
+}
+
+func newAutoupdateTestHandler(t *testing.T, config autoupdateTestHandlerConfig) *Handler {
+	if config.hostname == "" {
+		config.hostname = fmt.Sprintf("proxy-%d.example.com", rand.Int())
+	}
+	if config.port == 0 {
+		config.port = rand.IntN(65535)
+	}
+	addr := config.hostname + ":" + strconv.Itoa(config.port)
+
+	if config.channels == nil {
+		config.channels = automaticupgrades.Channels{}
+	}
+	require.NoError(t, config.channels.CheckAndSetDefaults())
+
+	ap := &autoupdateAccessPointMock{}
+	if config.rollout == nil {
+		ap.On("GetAutoUpdateAgentRollout", mock.Anything).Return(config.rollout, trace.NotFound("rollout does not exist"))
+	} else {
+		ap.On("GetAutoUpdateAgentRollout", mock.Anything).Return(config.rollout, nil)
+	}
+
+	clt := &autoupdateProxyClientMock{}
+	if config.token == nil {
+		clt.On("GetToken", mock.Anything, mock.Anything).Return(config.token, trace.NotFound("token does not exist"))
+	} else {
+		clt.On("GetToken", mock.Anything, config.token.GetName()).Return(config.token, nil)
+	}
 
-			return []types.Server{&s}, nil
+	clt.On("GetClusterCACert", mock.Anything).Return(&proto.GetClusterCACertResponse{TLSCA: []byte(fixtures.SigningCertPEM)}, nil)
+
+	if config.testModules == nil {
+		config.testModules = &modules.TestModules{
+			TestBuildType: modules.BuildCommunity,
+		}
+	}
+	modules.SetTestModules(t, config.testModules)
+	h := &Handler{
+		clusterFeatures: *config.testModules.Features().ToProto(),
+		cfg: Config{
+			AutomaticUpgradesChannels: config.channels,
+			AccessPoint:               ap,
+			PublicProxyAddr:           addr,
+			ProxyClient:               clt,
 		},
-		mockGetClusterCACert: func(context.Context) (*proto.GetClusterCACertResponse, error) {
-			fakeBytes := []byte(fixtures.SigningCertPEM)
-			return &proto.GetClusterCACertResponse{TLSCA: fakeBytes}, nil
+		logger: utils.NewSlogLoggerForTests(),
+	}
+	h.PublicProxyAddr()
+	return h
+}
+
+func TestGetAppJoinScript(t *testing.T) {
+	testTokenID := "f18da1c9f6630a51e8daf121e7451daa"
+	token := &types.ProvisionTokenV2{
+		Metadata: types.Metadata{
+			Name: testTokenID,
 		},
 	}
 
 	badAppName := scriptSettings{
@@ -825,20 +942,24 @@ func TestGetAppJoinScript(t *testing.T) {
 		appURI:         "",
 	}
 
+	h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{token: token})
+	hostname, port, err := utils.SplitHostPort(h.PublicProxyAddr())
+	require.NoError(t, err)
+
 	// Test invalid app data.
-	script, err := getJoinScript(context.Background(), badAppName, m)
+	script, err := h.getJoinScript(context.Background(), badAppName)
 	require.Empty(t, script)
 	require.True(t, trace.IsBadParameter(err))
 
-	script, err = getJoinScript(context.Background(), badAppURI, m)
+	script, err = h.getJoinScript(context.Background(), badAppURI)
 	require.Empty(t, script)
 	require.True(t, trace.IsBadParameter(err))
 
 	// Test various 'good' cases.
 	expectedOutputs := []string{
 		testTokenID,
-		"test-host",
-		"12345678",
+		hostname,
+		port,
 		"sha256:",
 	}
 
@@ -959,7 +1080,7 @@ func TestGetAppJoinScript(t *testing.T) {
 	for _, tc := range tests {
 		tc := tc
 		t.Run(tc.desc, func(t *testing.T) {
-			script, err = getJoinScript(context.Background(), tc.settings, m)
+			script, err = h.getJoinScript(context.Background(), tc.settings)
 			if tc.shouldError {
 				require.Error(t, err)
 				require.Empty(t, script)
@@ -977,53 +1098,46 @@ func TestGetDatabaseJoinScript(t *testing.T) {
 	validToken := "f18da1c9f6630a51e8daf121e7451daa"
 	emptySuggestedAgentMatcherLabelsToken := "f18da1c9f6630a51e8daf121e7451000"
 	internalResourceID := "967d38ff-7a61-4f42-bd2d-c61965b44db0"
+	hostname := "test.example.com"
+	port := 1234
 
-	m := &mockedNodeAPIGetter{
-		mockGetProxyServers: func() ([]types.Server, error) {
-			var s types.ServerV2
-			s.SetPublicAddrs([]string{"test-host:12345678"})
-
-			return []types.Server{&s}, nil
+	token := &types.ProvisionTokenV2{
+		Metadata: types.Metadata{
+			Name: validToken,
 		},
-		mockGetClusterCACert: func(context.Context) (*proto.GetClusterCACertResponse, error) {
-			fakeBytes := []byte(fixtures.SigningCertPEM)
-			return &proto.GetClusterCACertResponse{TLSCA: fakeBytes}, nil
+		Spec: types.ProvisionTokenSpecV2{
+			SuggestedLabels: types.Labels{
+				types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+			},
+			SuggestedAgentMatcherLabels: types.Labels{
+				"env":     apiutils.Strings{"prod"},
+				"product": apiutils.Strings{"*"},
+				"os":      apiutils.Strings{"mac", "linux"},
+			},
 		},
-		mockGetToken: func(_ context.Context, token string) (types.ProvisionToken, error) {
-			provisionToken := &types.ProvisionTokenV2{
-				Metadata: types.Metadata{
-					Name: token,
-				},
-				Spec: types.ProvisionTokenSpecV2{
-					SuggestedLabels: types.Labels{
-						types.InternalResourceIDLabel: utils.Strings{internalResourceID},
-					},
-					SuggestedAgentMatcherLabels: types.Labels{
-						"env":     utils.Strings{"prod"},
-						"product": utils.Strings{"*"},
-						"os":      utils.Strings{"mac", "linux"},
-					},
-				},
-			}
-			if token == validToken {
-				return provisionToken, nil
-			}
-			if token == emptySuggestedAgentMatcherLabelsToken {
-				provisionToken.Spec.SuggestedAgentMatcherLabels = types.Labels{}
-				return provisionToken, nil
-			}
-			return nil, trace.NotFound("token does not exist")
+	}
+
+	noMatcherToken := &types.ProvisionTokenV2{
+		Metadata: types.Metadata{
+			Name: emptySuggestedAgentMatcherLabelsToken,
+		},
+		Spec: types.ProvisionTokenSpecV2{
+			SuggestedLabels: types.Labels{
+				types.InternalResourceIDLabel: apiutils.Strings{internalResourceID},
+			},
 		},
 	}
 
 	for _, test := range []struct {
 		desc            string
 		settings        scriptSettings
+		token           *types.ProvisionTokenV2
 		errAssert       require.ErrorAssertionFunc
 		extraAssertions func(script string)
 	}{
 		{
-			desc: "two installation methods",
+			desc:  "two installation methods",
+			token: token,
 			settings: scriptSettings{
 				token:               validToken,
 				databaseInstallMode: true,
 				appInstallMode:      true,
 			},
 			errAssert: require.Error,
 		},
 		{
-			desc: "valid",
+			desc:  "valid",
+			token: token,
 			settings: scriptSettings{
 				databaseInstallMode: true,
 				token:               validToken,
@@ -1040,7 +1155,8 @@ func TestGetDatabaseJoinScript(t *testing.T) {
 			errAssert: require.NoError,
 			extraAssertions: func(script string) {
 				require.Contains(t, script, validToken)
-				require.Contains(t, script, "test-host")
+				require.Contains(t, script, hostname)
+				require.Contains(t, script, strconv.Itoa(port))
 				require.Contains(t, script, "sha256:")
 				require.Contains(t, script, "--labels ")
 				require.Contains(t, script, fmt.Sprintf("%s=%s", types.InternalResourceIDLabel, internalResourceID))
@@ -1058,7 +1174,8 @@ db_service:
 			},
 		},
 		{
-			desc: "empty suggestedAgentMatcherLabels",
+			desc:  "empty suggestedAgentMatcherLabels",
+			token: noMatcherToken,
 			settings: scriptSettings{
 				databaseInstallMode: true,
 				token:               emptySuggestedAgentMatcherLabelsToken,
 			},
 			errAssert: require.NoError,
 			extraAssertions: func(script string) {
 				require.Contains(t, script, emptySuggestedAgentMatcherLabelsToken)
-				require.Contains(t, script, "test-host")
+				require.Contains(t, script, hostname)
+				require.Contains(t, script, strconv.Itoa(port))
 				require.Contains(t, script, "sha256:")
 				require.Contains(t, script, "--labels ")
 				require.Contains(t, script, fmt.Sprintf("%s=%s", types.InternalResourceIDLabel, internalResourceID))
@@ -1081,7 +1199,13 @@ db_service:
 		},
 	} {
 		t.Run(test.desc, func(t *testing.T) {
-			script, err := getJoinScript(context.Background(), test.settings, m)
+			h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{
+				hostname: hostname,
+				port:     port,
+				token:    test.token,
+			})
+
+			script, err := h.getJoinScript(context.Background(), test.settings)
 			test.errAssert(t, err)
 			if err != nil {
 				require.Empty(t, script)
@@ -1096,30 +1220,13 @@ db_service:
 
 func TestGetDiscoveryJoinScript(t *testing.T) {
 	const validToken = "f18da1c9f6630a51e8daf121e7451daa"
-
-	m := &mockedNodeAPIGetter{
-		mockGetProxyServers: func() ([]types.Server, error) {
-			var s types.ServerV2
-			s.SetPublicAddrs([]string{"test-host:12345678"})
-
-			return []types.Server{&s}, nil
-		},
-		mockGetClusterCACert: func(context.Context) (*proto.GetClusterCACertResponse, error) {
-			fakeBytes := []byte(fixtures.SigningCertPEM)
-			return &proto.GetClusterCACertResponse{TLSCA: fakeBytes}, nil
-		},
-		mockGetToken: func(_ context.Context, token string) (types.ProvisionToken, error) {
-			provisionToken := &types.ProvisionTokenV2{
-				Metadata: types.Metadata{
-					Name: token,
-				},
-				Spec: types.ProvisionTokenSpecV2{},
-			}
-			if token == validToken {
-				return provisionToken, nil
-			}
-			return nil, trace.NotFound("token does not exist")
+	hostname := "test.example.com"
+	port := 1234
+	token := &types.ProvisionTokenV2{
+		Metadata: types.Metadata{
+			Name: validToken,
 		},
+		Spec: types.ProvisionTokenSpecV2{},
 	}
 
 	for _, test := range []struct {
@@ -1138,7 +1245,8 @@ func TestGetDiscoveryJoinScript(t *testing.T) {
 			errAssert: require.NoError,
 			extraAssertions: func(t *testing.T, script string) {
 				require.Contains(t, script, validToken)
-				require.Contains(t, script, "test-host")
+				require.Contains(t, script, hostname)
+				require.Contains(t, script, strconv.Itoa(port))
 				require.Contains(t, script, "sha256:")
 				require.Contains(t, script, "--labels ")
 				require.Contains(t, script, `
@@ -1157,7 +1265,12 @@ discovery_service:
 		},
 	} {
 		t.Run(test.desc, func(t *testing.T) {
-			script, err := getJoinScript(context.Background(), test.settings, m)
+			h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{
+				hostname: hostname,
+				port:     port,
+				token:    token,
+			})
+			script, err := h.getJoinScript(context.Background(), test.settings)
 			test.errAssert(t, err)
 			if err !=
nil { require.Empty(t, script) @@ -1276,28 +1389,9 @@ func TestIsSameRuleSet(t *testing.T) { func TestJoinScript(t *testing.T) { validToken := "f18da1c9f6630a51e8daf121e7451daa" - - m := &mockedNodeAPIGetter{ - mockGetProxyServers: func() ([]types.Server, error) { - return []types.Server{ - &types.ServerV2{ - Spec: types.ServerSpecV2{ - PublicAddrs: []string{"test-host:12345678"}, - Version: teleport.Version, - }, - }, - }, nil - }, - mockGetClusterCACert: func(context.Context) (*proto.GetClusterCACertResponse, error) { - fakeBytes := []byte(fixtures.SigningCertPEM) - return &proto.GetClusterCACertResponse{TLSCA: fakeBytes}, nil - }, - mockGetToken: func(_ context.Context, token string) (types.ProvisionToken, error) { - return &types.ProvisionTokenV2{ - Metadata: types.Metadata{ - Name: token, - }, - }, nil + token := &types.ProvisionTokenV2{ + Metadata: types.Metadata{ + Name: validToken, }, } @@ -1305,8 +1399,11 @@ func TestJoinScript(t *testing.T) { getGravitationalTeleportLinkRegex := regexp.MustCompile(`https://cdn\.teleport\.dev/\${TELEPORT_PACKAGE_NAME}[-_]v?\${TELEPORT_VERSION}`) t.Run("oss", func(t *testing.T) { + h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{ + token: token, + }) // Using the OSS Version, all the links must contain only teleport as package name. - script, err := getJoinScript(context.Background(), scriptSettings{token: validToken}, m) + script, err := h.getJoinScript(context.Background(), scriptSettings{token: validToken}) require.NoError(t, err) matches := getGravitationalTeleportLinkRegex.FindAllString(script, -1) @@ -1321,8 +1418,11 @@ func TestJoinScript(t *testing.T) { t.Run("ent", func(t *testing.T) { // Using the Enterprise Version, the package name must be teleport-ent - modules.SetTestModules(t, &modules.TestModules{TestBuildType: modules.BuildEnterprise}) - script, err := getJoinScript(context.Background(), scriptSettings{token: validToken}, m) + h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{ + testModules: &modules.TestModules{TestBuildType: modules.BuildEnterprise}, + token: token, + }) + script, err := h.getJoinScript(context.Background(), scriptSettings{token: validToken}) require.NoError(t, err) matches := getGravitationalTeleportLinkRegex.FindAllString(script, -1) @@ -1338,45 +1438,76 @@ func TestJoinScript(t *testing.T) { t.Run("using repo", func(t *testing.T) { t.Run("installUpdater is true", func(t *testing.T) { - currentStableCloudVersion := "v99.1.1" - script, err := getJoinScript(context.Background(), scriptSettings{token: validToken, installUpdater: true, automaticUpgradesVersion: currentStableCloudVersion}, m) + currentStableCloudVersion := "1.2.3" + h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{ + testModules: &modules.TestModules{TestFeatures: modules.Features{Cloud: true, AutomaticUpgrades: true}}, + token: token, + channels: automaticupgrades.Channels{ + automaticupgrades.DefaultChannelName: &automaticupgrades.Channel{StaticVersion: currentStableCloudVersion}, + }, + }) + + script, err := h.getJoinScript(context.Background(), scriptSettings{token: validToken}) require.NoError(t, err) - // list of packages must include the updater - require.Contains(t, script, ""+ - " PACKAGE_LIST=${TELEPORT_PACKAGE_PIN_VERSION}\n"+ - " # (warning): This expression is constant. 
Did you forget the $ on a variable?\n"+ - " # Disabling the warning above because expression is templated.\n"+ - " # shellcheck disable=SC2050\n"+ - " if is_using_systemd && [[ \"true\" == \"true\" ]]; then\n"+ - " # Teleport Updater requires systemd.\n"+ - " PACKAGE_LIST+=\" ${TELEPORT_UPDATER_PIN_VERSION}\"\n"+ - " fi\n", - ) + require.Contains(t, script, "UPDATER_STYLE='package'") // Repo channel is stable/cloud require.Contains(t, script, "REPO_CHANNEL='stable/cloud'") // TELEPORT_VERSION is the one provided by https://updates.releases.teleport.dev/v1/stable/cloud/version - require.Contains(t, script, "TELEPORT_VERSION='99.1.1'") + require.Contains(t, script, fmt.Sprintf("TELEPORT_VERSION='%s'", currentStableCloudVersion)) }) t.Run("installUpdater is false", func(t *testing.T) { - script, err := getJoinScript(context.Background(), scriptSettings{token: validToken, installUpdater: false}, m) + h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{ + token: token, + }) + script, err := h.getJoinScript(context.Background(), scriptSettings{token: validToken}) require.NoError(t, err) - require.Contains(t, script, ""+ - " PACKAGE_LIST=${TELEPORT_PACKAGE_PIN_VERSION}\n"+ - " # (warning): This expression is constant. Did you forget the $ on a variable?\n"+ - " # Disabling the warning above because expression is templated.\n"+ - " # shellcheck disable=SC2050\n"+ - " if is_using_systemd && [[ \"false\" == \"true\" ]]; then\n"+ - " # Teleport Updater requires systemd.\n"+ - " PACKAGE_LIST+=\" ${TELEPORT_UPDATER_PIN_VERSION}\"\n"+ - " fi\n", - ) + require.Contains(t, script, "UPDATER_STYLE='none'") // Default based on current version is used instead require.Contains(t, script, "REPO_CHANNEL=''") // Current version must be used require.Contains(t, script, fmt.Sprintf("TELEPORT_VERSION='%s'", teleport.Version)) }) }) + t.Run("using teleport-update", func(t *testing.T) { + testRollout := &autoupdatev1pb.AutoUpdateAgentRollout{Spec: &autoupdatev1pb.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.2", + TargetVersion: "1.2.3", + Schedule: autoupdate.AgentsScheduleImmediate, + AutoupdateMode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyTimeBased, + MaintenanceWindowDuration: durationpb.New(1 * time.Hour), + }} + t.Run("rollout exists and autoupdates are on", func(t *testing.T) { + currentStableCloudVersion := "1.1.1" + config := autoupdateTestHandlerConfig{ + testModules: &modules.TestModules{TestFeatures: modules.Features{Cloud: true, AutomaticUpgrades: true}}, + channels: automaticupgrades.Channels{ + automaticupgrades.DefaultChannelName: &automaticupgrades.Channel{StaticVersion: currentStableCloudVersion}, + }, + rollout: testRollout, + token: token, + } + h := newAutoupdateTestHandler(t, config) + + script, err := h.getJoinScript(context.Background(), scriptSettings{token: validToken}) + require.NoError(t, err) + + // list of packages must include the updater + require.Contains(t, script, "UPDATER_STYLE='binary'") + require.Contains(t, script, fmt.Sprintf("TELEPORT_VERSION='%s'", testRollout.Spec.TargetVersion)) + }) + t.Run("rollout exists and autoupdates are off", func(t *testing.T) { + h := newAutoupdateTestHandler(t, autoupdateTestHandlerConfig{ + rollout: testRollout, + token: token, + }) + script, err := h.getJoinScript(context.Background(), scriptSettings{token: validToken}) + require.NoError(t, err) + require.Contains(t, script, "UPDATER_STYLE='binary'") + require.Contains(t, script, fmt.Sprintf("TELEPORT_VERSION='%s'", testRollout.Spec.TargetVersion)) + 
}) + }) } func TestAutomaticUpgrades(t *testing.T) { @@ -1484,32 +1615,3 @@ func TestIsSameAzureRuleSet(t *testing.T) { }) } } - -type mockedNodeAPIGetter struct { - mockGetProxyServers func() ([]types.Server, error) - mockGetClusterCACert func(ctx context.Context) (*proto.GetClusterCACertResponse, error) - mockGetToken func(ctx context.Context, token string) (types.ProvisionToken, error) -} - -func (m *mockedNodeAPIGetter) GetProxies() ([]types.Server, error) { - if m.mockGetProxyServers != nil { - return m.mockGetProxyServers() - } - - return nil, trace.NotImplemented("mockGetProxyServers not implemented") -} - -func (m *mockedNodeAPIGetter) GetClusterCACert(ctx context.Context) (*proto.GetClusterCACertResponse, error) { - if m.mockGetClusterCACert != nil { - return m.mockGetClusterCACert(ctx) - } - - return nil, trace.NotImplemented("mockGetClusterCACert not implemented") -} - -func (m *mockedNodeAPIGetter) GetToken(ctx context.Context, token string) (types.ProvisionToken, error) { - if m.mockGetToken != nil { - return m.mockGetToken(ctx, token) - } - return nil, trace.NotImplemented("mockGetToken not implemented") -} diff --git a/lib/web/scripts.go b/lib/web/scripts.go new file mode 100644 index 0000000000000..7dc656958e1ad --- /dev/null +++ b/lib/web/scripts.go @@ -0,0 +1,165 @@ +/* + * Teleport + * Copyright (C) 2025 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package web + +import ( + "context" + "fmt" + "net/http" + "os" + "strconv" + + "github.com/coreos/go-semver/semver" + "github.com/gravitational/trace" + "github.com/julienschmidt/httprouter" + + "github.com/gravitational/teleport" + "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/lib/modules" + "github.com/gravitational/teleport/lib/utils/teleportassets" + "github.com/gravitational/teleport/lib/web/scripts" +) + +const insecureParamName = "insecure" + +// installScriptHandle handles calls for "/scripts/install.sh" and responds with a bash script installing Teleport +// by downloading and running `teleport-update`. This installation script does not start the agent, join it, +// or configure its services. This is handled by the "/scripts/:token/install-*.sh" scripts. +func (h *Handler) installScriptHandle(w http.ResponseWriter, r *http.Request, params httprouter.Params) (any, error) { + // This is a hack because the router is not allowing us to register "/scripts/install.sh", so we use + // the parameter ":token" to match the script name. + // Currently, only "install.sh" is supported. 
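+ // For example (cluster address is illustrative), the script can be fetched and run with: + // curl -fsS 'https://teleport.example.com:443/scripts/install.sh' | bash + // Appending the '?insecure=true' query parameter (parsed below) makes the generated script skip TLS verification; this is meant for test setups only.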
+ if params.ByName("token") != "install.sh" { + return nil, trace.NotFound(`Route not found, query "/scripts/install.sh" for the install-only script, or "/scripts/:token/install-node.sh" for the install + join script.`) + } + + // TODO(hugoShaka): cache function + opts, err := h.installScriptOptions(r.Context()) + if err != nil { + return nil, trace.Wrap(err, "Failed to build install script options") + } + + if insecure := r.URL.Query().Get(insecureParamName); insecure != "" { + v, err := strconv.ParseBool(insecure) + if err != nil { + return nil, trace.BadParameter("failed to parse insecure flag %q: %v", insecure, err) + } + opts.Insecure = v + } + + script, err := scripts.GetInstallScript(r.Context(), opts) + if err != nil { + h.logger.WarnContext(r.Context(), "Failed to get install script", "error", err) + return nil, trace.Wrap(err, "getting script") + } + + w.WriteHeader(http.StatusOK) + if _, err := fmt.Fprintln(w, script); err != nil { + h.logger.WarnContext(r.Context(), "Failed to write install script", "error", err) + } + + return nil, nil +} + +// installScriptOptions computes the agent installation options based on the proxy configuration and the cluster status. +// This includes: +// - the type of automatic updates +// - the desired version +// - the proxy address (used for updates) +// - the Teleport artifact name and CDN +func (h *Handler) installScriptOptions(ctx context.Context) (scripts.InstallScriptOptions, error) { + const defaultGroup, defaultUpdater = "", "" + + version, err := h.autoUpdateAgentVersion(ctx, defaultGroup, defaultUpdater) + if err != nil { + h.logger.WarnContext(ctx, "Failed to get intended agent version", "error", err) + version = teleport.Version + } + + // if there's a rollout, we do new autoupdates + _, rolloutErr := h.cfg.AccessPoint.GetAutoUpdateAgentRollout(ctx) + if rolloutErr != nil && !(trace.IsNotFound(rolloutErr) || trace.IsNotImplemented(rolloutErr)) { + h.logger.WarnContext(ctx, "Failed to get rollout", "error", rolloutErr) + return scripts.InstallScriptOptions{}, trace.Wrap(rolloutErr, "failed to check the autoupdate agent rollout state") + } + + var autoupdateStyle scripts.AutoupdateStyle + switch { + case rolloutErr == nil: + autoupdateStyle = scripts.UpdaterBinaryAutoupdate + case automaticUpgrades(h.clusterFeatures): + autoupdateStyle = scripts.PackageManagerAutoupdate + default: + autoupdateStyle = scripts.NoAutoupdate + } + + var teleportFlavor string + switch modules.GetModules().BuildType() { + case modules.BuildEnterprise: + teleportFlavor = types.PackageNameEnt + case modules.BuildOSS, modules.BuildCommunity: + teleportFlavor = types.PackageNameOSS + default: + h.logger.WarnContext(ctx, "Unknown build type, defaulting to the 'teleport' package.", "type", modules.GetModules().BuildType()) + teleportFlavor = types.PackageNameOSS + } + + cdnBaseURL, err := getCDNBaseURL(version) + if err != nil { + h.logger.WarnContext(ctx, "Failed to get CDN base URL", "error", err) + return scripts.InstallScriptOptions{}, trace.Wrap(err) + } + + return scripts.InstallScriptOptions{ + AutoupdateStyle: autoupdateStyle, + TeleportVersion: version, + CDNBaseURL: cdnBaseURL, + ProxyAddr: h.PublicProxyAddr(), + TeleportFlavor: teleportFlavor, + FIPS: modules.IsBoringBinary(), + }, nil +} + +// EnvVarCDNBaseURL is the environment variable that allows users to override the Teleport base CDN URL used in the installation script. +// Setting this value is required for testing (make production builds install from the dev CDN, and vice versa). 
+// As we (the Teleport company) don't distribute AGPL binaries, this must be set when using a Teleport OSS build. +// Example values: +// - "https://cdn.teleport.dev" (prod) +// - "https://cdn.cloud.gravitational.io" (dev builds/staging) +const EnvVarCDNBaseURL = "TELEPORT_CDN_BASE_URL" + +func getCDNBaseURL(version string) (string, error) { + // If the user explicitly overrides the CDN base URL, we use it. + if override := os.Getenv(EnvVarCDNBaseURL); override != "" { + return override, nil + } + + v, err := semver.NewVersion(version) + if err != nil { + return "", trace.Wrap(err) + } + + // For backward compatibility we don't fail if the user is running AGPL and + // did not specify the CDN URL. However we will fail in v18 for this as we + // cannot automatically install binaries subject to a license the user has + // not agreed to. + + return teleportassets.CDNBaseURLForVersion(v), nil +} diff --git a/lib/web/scripts/install.go b/lib/web/scripts/install.go new file mode 100644 index 0000000000000..097c916eb9a69 --- /dev/null +++ b/lib/web/scripts/install.go @@ -0,0 +1,196 @@ +/* + * Teleport + * Copyright (C) 2025 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . + */ + +package scripts + +import ( + "context" + _ "embed" + "net/url" + "strings" + + "github.com/google/safetext/shsprintf" + "github.com/gravitational/trace" + + "github.com/gravitational/teleport/lib/utils/teleportassets" + "github.com/gravitational/teleport/lib/web/scripts/oneoff" +) + +// AutoupdateStyle represents the kind of autoupdate mechanism the script should use. +type AutoupdateStyle int + +const ( + // NoAutoupdate means the installed Teleport should not autoupdate. + NoAutoupdate AutoupdateStyle = iota + // PackageManagerAutoupdate means the installed Teleport should update via a script triggering package manager + // updates. The script lives in the 'teleport-ent-update' package and was our original attempt at automatic updates. + // See RFD-109 for more details: https://github.com/gravitational/teleport/blob/master/rfd/0109-cloud-agent-upgrades.md + PackageManagerAutoupdate + // UpdaterBinaryAutoupdate means the installed Teleport should update via the teleport-update binary. + // This update style does not depend on any package manager (although it has a system dependency to wake up the + // updater). + // See RFD-184 for more details: https://github.com/gravitational/teleport/blob/master/rfd/0184-agent-auto-updates.md + UpdaterBinaryAutoupdate + + teleportUpdateDefaultCDN = teleportassets.TeleportReleaseCDN +) + +func (style AutoupdateStyle) String() string { + switch style { + case PackageManagerAutoupdate: + return "package" + case UpdaterBinaryAutoupdate: + return "binary" + case NoAutoupdate: + return "none" + default: + return "unknown" + } +} + +// InstallScriptOptions contains the Teleport installation options used to generate installation scripts. 
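+// +// A minimal sketch of how a caller might populate it (all values below are illustrative, not defaults): +// +// opts := InstallScriptOptions{ +// AutoupdateStyle: UpdaterBinaryAutoupdate, +// TeleportVersion: "17.3.0", +// ProxyAddr: "teleport.example.com:443", +// TeleportFlavor: "teleport", +// }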
+type InstallScriptOptions struct { + AutoupdateStyle AutoupdateStyle + // TeleportVersion that should be installed. Without the leading "v". + TeleportVersion string + // CDNBaseURL is the URL of the CDN hosting teleport tarballs. + // If left empty, the 'teleport-update' installer will pick the one to use. + // For example: "https://cdn.example.com" + CDNBaseURL string + // ProxyAddr is the address of the Teleport Proxy service that will be used + // by the updater to fetch the desired version. Teleport Addrs are + // 'hostname:port' (no scheme or path). + ProxyAddr string + // TeleportFlavor is the name of the Teleport artifact fetched from the CDN. + // Common values are "teleport" and "teleport-ent". + TeleportFlavor string + // FIPS indicates whether the installed Teleport version should use Teleport + // binaries built for FIPS compliance. + FIPS bool + // Insecure disables TLS certificate verification on the teleport-update command. + // This is meant for testing purposes. + // This does not disable the TLS certificate verification when downloading + // the artifacts from the CDN. + // An agent installed in insecure mode will not be able to automatically update. + Insecure bool +} + +// Check validates that the minimal options are set. +func (o *InstallScriptOptions) Check() error { + switch o.AutoupdateStyle { + case NoAutoupdate, PackageManagerAutoupdate: + return nil + case UpdaterBinaryAutoupdate: + // We'll do the checks later. + default: + return trace.BadParameter("unsupported autoupdate style: %v", o.AutoupdateStyle) + } + if o.ProxyAddr == "" { + return trace.BadParameter("Proxy address is required") + } + + if o.TeleportVersion == "" { + return trace.BadParameter("Teleport version is required") + } + + if o.TeleportFlavor == "" { + return trace.BadParameter("Teleport flavor is required") + } + + if o.CDNBaseURL != "" { + u, err := url.Parse(o.CDNBaseURL) + if err != nil { + return trace.Wrap(err, "failed to parse CDN base URL") + } + if u.Scheme != "https" { + return trace.BadParameter("CDNBaseURL's scheme must be 'https://'") + } + } + return nil +} + +// oneOffParams returns the oneoff.OneOffScriptParams that will install Teleport +// using the oneoff.sh script to download and execute 'teleport-update'. +func (o *InstallScriptOptions) oneOffParams() (params oneoff.OneOffScriptParams) { + // Add the leading 'v' if it's missing. + version := o.TeleportVersion + if !strings.HasPrefix(o.TeleportVersion, "v") { + version = "v" + o.TeleportVersion + } + + args := []string{"enable", "--proxy", shsprintf.EscapeDefaultContext(o.ProxyAddr)} + // Pass the base-url override if the base url is set and is not the default one. + if o.CDNBaseURL != "" && o.CDNBaseURL != teleportUpdateDefaultCDN { + args = append(args, "--base-url", shsprintf.EscapeDefaultContext(o.CDNBaseURL)) + } + + successMessage := "Teleport successfully installed." + if o.Insecure { + args = append(args, "--insecure") + successMessage += " --insecure was used during installation, automatic updates will not work unless the Proxy Service presents a certificate trusted by the system." + } + + return oneoff.OneOffScriptParams{ + Entrypoint: "teleport-update", + EntrypointArgs: strings.Join(args, " "), + CDNBaseURL: o.CDNBaseURL, + TeleportVersion: version, + TeleportFlavor: o.TeleportFlavor, + SuccessMessage: successMessage, + TeleportFIPS: o.FIPS, + } +} + +// GetInstallScript returns a Teleport installation script. +// This script only installs Teleport; it does not start the agent, join it, or configure its services. 
+// See the InstallNodeBashScript if you need a more complete setup. +func GetInstallScript(ctx context.Context, opts InstallScriptOptions) (string, error) { + switch opts.AutoupdateStyle { + case NoAutoupdate, PackageManagerAutoupdate: + return getLegacyInstallScript(ctx, opts) + case UpdaterBinaryAutoupdate: + return getUpdaterInstallScript(ctx, opts) + default: + return "", trace.BadParameter("unsupported autoupdate style: %v", opts.AutoupdateStyle) + } +} + +//go:embed install/install.sh +var legacyInstallScript string + +// getLegacyInstallScript returns the installation script that we have been serving at +// "https://cdn.teleport.dev/install.sh". This script installs teleport via package manager +// or by unpacking the tarball. Its usage should be phased out in favor of the updater-based +// installation script served by getUpdaterInstallScript. +func getLegacyInstallScript(ctx context.Context, opts InstallScriptOptions) (string, error) { + return legacyInstallScript, nil +} + +// getUpdaterInstallScript returns an installation script that downloads teleport-update +// and uses it to install a self-updating version of Teleport. +// This installation script is based on the oneoff.sh script and will become the standard +// way of installing Teleport. +func getUpdaterInstallScript(ctx context.Context, opts InstallScriptOptions) (string, error) { + if err := opts.Check(); err != nil { + return "", trace.Wrap(err, "invalid install script parameters") + } + + scriptParams := opts.oneOffParams() + + return oneoff.BuildScript(scriptParams) +} diff --git a/lib/web/scripts/install/install.sh b/lib/web/scripts/install/install.sh new file mode 100755 index 0000000000000..52d3da00e4f63 --- /dev/null +++ b/lib/web/scripts/install/install.sh @@ -0,0 +1,430 @@ +#!/bin/bash +# Copyright 2022 Gravitational, Inc + +# This script detects the current Linux distribution and installs Teleport +# through its package manager, if supported, or downloading a tarball otherwise. +# We'll download Teleport from the official website and checksum it to make sure it was properly +# downloaded before executing. + +# The script is wrapped inside a function to protect against the connection being interrupted +# in the middle of the stream. + +# For more download options, head to https://goteleport.com/download/ + +set -euo pipefail + +# download uses curl or wget to download a teleport binary +download() { + URL=$1 + TMP_PATH=$2 + + echo "Downloading $URL" + if type curl &>/dev/null; then + set -x + # shellcheck disable=SC2086 + $SUDO $CURL -o "$TMP_PATH" "$URL" + else + set -x + # shellcheck disable=SC2086 + $SUDO $CURL -O "$TMP_PATH" "$URL" + fi + set +x +} + +install_via_apt_get() { + echo "Installing Teleport v$TELEPORT_VERSION via apt-get" + add_apt_key + set -x + $SUDO apt-get install -y "teleport$TELEPORT_SUFFIX=$TELEPORT_VERSION" + set +x + if [ "$TELEPORT_EDITION" = "cloud" ]; then + set -x + $SUDO apt-get install -y teleport-ent-updater + set +x + fi +} + +add_apt_key() { + APT_REPO_ID=$ID + APT_REPO_VERSION_CODENAME=$VERSION_CODENAME + IS_LEGACY=0 + + # check if we must use legacy .asc key + case "$ID" in + ubuntu | pop | neon | zorin) + if ! 
expr "$VERSION_ID" : "2.*" >/dev/null; then + IS_LEGACY=1 + fi + ;; + debian | raspbian) + if [ "$VERSION_ID" -lt 11 ]; then + IS_LEGACY=1 + fi + ;; + linuxmint | parrot) + if [ "$VERSION_ID" -lt 5 ]; then + IS_LEGACY=1 + fi + ;; + elementary) + if [ "$VERSION_ID" -lt 6 ]; then + IS_LEGACY=1 + fi + ;; + kali) + YEAR="$(echo "$VERSION_ID" | cut -f1 -d.)" + if [ "$YEAR" -lt 2021 ]; then + IS_LEGACY=1 + fi + ;; + esac + + if [[ "$IS_LEGACY" == 0 ]]; then + # set APT_REPO_ID if necessary + case "$ID" in + linuxmint | kali | elementary | pop | raspbian | neon | zorin | parrot) + APT_REPO_ID=$ID_LIKE + ;; + esac + + # set APT_REPO_VERSION_CODENAME if necessary + case "$ID" in + linuxmint | elementary | pop | neon | zorin) + APT_REPO_VERSION_CODENAME=$UBUNTU_CODENAME + ;; + kali) + APT_REPO_VERSION_CODENAME="bullseye" + ;; + parrot) + APT_REPO_VERSION_CODENAME="buster" + ;; + esac + fi + + echo "Downloading Teleport's PGP public key..." + TEMP_DIR=$(mktemp -d -t teleport-XXXXXXXXXX) + MAJOR=$(echo "$TELEPORT_VERSION" | cut -f1 -d.) + TELEPORT_REPO="" + + CHANNEL="stable/v${MAJOR}" + if [ "$TELEPORT_EDITION" = "cloud" ]; then + CHANNEL="stable/cloud" + fi + + if [[ "$IS_LEGACY" == 1 ]]; then + if ! type gpg >/dev/null; then + echo "Installing gnupg" + set -x + $SUDO apt-get update + $SUDO apt-get install -y gnupg + set +x + fi + TMP_KEY="$TEMP_DIR/teleport-pubkey.asc" + download "https://deb.releases.teleport.dev/teleport-pubkey.asc" "$TMP_KEY" + set -x + $SUDO apt-key add "$TMP_KEY" + set +x + TELEPORT_REPO="deb https://apt.releases.teleport.dev/${APT_REPO_ID?} ${APT_REPO_VERSION_CODENAME?} ${CHANNEL}" + else + TMP_KEY="$TEMP_DIR/teleport-pubkey.gpg" + download "https://apt.releases.teleport.dev/gpg" "$TMP_KEY" + set -x + $SUDO mkdir -p /etc/apt/keyrings + $SUDO cp "$TMP_KEY" /etc/apt/keyrings/teleport-archive-keyring.asc + set +x + TELEPORT_REPO="deb [signed-by=/etc/apt/keyrings/teleport-archive-keyring.asc] https://apt.releases.teleport.dev/${APT_REPO_ID?} ${APT_REPO_VERSION_CODENAME?} ${CHANNEL}" + fi + + set -x + echo "$TELEPORT_REPO" | $SUDO tee /etc/apt/sources.list.d/teleport.list >/dev/null + set +x + + set -x + $SUDO apt-get update + set +x +} + +# $1 is the value of the $ID path segment in the YUM repo URL. In +# /etc/os-release, this is either the value of $ID or $ID_LIKE. +install_via_yum() { + # shellcheck source=/dev/null + source /etc/os-release + + # Get the major version from the version ID. 
+ VERSION_ID=$(echo "$VERSION_ID" | grep -Eo "^[0-9]+") + TELEPORT_MAJOR_VERSION="v$(echo "$TELEPORT_VERSION" | grep -Eo "^[0-9]+")" + + CHANNEL="stable/${TELEPORT_MAJOR_VERSION}" + if [ "$TELEPORT_EDITION" = "cloud" ]; then + CHANNEL="stable/cloud" + fi + + if type dnf &>/dev/null; then + echo "Installing Teleport v$TELEPORT_VERSION through dnf" + $SUDO dnf install -y 'dnf-command(config-manager)' + $SUDO dnf config-manager --add-repo "$(rpm --eval "https://yum.releases.teleport.dev/$1/$VERSION_ID/Teleport/%{_arch}/$CHANNEL/teleport-yum.repo")" + $SUDO dnf install -y "teleport$TELEPORT_SUFFIX-$TELEPORT_VERSION" + + if [ "$TELEPORT_EDITION" = "cloud" ]; then + $SUDO dnf install -y teleport-ent-updater + fi + + else + echo "Installing Teleport v$TELEPORT_VERSION through yum" + $SUDO yum install -y yum-utils + $SUDO yum-config-manager --add-repo "$(rpm --eval "https://yum.releases.teleport.dev/$1/$VERSION_ID/Teleport/%{_arch}/$CHANNEL/teleport-yum.repo")" + $SUDO yum install -y "teleport$TELEPORT_SUFFIX-$TELEPORT_VERSION" + + if [ "$TELEPORT_EDITION" = "cloud" ]; then + $SUDO yum install -y teleport-ent-updater + fi + fi + set +x +} + +install_via_zypper() { + # shellcheck source=/dev/null + source /etc/os-release + + # Get the major version from the version ID. + VERSION_ID=$(echo "$VERSION_ID" | grep -Eo "^[0-9]+") + TELEPORT_MAJOR_VERSION="v$(echo "$TELEPORT_VERSION" | grep -Eo "^[0-9]+")" + + CHANNEL="stable/${TELEPORT_MAJOR_VERSION}" + if [ "$TELEPORT_EDITION" = "cloud" ]; then + CHANNEL="stable/cloud" + fi + + $SUDO rpm --import https://zypper.releases.teleport.dev/gpg + $SUDO zypper addrepo --refresh --repo "$(rpm --eval "https://zypper.releases.teleport.dev/$ID/$VERSION_ID/Teleport/%{_arch}/$CHANNEL/teleport-zypper.repo")" + $SUDO zypper --gpg-auto-import-keys refresh teleport + $SUDO zypper install -y "teleport$TELEPORT_SUFFIX" + + if [ "$TELEPORT_EDITION" = "cloud" ]; then + $SUDO zypper install -y teleport-ent-updater + fi + + set +x +} + + +# download .tar.gz file via curl/wget, unzip it and run the install script +install_via_curl() { + TEMP_DIR=$(mktemp -d -t teleport-XXXXXXXXXX) + + TELEPORT_FILENAME="teleport$TELEPORT_SUFFIX-v$TELEPORT_VERSION-linux-$ARCH-bin.tar.gz" + URL="https://cdn.teleport.dev/${TELEPORT_FILENAME}" + download "${URL}" "${TEMP_DIR}/${TELEPORT_FILENAME}" + + TMP_CHECKSUM="${TEMP_DIR}/${TELEPORT_FILENAME}.sha256" + download "${URL}.sha256" "$TMP_CHECKSUM" + + set -x + cd "$TEMP_DIR" + # shellcheck disable=SC2086 + $SUDO $SHA_COMMAND -c "$TMP_CHECKSUM" + cd - + + $SUDO tar -xzf "${TEMP_DIR}/${TELEPORT_FILENAME}" -C "$TEMP_DIR" + $SUDO "$TEMP_DIR/teleport/install" + set +x +} + +# wrap script in a function so a partially downloaded script +# doesn't execute +install_teleport() { + # exit if not on Linux + if [[ $(uname) != "Linux" ]]; then + echo "ERROR: This script works only for Linux. 
Please go to the downloads page to find the proper installation method for your operating system:" + echo "https://goteleport.com/download/" + exit 1 + fi + + KERNEL_VERSION=$(uname -r) + MIN_VERSION="2.6.23" + if [ $MIN_VERSION != "$(echo -e "$MIN_VERSION\n$KERNEL_VERSION" | sort -V | head -n1)" ]; then + echo "ERROR: Teleport requires Linux kernel version $MIN_VERSION+" + exit 1 + fi + + # check if can run as admin either by running as root or by + # having 'sudo' or 'doas' installed + IS_ROOT="" + SUDO="" + if [ "$(id -u)" = 0 ]; then + # running as root, no need for sudo/doas + IS_ROOT="YES" + SUDO="" + elif type sudo &>/dev/null; then + SUDO="sudo" + elif type doas &>/dev/null; then + SUDO="doas" + fi + + if [ -z "$SUDO" ] && [ -z "$IS_ROOT" ]; then + echo "ERROR: The installer requires a way to run commands as root." + echo "Either run this script as root or install sudo/doas." + exit 1 + fi + + # require curl/wget + CURL="" + if type curl &>/dev/null; then + CURL="curl -fL" + elif type wget &>/dev/null; then + CURL="wget" + fi + if [ -z "$CURL" ]; then + echo "ERROR: This script requires either curl or wget in order to download files. Please install one of them and try again." + exit 1 + fi + + # require shasum/sha256sum + SHA_COMMAND="" + if type shasum &>/dev/null; then + SHA_COMMAND="shasum -a 256" + elif type sha256sum &>/dev/null; then + SHA_COMMAND="sha256sum" + else + echo "ERROR: This script requires sha256sum or shasum to validate the download. Please install it and try again." + exit 1 + fi + + # detect distro + OS_RELEASE=/etc/os-release + ID="" + ID_LIKE="" + VERSION_CODENAME="" + UBUNTU_CODENAME="" + if [[ -f "$OS_RELEASE" ]]; then + # shellcheck source=/dev/null + . $OS_RELEASE + fi + # Some $ID_LIKE values include multiple distro names in an arbitrary order, so + # evaluate the first one. + ID_LIKE="${ID_LIKE%% *}" + + # detect architecture + ARCH="" + case $(uname -m) in + x86_64) + ARCH="amd64" + ;; + i386) + ARCH="386" + ;; + armv7l) + ARCH="arm" + ;; + aarch64) + ARCH="arm64" + ;; + **) + echo "ERROR: Your system's architecture isn't officially supported or couldn't be determined." + echo "Please refer to the installation guide for more information:" + echo "https://goteleport.com/docs/installation/" + exit 1 + ;; + esac + + # select install method based on distribution + # if ID is debian derivate, run apt-get + case "$ID" in + debian | ubuntu | kali | linuxmint | pop | raspbian | neon | zorin | parrot | elementary) + install_via_apt_get + ;; + # if ID is amazon Linux 2/RHEL/etc, run yum + centos | rhel | amzn) + install_via_yum "$ID" + ;; + sles) + install_via_zypper + ;; + *) + # before downloading manually, double check if we didn't miss any debian or + # rh/fedora derived distros using the ID_LIKE var. + case "${ID_LIKE}" in + ubuntu | debian) + install_via_apt_get + ;; + centos | fedora | rhel) + # There is no repository for "fedora", and there is no difference + # between the repositories for "centos" and "rhel", so pick an arbitrary + # one. + install_via_yum rhel + ;; + *) + if [ "$TELEPORT_EDITION" = "cloud" ]; then + echo "The system does not support a package manager, which is required for Teleport Enterprise Cloud." + exit 1 + fi + + # if ID and ID_LIKE didn't return a supported distro, download through curl + echo "There is no officially supported package for your package manager. Downloading and installing Teleport via curl." 
+ install_via_curl + ;; + esac + ;; + esac + + GREEN='\033[0;32m' + COLOR_OFF='\033[0m' + + echo "" + echo -e "${GREEN}$(teleport version) installed successfully!${COLOR_OFF}" + echo "" + echo "The following commands are now available:" + if type teleport &>/dev/null; then + echo " teleport - The daemon that runs the Auth Service, Proxy Service, and other Teleport services." + fi + if type tsh &>/dev/null; then + echo " tsh - A tool that lets end users interact with Teleport." + fi + if type tctl &>/dev/null; then + echo " tctl - An administrative tool that can configure the Teleport Auth Service." + fi + if type tbot &>/dev/null; then + echo " tbot - Teleport Machine ID client." + fi + if type fdpass-teleport &>/dev/null; then + echo " fdpass-teleport - Teleport Machine ID client." + fi + if type teleport-update &>/dev/null; then + echo " teleport-update - Teleport auto-update agent." + fi +} + +# The suffix is "-ent" if we are installing a commercial edition of Teleport and +# empty for Teleport Community Edition. +TELEPORT_SUFFIX="" +TELEPORT_VERSION="" +TELEPORT_EDITION="" +if [ $# -ge 1 ] && [ -n "$1" ]; then + TELEPORT_VERSION=$1 +else + echo "ERROR: Please provide the version you want to install (e.g., 10.1.9)." + exit 1 +fi + +if ! echo "$1" | grep -qE "[0-9]+\.[0-9]+\.[0-9]+"; then + echo "ERROR: The first parameter must be a version number, e.g., 10.1.9." + exit 1 +fi + +if [ $# -ge 2 ] && [ -n "$2" ]; then + TELEPORT_EDITION=$2 + + case $TELEPORT_EDITION in + enterprise | cloud) + TELEPORT_SUFFIX="-ent" + ;; + # An empty edition defaults to OSS. + oss | "" ) + ;; + *) + echo 'ERROR: The second parameter must be "oss", "cloud", or "enterprise".' + exit 1 + ;; + esac +fi +install_teleport diff --git a/lib/web/scripts/install_node.go b/lib/web/scripts/install_node.go index 87fffd7b587d8..ea6f68f1f54b7 100644 --- a/lib/web/scripts/install_node.go +++ b/lib/web/scripts/install_node.go @@ -19,19 +19,30 @@ package scripts import ( + "bytes" + "context" _ "embed" "fmt" + regexp "regexp" "sort" + "strconv" "strings" "text/template" + "github.com/google/safetext/shsprintf" "github.com/gravitational/trace" "gopkg.in/yaml.v3" + "k8s.io/apimachinery/pkg/util/validation" "github.com/gravitational/teleport/api/types" - "github.com/gravitational/teleport/api/utils" + apiutils "github.com/gravitational/teleport/api/utils" + "github.com/gravitational/teleport/lib/automaticupgrades" + "github.com/gravitational/teleport/lib/utils" ) +// appURIPattern is a regexp excluding invalid characters from application URIs. +var appURIPattern = regexp.MustCompile(`^[-\w/:. ]+$`) + // ErrorBashScript is used to display friendly error message when // there is an error prepping the actual script. var ErrorBashScript = []byte(` @@ -44,11 +55,146 @@ exit 1 // to install teleport and join a teleport cluster. // //go:embed node-join/install.sh -var installNodeBashScript string +var installNodeBashScriptRaw string + +var installNodeBashScript = template.Must(template.New("nodejoin").Parse(installNodeBashScriptRaw)) + +// InstallNodeScriptOptions contains the options configuring the install-node script. 
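+// +// A rough sketch of a caller-side configuration (every value below is illustrative, not a default): +// +// opts := InstallNodeScriptOptions{ +// InstallOptions: installOpts, // an InstallScriptOptions value +// Token: "abcd1234", +// CAPins: []string{"sha256:ad4e..."}, +// JoinMethod: types.JoinMethodToken, +// Labels: types.Labels{"env": apiutils.Strings{"dev"}}, +// }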
+type InstallNodeScriptOptions struct { + // Required for installation + InstallOptions InstallScriptOptions + + // Required for joining + Token string + CAPins []string + JoinMethod types.JoinMethod + + // Required for service configuration + Labels types.Labels + LabelMatchers types.Labels + + AppServiceEnabled bool + AppName string + AppURI string + + DatabaseServiceEnabled bool + DiscoveryServiceEnabled bool + DiscoveryGroup string +} + +// GetNodeInstallScript generates an agent installation script which will: +// - install Teleport +// - configure how the Teleport agent joins the cluster +// - configure the Teleport agent services (currently supports SSH, app, database, and discovery) +// - start the agent +func GetNodeInstallScript(ctx context.Context, opts InstallNodeScriptOptions) (string, error) { + // Computing installation-related values + + // By default, it will use `stable/v<major>`, e.g. stable/v12 + repoChannel := "" + + switch opts.InstallOptions.AutoupdateStyle { + case NoAutoupdate, UpdaterBinaryAutoupdate: + case PackageManagerAutoupdate: + // Note: This is a cloud-specific repo. We could use the new stable/rolling + // repo in the non-cloud case, but the script has never supported enabling autoupdates + // in a non-cloud cluster. + // We will prefer using the new updater binary for autoupdates in self-hosted setups. + repoChannel = automaticupgrades.DefaultCloudChannelName + default: + return "", trace.BadParameter("unsupported autoupdate style: %v", opts.InstallOptions.AutoupdateStyle) + } + + // Computing joining-related values + hostname, portStr, err := utils.SplitHostPort(opts.InstallOptions.ProxyAddr) + if err != nil { + return "", trace.Wrap(err) + } + + // Computing service configuration-related values + labelsList := []string{} + for labelKey, labelValues := range opts.Labels { + labels := strings.Join(labelValues, " ") + labelsList = append(labelsList, fmt.Sprintf("%s=%s", labelKey, labels)) + } + + var dbServiceResourceLabels []string + if opts.DatabaseServiceEnabled { + dbServiceResourceLabels, err = marshalLabelsYAML(opts.LabelMatchers, 6) + if err != nil { + return "", trace.Wrap(err) + } + } + + var appServerResourceLabels []string + + if opts.AppServiceEnabled { + if errs := validation.IsDNS1035Label(opts.AppName); len(errs) > 0 { + return "", trace.BadParameter("appName %q must be a valid DNS subdomain: https://goteleport.com/docs/enroll-resources/application-access/guides/connecting-apps/#application-name", opts.AppName) + } + if !appURIPattern.MatchString(opts.AppURI) { + return "", trace.BadParameter("appURI %q contains invalid characters", opts.AppURI) + } + + appServerResourceLabels, err = marshalLabelsYAML(opts.Labels, 4) + if err != nil { + return "", trace.Wrap(err) + } + } + + if opts.DiscoveryServiceEnabled { + if opts.DiscoveryGroup == "" { + return "", trace.BadParameter("discovery group is required") + } + } + + var buf bytes.Buffer + + // TODO(hugoShaka): burn this map and replace it by something saner in a future PR. + + // This section relies on Go's default zero values to make sure that the settings + // are correct when not installing an app. + err = installNodeBashScript.Execute(&buf, map[string]interface{}{ + "token": opts.Token, + "hostname": hostname, + "port": portStr, + // The install.sh script has some manually generated configs and some + // generated by the `teleport config` commands. The old bash + // version used space-delimited values whereas the teleport command uses + // a comma delimiter. 
The old version can be removed when the install.sh + // file has been completely converted over. + "caPinsOld": strings.Join(opts.CAPins, " "), + "caPins": strings.Join(opts.CAPins, ","), + "packageName": opts.InstallOptions.TeleportFlavor, + "repoChannel": repoChannel, + "installUpdater": opts.InstallOptions.AutoupdateStyle.String(), + "version": shsprintf.EscapeDefaultContext(opts.InstallOptions.TeleportVersion), + "appInstallMode": strconv.FormatBool(opts.AppServiceEnabled), + "appServerResourceLabels": appServerResourceLabels, + "appName": shsprintf.EscapeDefaultContext(opts.AppName), + "appURI": shsprintf.EscapeDefaultContext(opts.AppURI), + "joinMethod": shsprintf.EscapeDefaultContext(string(opts.JoinMethod)), + "labels": strings.Join(labelsList, ","), + "databaseInstallMode": strconv.FormatBool(opts.DatabaseServiceEnabled), + // No one knows why this field is in snake case ¯\_(ツ)_/¯ + // Also, even if the name is similar to appServerResourceLabels, they must not be confused. + // appServerResourceLabels are labels to apply on the declared app, while + // db_service_resource_labels are label matchers for the service to select resources to serve. + "db_service_resource_labels": dbServiceResourceLabels, + "discoveryInstallMode": strconv.FormatBool(opts.DiscoveryServiceEnabled), + "discoveryGroup": shsprintf.EscapeDefaultContext(opts.DiscoveryGroup), + }) + if err != nil { + return "", trace.Wrap(err) + } + + return buf.String(), nil +} -var InstallNodeBashScript = template.Must(template.New("nodejoin").Parse(installNodeBashScript)) +// TODO(hugoShaka): burn the indentation thing, this is too fragile and should be handled +// by the template itself. -// MarshalLabelsYAML returns a list of strings, each one containing a +// marshalLabelsYAML returns a list of strings, each one containing a // label key and its list of values. // This is used to create yaml sections within the join scripts. // @@ -56,7 +202,7 @@ var InstallNodeBashScript = template.Must(template.New("nodejoin").Parse(install // top of the default space already used, for the default yaml listing // format (the listing values with the dashes). If `extraListIndent` // is zero, it's equivalent to using default space only (which is 4 spaces). 
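+// +// As an illustrative sketch, labels {"env": ["prod", "dev"]} with extraListIndent=2 would render roughly as: +// +// env: +// - prod +// - dev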
-func MarshalLabelsYAML(resourceMatcherLabels types.Labels, extraListIndent int) ([]string, error) { +func marshalLabelsYAML(resourceMatcherLabels types.Labels, extraListIndent int) ([]string, error) { if len(resourceMatcherLabels) == 0 { return []string{"{}"}, nil } @@ -73,7 +219,7 @@ func MarshalLabelsYAML(resourceMatcherLabels types.Labels, extraListIndent int) for _, labelName := range labelKeys { labelValues := resourceMatcherLabels[labelName] - bs, err := yaml.Marshal(map[string]utils.Strings{labelName: labelValues}) + bs, err := yaml.Marshal(map[string]apiutils.Strings{labelName: labelValues}) if err != nil { return nil, trace.Wrap(err) } diff --git a/lib/web/scripts/install_node_test.go b/lib/web/scripts/install_node_test.go index f56a44546e7ea..141133c5b9b75 100644 --- a/lib/web/scripts/install_node_test.go +++ b/lib/web/scripts/install_node_test.go @@ -66,7 +66,7 @@ func TestMarshalLabelsYAML(t *testing.T) { numExtraIndent: 2, }, } { - got, err := MarshalLabelsYAML(tt.labels, tt.numExtraIndent) + got, err := marshalLabelsYAML(tt.labels, tt.numExtraIndent) require.NoError(t, err) require.YAMLEq(t, strings.Join(tt.expected, "\n"), strings.Join(got, "\n")) diff --git a/lib/web/scripts/install_test.go b/lib/web/scripts/install_test.go new file mode 100644 index 0000000000000..4c7e51ea4eb50 --- /dev/null +++ b/lib/web/scripts/install_test.go @@ -0,0 +1,156 @@ +/* + * Teleport + * Copyright (C) 2025 Gravitational, Inc. + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU Affero General Public License as published by + * the Free Software Foundation, either version 3 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU Affero General Public License for more details. + * + * You should have received a copy of the GNU Affero General Public License + * along with this program. If not, see . 
+ */ + +package scripts + +import ( + "context" + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/lib/utils/teleportassets" +) + +func TestGetInstallScript(t *testing.T) { + ctx := context.Background() + testVersion := "1.2.3" + testProxyAddr := "proxy.example.com:443" + + tests := []struct { + name string + opts InstallScriptOptions + assertFn func(t *testing.T, script string) + }{ + { + name: "Legacy install, no autoupdate", + opts: InstallScriptOptions{AutoupdateStyle: NoAutoupdate}, + assertFn: func(t *testing.T, script string) { + require.Equal(t, legacyInstallScript, script) + }, + }, + { + name: "Legacy install, package manager autoupdate", + opts: InstallScriptOptions{AutoupdateStyle: PackageManagerAutoupdate}, + assertFn: func(t *testing.T, script string) { + require.Equal(t, legacyInstallScript, script) + }, + }, + { + name: "Oneoff install", + opts: InstallScriptOptions{ + AutoupdateStyle: UpdaterBinaryAutoupdate, + TeleportVersion: testVersion, + ProxyAddr: testProxyAddr, + TeleportFlavor: types.PackageNameOSS, + }, + assertFn: func(t *testing.T, script string) { + require.Contains(t, script, "entrypoint='teleport-update'") + require.Contains(t, script, fmt.Sprintf("teleportVersion='v%s'", testVersion)) + require.Contains(t, script, fmt.Sprintf("teleportFlavor='%s'", types.PackageNameOSS)) + require.Contains(t, script, fmt.Sprintf("cdnBaseURL='%s'", teleportassets.CDNBaseURL())) + require.Contains(t, script, fmt.Sprintf("entrypointArgs='enable --proxy %s'", testProxyAddr)) + require.Contains(t, script, "packageSuffix='bin.tar.gz'") + }, + }, + { + name: "Oneoff install custom CDN", + opts: InstallScriptOptions{ + AutoupdateStyle: UpdaterBinaryAutoupdate, + TeleportVersion: testVersion, + ProxyAddr: testProxyAddr, + TeleportFlavor: types.PackageNameOSS, + CDNBaseURL: "https://cdn.example.com", + }, + assertFn: func(t *testing.T, script string) { + require.Contains(t, script, "entrypoint='teleport-update'") + require.Contains(t, script, fmt.Sprintf("teleportVersion='v%s'", testVersion)) + require.Contains(t, script, fmt.Sprintf("teleportFlavor='%s'", types.PackageNameOSS)) + require.Contains(t, script, "cdnBaseURL='https://cdn.example.com'") + require.Contains(t, script, fmt.Sprintf("entrypointArgs='enable --proxy %s --base-url %s'", testProxyAddr, "https://cdn.example.com")) + require.Contains(t, script, "packageSuffix='bin.tar.gz'") + }, + }, + { + name: "Oneoff install default CDN", + opts: InstallScriptOptions{ + AutoupdateStyle: UpdaterBinaryAutoupdate, + TeleportVersion: testVersion, + ProxyAddr: testProxyAddr, + TeleportFlavor: types.PackageNameOSS, + CDNBaseURL: teleportassets.TeleportReleaseCDN, + }, + assertFn: func(t *testing.T, script string) { + require.Contains(t, script, "entrypoint='teleport-update'") + require.Contains(t, script, fmt.Sprintf("teleportVersion='v%s'", testVersion)) + require.Contains(t, script, fmt.Sprintf("teleportFlavor='%s'", types.PackageNameOSS)) + require.Contains(t, script, fmt.Sprintf("cdnBaseURL='%s'", teleportassets.TeleportReleaseCDN)) + require.Contains(t, script, fmt.Sprintf("entrypointArgs='enable --proxy %s'", testProxyAddr)) + require.Contains(t, script, "packageSuffix='bin.tar.gz'") + }, + }, + { + name: "Oneoff enterprise install", + opts: InstallScriptOptions{ + AutoupdateStyle: UpdaterBinaryAutoupdate, + TeleportVersion: testVersion, + ProxyAddr: testProxyAddr, + TeleportFlavor: types.PackageNameEnt, + }, + assertFn: func(t *testing.T, script 
string) { + require.Contains(t, script, "entrypoint='teleport-update'") + require.Contains(t, script, fmt.Sprintf("teleportVersion='v%s'", testVersion)) + require.Contains(t, script, fmt.Sprintf("teleportFlavor='%s'", types.PackageNameEnt)) + require.Contains(t, script, fmt.Sprintf("cdnBaseURL='%s'", teleportassets.CDNBaseURL())) + require.Contains(t, script, fmt.Sprintf("entrypointArgs='enable --proxy %s'", testProxyAddr)) + require.Contains(t, script, "packageSuffix='bin.tar.gz'") + }, + }, + { + name: "Oneoff enterprise FIPS install", + opts: InstallScriptOptions{ + AutoupdateStyle: UpdaterBinaryAutoupdate, + TeleportVersion: testVersion, + ProxyAddr: testProxyAddr, + TeleportFlavor: types.PackageNameEnt, + FIPS: true, + }, + assertFn: func(t *testing.T, script string) { + require.Contains(t, script, "entrypoint='teleport-update'") + require.Contains(t, script, fmt.Sprintf("teleportVersion='v%s'", testVersion)) + require.Contains(t, script, fmt.Sprintf("teleportFlavor='%s'", types.PackageNameEnt)) + require.Contains(t, script, fmt.Sprintf("cdnBaseURL='%s'", teleportassets.CDNBaseURL())) + require.Contains(t, script, fmt.Sprintf("entrypointArgs='enable --proxy %s'", testProxyAddr)) + require.Contains(t, script, "packageSuffix='fips-bin.tar.gz'") + }, + }, + } + for _, test := range tests { + t.Run(test.name, func(t *testing.T) { + // Sanity check, test input should be legal. + require.NoError(t, test.opts.Check()) + + // Test execution. + result, err := GetInstallScript(ctx, test.opts) + require.NoError(t, err) + test.assertFn(t, result) + }) + } +} diff --git a/lib/web/scripts/node-join/install.sh b/lib/web/scripts/node-join/install.sh index 3559f688cbaa3..c86929ef0995c 100755 --- a/lib/web/scripts/node-join/install.sh +++ b/lib/web/scripts/node-join/install.sh @@ -21,6 +21,13 @@ TELEPORT_BINARY_LIST="teleport tctl tsh teleport-update" TELEPORT_CONFIG_PATH="/etc/teleport.yaml" TELEPORT_DATA_DIR="/var/lib/teleport" TELEPORT_DOCS_URL="https://goteleport.com/docs/" +# TELEPORT_FORMAT contains the Teleport installation format. +# The value is dynamically computed unless OVERRIDE_FORMAT is set. +# Possible values are: +# - "deb" +# - "rpm" +# - "tarball" +# - "updater" TELEPORT_FORMAT="" # initialise variables (because set -u disallows unbound variables) @@ -38,6 +45,9 @@ INTERACTIVE=false # optionally be replaced by the server before the script is served up TELEPORT_VERSION='{{.version}}' TELEPORT_PACKAGE_NAME='{{.packageName}}' +# UPDATER_STYLE holds the Teleport updater style. +# Supported values are "none", "" (same as "none"), "package", and "binary". +UPDATER_STYLE='{{.installUpdater}}' REPO_CHANNEL='{{.repoChannel}}' TARGET_HOSTNAME='{{.hostname}}' TARGET_PORT='{{.port}}' @@ -680,23 +690,30 @@ fi # use OSTYPE variable to figure out host type/arch if [[ "${OSTYPE}" == "linux"* ]]; then - # linux host, now detect arch - TELEPORT_BINARY_TYPE="linux" - ARCH=$(uname -m) - log "Detected host: ${OSTYPE}, using Teleport binary type ${TELEPORT_BINARY_TYPE}" - if [[ ${ARCH} == "armv7l" ]]; then - TELEPORT_ARCH="arm" - elif [[ ${ARCH} == "aarch64" ]]; then - TELEPORT_ARCH="arm64" - elif [[ ${ARCH} == "x86_64" ]]; then - TELEPORT_ARCH="amd64" - elif [[ ${ARCH} == "i686" ]]; then - TELEPORT_ARCH="386" + + if [[ "$UPDATER_STYLE" == "binary" ]]; then + # if we are using the new updater, we can bypass this detection dance + # and always use the updater. 
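+ # The "updater" format makes this script fetch the proxy's /scripts/install.sh endpoint + # (see install_from_updater below), which downloads teleport-update and delegates + # the installation to it.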
+ TELEPORT_FORMAT="updater" else - # linux host, now detect arch - TELEPORT_BINARY_TYPE="linux" - ARCH=$(uname -m) - log "Detected host: ${OSTYPE}, using Teleport binary type ${TELEPORT_BINARY_TYPE}" - if [[ ${ARCH} == "armv7l" ]]; then - TELEPORT_ARCH="arm" - elif [[ ${ARCH} == "aarch64" ]]; then - TELEPORT_ARCH="arm64" - elif [[ ${ARCH} == "x86_64" ]]; then - TELEPORT_ARCH="amd64" - elif [[ ${ARCH} == "i686" ]]; then - TELEPORT_ARCH="386" + else - log_important "Error: cannot detect architecture from uname -m: ${ARCH}" - exit 1 + # linux host, now detect arch + TELEPORT_BINARY_TYPE="linux" + ARCH=$(uname -m) + log "Detected host: ${OSTYPE}, using Teleport binary type ${TELEPORT_BINARY_TYPE}" + if [[ ${ARCH} == "armv7l" ]]; then + TELEPORT_ARCH="arm" + elif [[ ${ARCH} == "aarch64" ]]; then + TELEPORT_ARCH="arm64" + elif [[ ${ARCH} == "x86_64" ]]; then + TELEPORT_ARCH="amd64" + elif [[ ${ARCH} == "i686" ]]; then + TELEPORT_ARCH="386" + else + log_important "Error: cannot detect architecture from uname -m: ${ARCH}" + exit 1 + fi + log "Detected arch: ${ARCH}, using Teleport arch ${TELEPORT_ARCH}" fi - log "Detected arch: ${ARCH}, using Teleport arch ${TELEPORT_ARCH}" # if the download format is already set, we have no need to detect distro if [[ ${TELEPORT_FORMAT} == "" ]]; then # detect distro @@ -985,6 +1002,26 @@ install_from_repo() { fi } +install_from_updater() { + SCRIPT_URL="https://$TARGET_HOSTNAME:$TARGET_PORT/scripts/install.sh" + CURL_COMMAND="curl -fsS" + if [[ ${DISABLE_TLS_VERIFICATION} == "true" ]]; then + CURL_COMMAND+=" -k" + SCRIPT_URL+="?insecure=true" + fi + + log "Requesting the install script: $SCRIPT_URL" + $CURL_COMMAND "$SCRIPT_URL" -o "$TEMP_DIR/install.sh" || { log "Failed to retrieve the install script."; exit 1; } + + chmod +x "$TEMP_DIR/install.sh" + + log "Executing the install script" + # We execute the downloaded script directly because it might be a bash or sh script, depending on which + # install script is served. This might cause issues if tmp is mounted with noexec, but the oneoff.sh + # script will also download and exec binaries from tmp. + "$TEMP_DIR/install.sh" +} + # package_list returns the list of packages to install. # The list of packages can be fed into yum or apt because they already have the expected format when pinning versions. package_list() { @@ -1005,7 +1042,7 @@ package_list() { # (warning): This expression is constant. Did you forget the $ on a variable? # Disabling the warning above because expression is templated. # shellcheck disable=SC2050 - if is_using_systemd && [[ "{{.installUpdater}}" == "true" ]]; then + if is_using_systemd && [[ "$UPDATER_STYLE" == "package" ]]; then # Teleport Updater requires systemd. PACKAGE_LIST+=" ${TELEPORT_UPDATER_PIN_VERSION}" fi @@ -1035,7 +1072,10 @@ is_repo_available() { return 1 } -if is_repo_available; then +if [[ "$TELEPORT_FORMAT" == "updater" ]]; then + log "Installing from updater binary." + install_from_updater +elif is_repo_available; then log "Installing repo for distro $ID." install_from_repo else diff --git a/lib/web/scripts/oneoff/oneoff.go index 5d12c2c938289..794749b1abc02 100644 --- a/lib/web/scripts/oneoff/oneoff.go +++ b/lib/web/scripts/oneoff/oneoff.go @@ -22,6 +22,7 @@ import ( "bytes" _ "embed" "slices" + "strings" "text/template" "github.com/gravitational/trace" @@ -63,9 +64,12 @@ type OneOffScriptParams struct { // Used for testing. binSudo string - // TeleportArgs is the arguments to pass to the teleport binary. + // Entrypoint is the name of the binary from the teleport package. Defaults to "teleport", but can be set to + // other binaries such as "teleport-update" or "tbot". + Entrypoint string + // EntrypointArgs are the arguments to pass to the Entrypoint binary. + // E.g., 'version' - TeleportArgs string + EntrypointArgs string // BinUname is the binary used to get OS name and Architecture of the host. // Defaults to `uname`. 
@@ -88,16 +92,23 @@ type OneOffScriptParams struct { // - teleport-ent TeleportFlavor string + // TeleportFIPS represents whether the script should install a FIPS build of Teleport. + TeleportFIPS bool + // SuccessMessage is a message shown to the user after the one off is completed. SuccessMessage string } // CheckAndSetDefaults checks if the required params are present. func (p *OneOffScriptParams) CheckAndSetDefaults() error { - if p.TeleportArgs == "" { + if p.EntrypointArgs == "" { return trace.BadParameter("missing teleport args") } + if p.Entrypoint == "" { + p.Entrypoint = "teleport" + } + if p.BinUname == "" { p.BinUname = binUname } @@ -117,6 +128,7 @@ func (p *OneOffScriptParams) CheckAndSetDefaults() error { if p.CDNBaseURL == "" { p.CDNBaseURL = teleportassets.CDNBaseURL() } + p.CDNBaseURL = strings.TrimRight(p.CDNBaseURL, "/") if p.TeleportFlavor == "" { p.TeleportFlavor = types.PackageNameOSS diff --git a/lib/web/scripts/oneoff/oneoff.sh index 912e4d6ab3368..eaa15841be18b 100644 --- a/lib/web/scripts/oneoff/oneoff.sh +++ b/lib/web/scripts/oneoff/oneoff.sh @@ -5,7 +5,10 @@ cdnBaseURL='{{.CDNBaseURL}}' teleportVersion='{{.TeleportVersion}}' teleportFlavor='{{.TeleportFlavor}}' # teleport or teleport-ent successMessage='{{.SuccessMessage}}' -teleportArgs='{{.TeleportArgs}}' +entrypointArgs='{{.EntrypointArgs}}' +entrypoint='{{.Entrypoint}}' +packageSuffix='{{ if .TeleportFIPS }}fips-{{ end }}bin.tar.gz' +fips='{{ if .TeleportFIPS }}true{{ end }}' # shellcheck disable=all # Use $HOME or / as base dir @@ -17,20 +20,24 @@ ARCH=$({{.BinUname}} -m) trap 'rm -rf -- "$tempDir"' EXIT teleportTarballName() { - if [ ${OS} = "Darwin" ]; then - echo ${teleportFlavor}-${teleportVersion}-darwin-universal-bin.tar.gz + if [ "${OS}" = "Darwin" ]; then + if [ "$fips" = "true" ]; then + echo "FIPS version of Teleport is not compatible with MacOS. Please run this script on a Linux machine." >&2 + return 1 + fi + echo "${teleportFlavor}-${teleportVersion}-darwin-universal-${packageSuffix}" return 0 fi; - if [ ${OS} != "Linux" ]; then + if [ "${OS}" != "Linux" ]; then echo "Only MacOS and Linux are supported." >&2 return 1 fi; - if [ ${ARCH} = "armv7l" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-arm-bin.tar.gz" - elif [ ${ARCH} = "aarch64" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-arm64-bin.tar.gz" - elif [ ${ARCH} = "x86_64" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-amd64-bin.tar.gz" - elif [ ${ARCH} = "i686" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-386-bin.tar.gz" + if [ ${ARCH} = "armv7l" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-arm-${packageSuffix}" + elif [ ${ARCH} = "aarch64" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-arm64-${packageSuffix}" + elif [ ${ARCH} = "x86_64" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-amd64-${packageSuffix}" + elif [ ${ARCH} = "i686" ]; then echo "${teleportFlavor}-${teleportVersion}-linux-386-${packageSuffix}" else echo "Invalid Linux architecture ${ARCH}." >&2 return 1 @@ -40,12 +47,12 @@ teleportTarballName() { main() { tarballName=$(teleportTarballName) echo "Downloading from ${cdnBaseURL}/${tarballName} and extracting teleport to ${tempDir} ..." 
- curl --show-error --fail --location ${cdnBaseURL}/${tarballName} | tar xzf - -C ${tempDir} ${teleportFlavor}/teleport + curl --show-error --fail --location "${cdnBaseURL}/${tarballName}" | tar xzf - -C "${tempDir}" "${teleportFlavor}/${entrypoint}" - mkdir -p ${tempDir}/bin - mv ${tempDir}/${teleportFlavor}/teleport ${tempDir}/bin/teleport - echo "> ${tempDir}/bin/teleport ${teleportArgs} $@" - {{.TeleportCommandPrefix}} ${tempDir}/bin/teleport ${teleportArgs} $@ && echo $successMessage + mkdir -p "${tempDir}/bin" + mv "${tempDir}/${teleportFlavor}/${entrypoint}" "${tempDir}/bin/${entrypoint}" + echo "> ${tempDir}/bin/${entrypoint} ${entrypointArgs} $@" + {{.TeleportCommandPrefix}} "${tempDir}/bin/${entrypoint}" ${entrypointArgs} $@ && echo "$successMessage" } main $@ diff --git a/lib/web/scripts/oneoff/oneoff_test.go b/lib/web/scripts/oneoff/oneoff_test.go index 963f7d2392f1d..c6da7d96e2e21 100644 --- a/lib/web/scripts/oneoff/oneoff_test.go +++ b/lib/web/scripts/oneoff/oneoff_test.go @@ -69,7 +69,7 @@ func TestOneOffScript(t *testing.T) { BinMktemp: mktempMock.Path, CDNBaseURL: "dummyURL", TeleportVersion: "v13.1.0", - TeleportArgs: "version", + EntrypointArgs: "version", }) require.NoError(t, err) @@ -99,7 +99,7 @@ func TestOneOffScript(t *testing.T) { BinMktemp: mktempMock.Path, CDNBaseURL: testServer.URL, TeleportVersion: "v13.1.0", - TeleportArgs: "version", + EntrypointArgs: "version", SuccessMessage: "Test was a success.", }) require.NoError(t, err) @@ -156,7 +156,7 @@ func TestOneOffScript(t *testing.T) { BinMktemp: mktempMock.Path, CDNBaseURL: testServer.URL, TeleportVersion: "v13.1.0", - TeleportArgs: "version", + EntrypointArgs: "version", SuccessMessage: "Test was a success.", TeleportCommandPrefix: "sudo", binSudo: sudoMock.Path, @@ -215,7 +215,7 @@ func TestOneOffScript(t *testing.T) { BinUname: unameMock.Path, BinMktemp: mktempMock.Path, CDNBaseURL: testServer.URL, - TeleportArgs: "help", + EntrypointArgs: "help", TeleportVersion: "v13.1.0", SuccessMessage: "Test was a success.", }) @@ -293,7 +293,7 @@ func TestOneOffScript(t *testing.T) { BinMktemp: mktempMock.Path, CDNBaseURL: "dummyURL", TeleportVersion: "v13.1.0", - TeleportArgs: "version", + EntrypointArgs: "version", SuccessMessage: "Test was a success.", TeleportFlavor: "../not-teleport", }) @@ -306,7 +306,7 @@ func TestOneOffScript(t *testing.T) { BinMktemp: mktempMock.Path, CDNBaseURL: "dummyURL", TeleportVersion: "v13.1.0", - TeleportArgs: "version", + EntrypointArgs: "version", SuccessMessage: "Test was a success.", TeleportFlavor: "teleport", TeleportCommandPrefix: "rm -rf thing", @@ -343,7 +343,7 @@ func TestOneOffScript(t *testing.T) { BinMktemp: mktempMock.Path, CDNBaseURL: testServer.URL, TeleportVersion: "v13.1.0", - TeleportArgs: "version", + EntrypointArgs: "version", SuccessMessage: "Test was a success.", }) require.NoError(t, err) diff --git a/tool/tctl/common/autoupdate_command.go b/tool/tctl/common/autoupdate_command.go index c089010c091f4..11593a3ab24bc 100644 --- a/tool/tctl/common/autoupdate_command.go +++ b/tool/tctl/common/autoupdate_command.go @@ -23,6 +23,8 @@ import ( "fmt" "io" "os" + "strings" + "time" "github.com/alecthomas/kingpin/v2" "github.com/coreos/go-semver/semver" @@ -32,7 +34,7 @@ import ( "github.com/gravitational/teleport/api/client/webclient" autoupdatev1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" "github.com/gravitational/teleport/api/types/autoupdate" - "github.com/gravitational/teleport/lib/auth/authclient" + 
"github.com/gravitational/teleport/lib/asciitable" "github.com/gravitational/teleport/lib/service/servicecfg" "github.com/gravitational/teleport/lib/utils" commonclient "github.com/gravitational/teleport/tool/tctl/common/client" @@ -48,10 +50,11 @@ type AutoUpdateCommand struct { app *kingpin.Application ccf *tctlcfg.GlobalCLIFlags - targetCmd *kingpin.CmdClause - enableCmd *kingpin.CmdClause - disableCmd *kingpin.CmdClause - statusCmd *kingpin.CmdClause + toolsTargetCmd *kingpin.CmdClause + toolsEnableCmd *kingpin.CmdClause + toolsDisableCmd *kingpin.CmdClause + toolsStatusCmd *kingpin.CmdClause + agentsStatusCmd *kingpin.CmdClause toolsTargetVersion string proxy string @@ -71,16 +74,19 @@ func (c *AutoUpdateCommand) Initialize(app *kingpin.Application, ccf *tctlcfg.Gl clientToolsCmd := autoUpdateCmd.Command("client-tools", "Manage client tools auto update configuration.") - c.statusCmd = clientToolsCmd.Command("status", "Prints if the client tools updates are enabled/disabled, and the target version in specified format.") - c.statusCmd.Flag("proxy", "Address of the Teleport proxy. When defined this address will be used to retrieve client tools auto update configuration.").StringVar(&c.proxy) - c.statusCmd.Flag("format", "Output format: 'yaml' or 'json'").Default(teleport.YAML).StringVar(&c.format) + c.toolsStatusCmd = clientToolsCmd.Command("status", "Prints if the client tools updates are enabled/disabled, and the target version in specified format.") + c.toolsStatusCmd.Flag("proxy", "Address of the Teleport proxy. When defined this address will be used to retrieve client tools auto update configuration.").StringVar(&c.proxy) + c.toolsStatusCmd.Flag("format", "Output format: 'yaml' or 'json'").Default(teleport.YAML).StringVar(&c.format) - c.enableCmd = clientToolsCmd.Command("enable", "Enables client tools auto updates. Clients will be told to update to the target version.") - c.disableCmd = clientToolsCmd.Command("disable", "Disables client tools auto updates. Clients will not be told to update to the target version.") + c.toolsEnableCmd = clientToolsCmd.Command("enable", "Enables client tools auto updates. Clients will be told to update to the target version.") + c.toolsDisableCmd = clientToolsCmd.Command("disable", "Disables client tools auto updates. Clients will not be told to update to the target version.") - c.targetCmd = clientToolsCmd.Command("target", "Sets the client tools target version. This command is not supported on Teleport Cloud.") - c.targetCmd.Arg("version", "Client tools target version. Clients will be told to update to this version.").StringVar(&c.toolsTargetVersion) - c.targetCmd.Flag("clear", "removes the target version, Teleport will default to its current proxy version.").BoolVar(&c.clear) + c.toolsTargetCmd = clientToolsCmd.Command("target", "Sets the client tools target version. This command is not supported on Teleport Cloud.") + c.toolsTargetCmd.Arg("version", "Client tools target version. 
Clients will be told to update to this version.").StringVar(&c.toolsTargetVersion) + c.toolsTargetCmd.Flag("clear", "Removes the target version; Teleport will default to its current proxy version.").BoolVar(&c.clear) + + agentsCmd := autoUpdateCmd.Command("agents", "Manage agents auto update configuration.") + c.agentsStatusCmd = agentsCmd.Command("status", "Prints agents auto update status.") if c.stdout == nil { c.stdout = os.Stdout @@ -89,19 +95,21 @@ func (c *AutoUpdateCommand) Initialize(app *kingpin.Application, ccf *tctlcfg.Gl // TryRun takes the CLI command as an argument and executes it. func (c *AutoUpdateCommand) TryRun(ctx context.Context, cmd string, clientFunc commonclient.InitFunc) (match bool, err error) { - var commandFunc func(ctx context.Context, client *authclient.Client) error + var commandFunc func(ctx context.Context, client autoupdateClient) error switch { - case cmd == c.targetCmd.FullCommand(): + case cmd == c.toolsTargetCmd.FullCommand(): commandFunc = c.TargetVersion - case cmd == c.enableCmd.FullCommand(): + case cmd == c.toolsEnableCmd.FullCommand(): commandFunc = c.SetModeCommand(true) - case cmd == c.disableCmd.FullCommand(): + case cmd == c.toolsDisableCmd.FullCommand(): commandFunc = c.SetModeCommand(false) - case c.proxy == "" && cmd == c.statusCmd.FullCommand(): - commandFunc = c.Status - case c.proxy != "" && cmd == c.statusCmd.FullCommand(): - err = c.StatusByProxy(ctx) + case c.proxy == "" && cmd == c.toolsStatusCmd.FullCommand(): + commandFunc = c.ToolsStatus + case c.proxy != "" && cmd == c.toolsStatusCmd.FullCommand(): + err = c.ToolsStatusByProxy(ctx) return true, trace.Wrap(err) + case cmd == c.agentsStatusCmd.FullCommand(): + commandFunc = c.agentsStatusCommand default: return false, nil } @@ -117,17 +125,17 @@ func (c *AutoUpdateCommand) TryRun(ctx context.Context, cmd string, clientFunc c } // TargetVersion creates or updates AutoUpdateVersion resource with client tools target version. -func (c *AutoUpdateCommand) TargetVersion(ctx context.Context, client *authclient.Client) error { +func (c *AutoUpdateCommand) TargetVersion(ctx context.Context, client autoupdateClient) error { var err error switch { case c.clear: - err = c.clearTargetVersion(ctx, client) + err = c.clearToolsTargetVersion(ctx, client) case c.toolsTargetVersion != "": // For parallel requests where we attempt to create a resource simultaneously, retries should be implemented. // The same approach applies to updates if the resource has been deleted during the process. // Second create request must return `AlreadyExists` error, update for deleted resource `NotFound` error. for i := 0; i < maxRetries; i++ { - err = c.setTargetVersion(ctx, client) + err = c.setToolsTargetVersion(ctx, client) if err == nil { break } @@ -140,13 +148,13 @@ } // SetModeCommand returns a command to enable or disable client tools auto-updates in the cluster. -func (c *AutoUpdateCommand) SetModeCommand(enabled bool) func(ctx context.Context, client *authclient.Client) error { - return func(ctx context.Context, client *authclient.Client) error { +func (c *AutoUpdateCommand) SetModeCommand(enabled bool) func(ctx context.Context, client autoupdateClient) error { + return func(ctx context.Context, client autoupdateClient) error { // For parallel requests where we attempt to create a resource simultaneously, retries should be implemented. // The same approach applies to updates if the resource has been deleted during the process. 
// Second create request must return `AlreadyExists` error, update for deleted resource `NotFound` error. for i := 0; i < maxRetries; i++ { - err := c.setMode(ctx, client, enabled) + err := c.setToolsMode(ctx, client, enabled) if err == nil { break } @@ -164,8 +172,95 @@ type getResponse struct { TargetVersion string `json:"target_version"` } -// Status makes request to auth service to fetch client tools auto update version and mode. -func (c *AutoUpdateCommand) Status(ctx context.Context, client *authclient.Client) error { +// autoupdateClient is a subset of the Teleport client, with functions used to interact with automatic update resources. +// Not every autoupdate function is part of the interface; we'll add more as we need them. +type autoupdateClient interface { + GetAutoUpdateAgentRollout(context.Context) (*autoupdatev1pb.AutoUpdateAgentRollout, error) + GetAutoUpdateVersion(context.Context) (*autoupdatev1pb.AutoUpdateVersion, error) + GetAutoUpdateConfig(context.Context) (*autoupdatev1pb.AutoUpdateConfig, error) + CreateAutoUpdateConfig(context.Context, *autoupdatev1pb.AutoUpdateConfig) (*autoupdatev1pb.AutoUpdateConfig, error) + CreateAutoUpdateVersion(context.Context, *autoupdatev1pb.AutoUpdateVersion) (*autoupdatev1pb.AutoUpdateVersion, error) + UpdateAutoUpdateConfig(context.Context, *autoupdatev1pb.AutoUpdateConfig) (*autoupdatev1pb.AutoUpdateConfig, error) + UpdateAutoUpdateVersion(context.Context, *autoupdatev1pb.AutoUpdateVersion) (*autoupdatev1pb.AutoUpdateVersion, error) +} + +func (c *AutoUpdateCommand) agentsStatusCommand(ctx context.Context, client autoupdateClient) error { + rollout, err := client.GetAutoUpdateAgentRollout(ctx) + if err != nil && !trace.IsNotFound(err) { + return trace.Wrap(err) + } + + sb := strings.Builder{} + if rollout.GetSpec() == nil { + sb.WriteString("No active agent rollout (autoupdate_agent_rollout).\n") + } + if mode := rollout.GetSpec().GetAutoupdateMode(); mode != "" { + sb.WriteString("Agent autoupdate mode: " + mode + "\n") + } + if st := formatTimeIfNotEmpty(rollout.GetStatus().GetStartTime().AsTime(), time.DateTime); st != "" { + sb.WriteString("Rollout creation date: " + st + "\n") + } + if start := rollout.GetSpec().GetStartVersion(); start != "" { + sb.WriteString("Start version: " + start + "\n") + } + if target := rollout.GetSpec().GetTargetVersion(); target != "" { + sb.WriteString("Target version: " + target + "\n") + } + if state := rollout.GetStatus().GetState(); state != autoupdatev1pb.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_UNSPECIFIED { + sb.WriteString("Rollout state: " + userFriendlyState(state) + "\n") + } + if schedule := rollout.GetSpec().GetSchedule(); schedule == autoupdate.AgentsScheduleImmediate { + sb.WriteString("Schedule is immediate. 
Every group immediately updates to the target version.\n") + } + if strategy := rollout.GetSpec().GetStrategy(); strategy != "" { + sb.WriteString("Strategy: " + strategy + "\n") + } + + if groups := rollout.GetStatus().GetGroups(); len(groups) > 0 { + sb.WriteRune('\n') + headers := []string{"Group Name", "State", "Start Time", "State Reason"} + table := asciitable.MakeTable(headers) + for _, group := range groups { + table.AddRow([]string{ + group.GetName(), + userFriendlyState(group.GetState()), + formatTimeIfNotEmpty(group.GetStartTime().AsTime(), time.DateTime), + group.GetLastUpdateReason()}) + } + sb.Write(table.AsBuffer().Bytes()) + } + + fmt.Fprint(c.stdout, sb.String()) + return nil +} + +func formatTimeIfNotEmpty(t time.Time, format string) string { + if t.IsZero() || t.Unix() == 0 { + return "" + } + return t.Format(format) +} + +func userFriendlyState[T autoupdatev1pb.AutoUpdateAgentGroupState | autoupdatev1pb.AutoUpdateAgentRolloutState](state T) string { + switch state { + case 0: + return "Unknown" + case 1: + return "Unstarted" + case 2: + return "Active" + case 3: + return "Done" + case 4: + return "Rolledback" + default: + // If we don't know anything about this state, we display its integer + return fmt.Sprintf("Unknown state (%d)", state) + } +} + +// ToolsStatus makes a request to the auth service to fetch the client tools auto update version and mode. +func (c *AutoUpdateCommand) ToolsStatus(ctx context.Context, client autoupdateClient) error { var response getResponse config, err := client.GetAutoUpdateConfig(ctx) if err != nil && !trace.IsNotFound(err) { @@ -183,12 +278,12 @@ func (c *AutoUpdateCommand) Status(ctx context.Context, client *authclient.Clien response.TargetVersion = version.Spec.Tools.TargetVersion } - return c.printResponse(response) + return c.printToolsResponse(response) } -// StatusByProxy makes request to `webapi/find` endpoint to fetch tools auto update version and mode +// ToolsStatusByProxy makes a request to the `webapi/find` endpoint to fetch the tools auto update version and mode // without authentication. 
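+// It backs "tctl autoupdate client-tools status --proxy <addr>", which works without an authenticated auth client.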
-func (c *AutoUpdateCommand) StatusByProxy(ctx context.Context) error { +func (c *AutoUpdateCommand) ToolsStatusByProxy(ctx context.Context) error { find, err := webclient.Find(&webclient.Config{ Context: ctx, ProxyAddr: c.proxy, @@ -201,13 +296,13 @@ func (c *AutoUpdateCommand) StatusByProxy(ctx context.Context) error { if find.AutoUpdate.ToolsAutoUpdate { mode = autoupdate.ToolsUpdateModeEnabled } - return c.printResponse(getResponse{ + return c.printToolsResponse(getResponse{ TargetVersion: find.AutoUpdate.ToolsVersion, Mode: mode, }) } -func (c *AutoUpdateCommand) setMode(ctx context.Context, client *authclient.Client, enabled bool) error { +func (c *AutoUpdateCommand) setToolsMode(ctx context.Context, client autoupdateClient, enabled bool) error { setMode := client.UpdateAutoUpdateConfig config, err := client.GetAutoUpdateConfig(ctx) if trace.IsNotFound(err) { @@ -235,7 +330,7 @@ func (c *AutoUpdateCommand) setMode(ctx context.Context, client *authclient.Clie return nil } -func (c *AutoUpdateCommand) setTargetVersion(ctx context.Context, client *authclient.Client) error { +func (c *AutoUpdateCommand) setToolsTargetVersion(ctx context.Context, client autoupdateClient) error { if _, err := semver.NewVersion(c.toolsTargetVersion); err != nil { return trace.WrapWithMessage(err, "not semantic version") } @@ -262,7 +357,7 @@ func (c *AutoUpdateCommand) setTargetVersion(ctx context.Context, client *authcl return nil } -func (c *AutoUpdateCommand) clearTargetVersion(ctx context.Context, client *authclient.Client) error { +func (c *AutoUpdateCommand) clearToolsTargetVersion(ctx context.Context, client autoupdateClient) error { version, err := client.GetAutoUpdateVersion(ctx) if trace.IsNotFound(err) { return nil @@ -279,7 +374,7 @@ func (c *AutoUpdateCommand) clearTargetVersion(ctx context.Context, client *auth return nil } -func (c *AutoUpdateCommand) printResponse(response getResponse) error { +func (c *AutoUpdateCommand) printToolsResponse(response getResponse) error { switch c.format { case teleport.JSON: if err := utils.WriteJSON(c.stdout, response); err != nil { diff --git a/tool/tctl/common/autoupdate_command_test.go b/tool/tctl/common/autoupdate_command_test.go index 31d2782fbc335..164943b08b8ec 100644 --- a/tool/tctl/common/autoupdate_command_test.go +++ b/tool/tctl/common/autoupdate_command_test.go @@ -22,12 +22,18 @@ import ( "bytes" "context" "testing" + "time" "github.com/gravitational/trace" "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/durationpb" + "google.golang.org/protobuf/types/known/timestamppb" "github.com/gravitational/teleport/api/breaker" + autoupdatepb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" + "github.com/gravitational/teleport/api/types/autoupdate" "github.com/gravitational/teleport/lib/auth/authclient" "github.com/gravitational/teleport/lib/service/servicecfg" "github.com/gravitational/teleport/lib/utils" @@ -116,3 +122,183 @@ func runAutoUpdateCommand(t *testing.T, client *authclient.Client, args []string }) return &stdoutBuff, err } + +type mockRolloutClient struct { + authclient.Client + mock.Mock +} + +func (m *mockRolloutClient) GetAutoUpdateAgentRollout(_ context.Context) (*autoupdatepb.AutoUpdateAgentRollout, error) { + args := m.Called() + return args.Get(0).(*autoupdatepb.AutoUpdateAgentRollout), args.Error(1) +} + +func TestAutoUpdateAgentStatusCommand(t *testing.T) { + ctx := context.Background() + + tests := 
[]struct { + name string + fixture *autoupdatepb.AutoUpdateAgentRollout + fixtureErr error + expectedOutput string + }{ + { + name: "no rollout", + fixture: nil, + fixtureErr: trace.NotFound("no rollout found"), + expectedOutput: "No active agent rollout (autoupdate_agent_rollout).\n", + }, + { + name: "rollout immediate schedule", + fixture: &autoupdatepb.AutoUpdateAgentRollout{ + Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: autoupdate.AgentsScheduleImmediate, + AutoupdateMode: autoupdate.AgentsUpdateModeEnabled, + }, + }, + expectedOutput: `Agent autoupdate mode: enabled +Start version: 1.2.3 +Target version: 1.2.4 +Schedule is immediate. Every group immediately updates to the target version. +`, + }, + { + name: "rollout regular schedule time-based", + fixture: &autoupdatepb.AutoUpdateAgentRollout{ + Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: autoupdate.AgentsScheduleRegular, + AutoupdateMode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyTimeBased, + MaintenanceWindowDuration: durationpb.New(1 * time.Hour), + }, + Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{ + { + Name: "dev", + StartTime: timestamppb.New(time.Date(2025, 1, 15, 12, 00, 0, 0, time.UTC)), + State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + LastUpdateTime: nil, + LastUpdateReason: "outside_window", + ConfigDays: []string{"Mon", "Tue", "Wed", "Thu", "Fri"}, + ConfigStartHour: 8, + }, + { + Name: "stage", + StartTime: timestamppb.New(time.Date(2025, 1, 15, 14, 00, 0, 0, time.UTC)), + State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + LastUpdateReason: "in_window", + ConfigDays: []string{"Mon", "Tue", "Wed", "Thu", "Fri"}, + ConfigStartHour: 14, + }, + { + Name: "prod", + StartTime: nil, + State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateReason: "outside_window", + ConfigDays: []string{"Mon", "Tue", "Wed", "Thu", "Fri"}, + ConfigStartHour: 18, + }, + }, + State: autoupdatepb.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE, + StartTime: timestamppb.New(time.Date(2025, 1, 15, 2, 0, 0, 0, time.UTC)), + TimeOverride: nil, + }, + }, + expectedOutput: `Agent autoupdate mode: enabled +Rollout creation date: 2025-01-15 02:00:00 +Start version: 1.2.3 +Target version: 1.2.4 +Rollout state: Active +Strategy: time-based + +Group Name State Start Time State Reason +---------- --------- ------------------- -------------- +dev Done 2025-01-15 12:00:00 outside_window +stage Active 2025-01-15 14:00:00 in_window +prod Unstarted outside_window +`, + }, + { + name: "rollout regular schedule halt-on-error", + fixture: &autoupdatepb.AutoUpdateAgentRollout{ + Spec: &autoupdatepb.AutoUpdateAgentRolloutSpec{ + StartVersion: "1.2.3", + TargetVersion: "1.2.4", + Schedule: autoupdate.AgentsScheduleRegular, + AutoupdateMode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyHaltOnError, + }, + Status: &autoupdatepb.AutoUpdateAgentRolloutStatus{ + Groups: []*autoupdatepb.AutoUpdateAgentRolloutStatusGroup{ + { + Name: "dev", + StartTime: timestamppb.New(time.Date(2025, 1, 15, 12, 00, 0, 0, time.UTC)), + State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_DONE, + LastUpdateTime: nil, + LastUpdateReason: "outside_window", + ConfigDays: []string{"Mon", 
"Tue", "Wed", "Thu", "Fri"}, + ConfigStartHour: 8, + }, + { + Name: "stage", + StartTime: timestamppb.New(time.Date(2025, 1, 15, 14, 00, 0, 0, time.UTC)), + State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_ACTIVE, + LastUpdateReason: "in_window", + ConfigDays: []string{"Mon", "Tue", "Wed", "Thu", "Fri"}, + ConfigStartHour: 14, + }, + { + Name: "prod", + StartTime: nil, + State: autoupdatepb.AutoUpdateAgentGroupState_AUTO_UPDATE_AGENT_GROUP_STATE_UNSTARTED, + LastUpdateReason: "outside_window", + ConfigDays: []string{"Mon", "Tue", "Wed", "Thu", "Fri"}, + ConfigStartHour: 18, + }, + }, + State: autoupdatepb.AutoUpdateAgentRolloutState_AUTO_UPDATE_AGENT_ROLLOUT_STATE_ACTIVE, + StartTime: timestamppb.New(time.Date(2025, 1, 15, 2, 0, 0, 0, time.UTC)), + TimeOverride: nil, + }, + }, + expectedOutput: `Agent autoupdate mode: enabled +Rollout creation date: 2025-01-15 02:00:00 +Start version: 1.2.3 +Target version: 1.2.4 +Rollout state: Active +Strategy: halt-on-error + +Group Name State Start Time State Reason +---------- --------- ------------------- -------------- +dev Done 2025-01-15 12:00:00 outside_window +stage Active 2025-01-15 14:00:00 in_window +prod Unstarted outside_window +`, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + // Test setup: create mock client and load fixtures. + clt := &mockRolloutClient{} + clt.On("GetAutoUpdateAgentRollout", mock.Anything).Return(tt.fixture, tt.fixtureErr).Once() + + // Test execution: run command. + output := &bytes.Buffer{} + cmd := AutoUpdateCommand{stdout: output} + err := cmd.agentsStatusCommand(ctx, clt) + require.NoError(t, err) + + // Test validation: check the command output. + require.Equal(t, tt.expectedOutput, output.String()) + + // Test validation: check that the mock received the expected calls. 
+ clt.AssertExpectations(t) + }) + } + +} diff --git a/tool/tctl/common/collection.go b/tool/tctl/common/collection.go index 5ccb1e02d2230..e966e9154e8a3 100644 --- a/tool/tctl/common/collection.go +++ b/tool/tctl/common/collection.go @@ -1944,7 +1944,7 @@ type autoUpdateConfigCollection struct { } func (c *autoUpdateConfigCollection) resources() []types.Resource { - return []types.Resource{types.Resource153ToLegacy(c.config)} + return []types.Resource{types.ProtoResource153ToLegacy(c.config)} } func (c *autoUpdateConfigCollection) writeText(w io.Writer, verbose bool) error { @@ -1962,7 +1962,7 @@ type autoUpdateVersionCollection struct { } func (c *autoUpdateVersionCollection) resources() []types.Resource { - return []types.Resource{types.Resource153ToLegacy(c.version)} + return []types.Resource{types.ProtoResource153ToLegacy(c.version)} } func (c *autoUpdateVersionCollection) writeText(w io.Writer, verbose bool) error { @@ -1975,6 +1975,28 @@ func (c *autoUpdateVersionCollection) writeText(w io.Writer, verbose bool) error return trace.Wrap(err) } +type autoUpdateAgentRolloutCollection struct { + rollout *autoupdatev1pb.AutoUpdateAgentRollout +} + +func (c *autoUpdateAgentRolloutCollection) resources() []types.Resource { + return []types.Resource{types.ProtoResource153ToLegacy(c.rollout)} +} + +func (c *autoUpdateAgentRolloutCollection) writeText(w io.Writer, verbose bool) error { + t := asciitable.MakeTable([]string{"Name", "Start Version", "Target Version", "Mode", "Schedule", "Strategy"}) + t.AddRow([]string{ + c.rollout.GetMetadata().GetName(), + fmt.Sprintf("%v", c.rollout.GetSpec().GetStartVersion()), + fmt.Sprintf("%v", c.rollout.GetSpec().GetTargetVersion()), + fmt.Sprintf("%v", c.rollout.GetSpec().GetAutoupdateMode()), + fmt.Sprintf("%v", c.rollout.GetSpec().GetSchedule()), + fmt.Sprintf("%v", c.rollout.GetSpec().GetStrategy()), + }) + _, err := t.AsBuffer().WriteTo(w) + return trace.Wrap(err) +} + type accessMonitoringRuleCollection struct { items []*accessmonitoringrulesv1pb.AccessMonitoringRule } diff --git a/tool/tctl/common/collection_test.go b/tool/tctl/common/collection_test.go index 166c5f6901599..f0679b9a65581 100644 --- a/tool/tctl/common/collection_test.go +++ b/tool/tctl/common/collection_test.go @@ -27,13 +27,19 @@ import ( "github.com/google/go-cmp/cmp" "github.com/google/uuid" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/types/known/durationpb" + kyaml "k8s.io/apimachinery/pkg/util/yaml" "github.com/gravitational/teleport/api" + autoupdatev1pb "github.com/gravitational/teleport/api/gen/proto/go/teleport/autoupdate/v1" dbobjectv1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/dbobject/v1" dbobjectimportrulev1 "github.com/gravitational/teleport/api/gen/proto/go/teleport/dbobjectimportrule/v1" "github.com/gravitational/teleport/api/types" + "github.com/gravitational/teleport/api/types/autoupdate" "github.com/gravitational/teleport/api/types/label" "github.com/gravitational/teleport/lib/asciitable" + "github.com/gravitational/teleport/lib/defaults" + "github.com/gravitational/teleport/lib/services" "github.com/gravitational/teleport/lib/srv/db/common/databaseobject" "github.com/gravitational/teleport/lib/srv/db/common/databaseobjectimportrule" "github.com/gravitational/teleport/tool/common" @@ -431,3 +437,66 @@ func makeTestLabels(extraStaticLabels map[string]string) map[string]string { maps.Copy(labels, extraStaticLabels) return labels } + +// autoUpdateConfigBrokenCollection is an intentionally broken version of the +// 
autoUpdateConfigCollection: it does not marshal resources properly because +// it uses standard json marshaling instead of protojson marshaling. +type autoUpdateConfigBrokenCollection struct { + autoUpdateConfigCollection +} + +func (c *autoUpdateConfigBrokenCollection) resources() []types.Resource { + // We use Resource153ToLegacy instead of ProtoResource153ToLegacy. + return []types.Resource{types.Resource153ToLegacy(c.config)} +} + +// This test makes sure we marshal and unmarshal proto-based Resource153 properly. +// We had a bug where types.Resource153 implemented by protobuf structs were not +// marshaled properly (they should be marshaled using protojson). This test +// checks we can do a round-trip with one of those proto-struct resources. +func TestRoundTripProtoResource153(t *testing.T) { + // Test setup: generate fixture. + initial, err := autoupdate.NewAutoUpdateConfig(&autoupdatev1pb.AutoUpdateConfigSpec{ + Agents: &autoupdatev1pb.AutoUpdateConfigSpecAgents{ + Mode: autoupdate.AgentsUpdateModeEnabled, + Strategy: autoupdate.AgentsStrategyTimeBased, + MaintenanceWindowDuration: durationpb.New(1 * time.Hour), + Schedules: &autoupdatev1pb.AgentAutoUpdateSchedules{ + Regular: []*autoupdatev1pb.AgentAutoUpdateGroup{ + { + Name: "group1", + Days: []string{types.Wildcard}, + }, + }, + }, + }, + }) + require.NoError(t, err) + + // Test execution: dump the resource into a YAML manifest. + collection := &autoUpdateConfigCollection{config: initial} + buf := &bytes.Buffer{} + require.NoError(t, writeYAML(collection, buf)) + + // Test execution: load the YAML manifest back. + decoder := kyaml.NewYAMLOrJSONDecoder(buf, defaults.LookaheadBufSize) + var raw services.UnknownResource + require.NoError(t, decoder.Decode(&raw)) + result, err := services.UnmarshalProtoResource[*autoupdatev1pb.AutoUpdateConfig](raw.Raw) + require.NoError(t, err) + + // Test validation: check that the loaded content matches what we had before. + require.Equal(t, initial, result) + + // Test execution: now dump the resource into a YAML manifest with a + // collection using types.Resource153ToLegacy instead of types.ProtoResource153ToLegacy. + brokenCollection := &autoUpdateConfigBrokenCollection{autoUpdateConfigCollection{initial}} + buf = &bytes.Buffer{} + require.NoError(t, writeYAML(brokenCollection, buf)) + + // Test execution: load the YAML manifest back and see that we can't unmarshal it. 
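+ // (encoding/json renders well-known types such as durationpb.Duration as {"seconds": ...} objects, while + // protojson expects strings like "3600s", so UnmarshalProtoResource rejects the broken manifest.)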
+ decoder = kyaml.NewYAMLOrJSONDecoder(buf, defaults.LookaheadBufSize) + require.NoError(t, decoder.Decode(&raw)) + _, err = services.UnmarshalProtoResource[*autoupdatev1pb.AutoUpdateConfig](raw.Raw) + require.Error(t, err) +} diff --git a/tool/tctl/common/helpers_test.go index 0cf773852c96f..b235a40e8b5e2 100644 --- a/tool/tctl/common/helpers_test.go +++ b/tool/tctl/common/helpers_test.go @@ -35,6 +35,7 @@ import ( "github.com/jonboulle/clockwork" "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" + kyaml "k8s.io/apimachinery/pkg/util/yaml" "github.com/gravitational/teleport/api/breaker" apidefaults "github.com/gravitational/teleport/api/defaults" @@ -43,6 +44,7 @@ import ( "github.com/gravitational/teleport/lib/config" "github.com/gravitational/teleport/lib/service" "github.com/gravitational/teleport/lib/service/servicecfg" + "github.com/gravitational/teleport/lib/services" "github.com/gravitational/teleport/lib/utils" commonclient "github.com/gravitational/teleport/tool/tctl/common/client" tctlcfg "github.com/gravitational/teleport/tool/tctl/common/config" @@ -153,6 +155,13 @@ func mustDecodeJSON[T any](t *testing.T, r io.Reader) T { return out } +func mustTranscodeYAMLToJSON(t *testing.T, r io.Reader) []byte { + t.Helper() + decoder := kyaml.NewYAMLToJSONDecoder(r) + var resource services.UnknownResource + require.NoError(t, decoder.Decode(&resource)) + return resource.Raw +} + func mustDecodeYAMLDocuments[T any](t *testing.T, r io.Reader, out *[]T) { t.Helper() decoder := yaml.NewDecoder(r) diff --git a/tool/tctl/common/plugin/entraid.go index c537be2680da8..b175b11312665 100644 --- a/tool/tctl/common/plugin/entraid.go +++ b/tool/tctl/common/plugin/entraid.go @@ -399,7 +399,7 @@ func buildScript(proxyPublicAddr string, entraCfg entraArgs) (string, error) { } script, err := oneoff.BuildScript(oneoff.OneOffScriptParams{ - TeleportArgs: strings.Join(argsList, " "), + EntrypointArgs: strings.Join(argsList, " "), SuccessMessage: "Success! 
You can now go back to the Teleport Web UI to use the integration with Azure.", }) if err != nil { diff --git a/tool/tctl/common/resource_command.go index cb388c969d4f6..01419e90833b3 100644 --- a/tool/tctl/common/resource_command.go +++ b/tool/tctl/common/resource_command.go @@ -181,6 +181,7 @@ func (rc *ResourceCommand) Initialize(app *kingpin.Application, _ *tctlcfg.Globa types.KindAutoUpdateConfig: rc.createAutoUpdateConfig, types.KindAutoUpdateVersion: rc.createAutoUpdateVersion, types.KindGitServer: rc.createGitServer, + types.KindAutoUpdateAgentRollout: rc.createAutoUpdateAgentRollout, } rc.UpdateHandlers = map[ResourceKind]ResourceCreateHandler{ types.KindUser: rc.updateUser, @@ -202,6 +203,7 @@ types.KindAutoUpdateVersion: rc.updateAutoUpdateVersion, types.KindDynamicWindowsDesktop: rc.updateDynamicWindowsDesktop, types.KindGitServer: rc.updateGitServer, + types.KindAutoUpdateAgentRollout: rc.updateAutoUpdateAgentRollout, } rc.config = config @@ -1629,6 +1631,7 @@ func (rc *ResourceCommand) Delete(ctx context.Context, client *authclient.Client types.KindNetworkRestrictions, types.KindAutoUpdateConfig, types.KindAutoUpdateVersion, + types.KindAutoUpdateAgentRollout, } if !slices.Contains(singletonResources, rc.ref.Kind) && (rc.ref.Kind == "" || rc.ref.Name == "") { return trace.BadParameter("provide a full resource name to delete, for example:\n$ tctl rm cluster/east\n") } @@ -2059,6 +2062,11 @@ func (rc *ResourceCommand) Delete(ctx context.Context, client *authclient.Client return trace.Wrap(err) } fmt.Printf("AutoUpdateVersion has been deleted\n") + case types.KindAutoUpdateAgentRollout: + if err := client.DeleteAutoUpdateAgentRollout(ctx); err != nil { + return trace.Wrap(err) + } + fmt.Printf("AutoUpdateAgentRollout has been deleted\n") default: return trace.BadParameter("deleting resources of type %q is not supported", rc.ref.Kind) } @@ -3337,6 +3345,12 @@ func (rc *ResourceCommand) getCollection(ctx context.Context, client *authclient return nil, trace.Wrap(err) } return &autoUpdateVersionCollection{version}, nil + case types.KindAutoUpdateAgentRollout: + rollout, err := client.GetAutoUpdateAgentRollout(ctx) + if err != nil { + return nil, trace.Wrap(err) + } + return &autoUpdateAgentRolloutCollection{rollout}, nil case types.KindAccessMonitoringRule: if rc.ref.Name != "" { rule, err := client.AccessMonitoringRuleClient().GetAccessMonitoringRule(ctx, rc.ref.Name) @@ -3798,6 +3812,37 @@ func (rc *ResourceCommand) updateAutoUpdateVersion(ctx context.Context, client * return nil } +func (rc *ResourceCommand) createAutoUpdateAgentRollout(ctx context.Context, client *authclient.Client, raw services.UnknownResource) error { + rollout, err := services.UnmarshalProtoResource[*autoupdatev1pb.AutoUpdateAgentRollout](raw.Raw) + if err != nil { + return trace.Wrap(err) + } + + if rc.IsForced() { + _, err = client.UpsertAutoUpdateAgentRollout(ctx, rollout) + } else { + _, err = client.CreateAutoUpdateAgentRollout(ctx, rollout) + } + if err != nil { + return trace.Wrap(err) + } + + fmt.Println("autoupdate_agent_rollout has been created") + return nil +} + +func (rc *ResourceCommand) updateAutoUpdateAgentRollout(ctx context.Context, client *authclient.Client, raw services.UnknownResource) error { + rollout, err := services.UnmarshalProtoResource[*autoupdatev1pb.AutoUpdateAgentRollout](raw.Raw) + if err != nil { + return trace.Wrap(err) + } + if _, err := 
client.UpdateAutoUpdateAgentRollout(ctx, rollout); err != nil { + return trace.Wrap(err) + } + fmt.Println("autoupdate_agent_rollout has been updated") + return nil +} + func (rc *ResourceCommand) createGitServer(ctx context.Context, client *authclient.Client, raw services.UnknownResource) error { server, err := services.UnmarshalGitServer(raw.Raw) if err != nil { diff --git a/tool/tctl/common/resource_command_test.go index 22dbef95569c1..1b9a6e7ddc914 100644 --- a/tool/tctl/common/resource_command_test.go +++ b/tool/tctl/common/resource_command_test.go @@ -36,6 +36,7 @@ import ( "github.com/jonboulle/clockwork" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "google.golang.org/protobuf/encoding/protojson" "google.golang.org/protobuf/testing/protocmp" "k8s.io/apimachinery/pkg/util/yaml" @@ -1371,17 +1372,29 @@ func TestCreateResources(t *testing.T) { process := testenv.MakeTestServer(t, testenv.WithLogger(utils.NewSlogLoggerForTests())) rootClient := testenv.MakeDefaultAuthClient(t, process) + // tctlGetAllValidations allows tests to register post-test validations checking + // that their resource is present in "tctl get all" output. + // This allows running individual test rows instead of the whole test table. + var tctlGetAllValidations []func(t *testing.T, out string) + tests := []struct { - kind string - create func(t *testing.T, clt *authclient.Client) + kind string + create func(t *testing.T, clt *authclient.Client) + getAllCheck func(t *testing.T, out string) }{ { kind: types.KindGithubConnector, create: testCreateGithubConnector, + getAllCheck: func(t *testing.T, s string) { + assert.Contains(t, s, "kind: github") + }, }, { kind: types.KindRole, create: testCreateRole, + getAllCheck: func(t *testing.T, s string) { + assert.Contains(t, s, "kind: role") + }, }, { kind: types.KindServerInfo, @@ -1390,6 +1403,9 @@ { kind: types.KindUser, create: testCreateUser, + getAllCheck: func(t *testing.T, s string) { + assert.Contains(t, s, "kind: user") + }, }, { kind: "empty-doc", @@ -1406,10 +1422,16 @@ { kind: types.KindClusterNetworkingConfig, create: testCreateClusterNetworkingConfig, + getAllCheck: func(t *testing.T, s string) { + assert.Contains(t, s, "kind: cluster_networking_config") + }, }, { kind: types.KindClusterAuthPreference, create: testCreateAuthPreference, + getAllCheck: func(t *testing.T, s string) { + assert.Contains(t, s, "kind: cluster_auth_preference") + }, }, { kind: types.KindSessionRecordingConfig, @@ -1431,6 +1453,10 @@ kind: types.KindAutoUpdateVersion, create: testCreateAutoUpdateVersion, }, + { + kind: types.KindAutoUpdateAgentRollout, + create: testCreateAutoUpdateAgentRollout, + }, { kind: types.KindDynamicWindowsDesktop, create: testCreateDynamicWindowsDesktop, @@ -1440,6 +1466,9 @@ for _, test := range tests { t.Run(test.kind, func(t *testing.T) { test.create(t, rootClient) + if test.getAllCheck != nil { + tctlGetAllValidations = append(tctlGetAllValidations, test.getAllCheck) + } }) } @@ -1447,12 +1476,9 @@ out, err := runResourceCommand(t, rootClient, []string{"get", "all"}) require.NoError(t, err) s := out.String() - require.NotEmpty(t, s) - assert.Contains(t, s, "kind: github") - assert.Contains(t, s, "kind: cluster_auth_preference") - assert.Contains(t, s, "kind: cluster_networking_config") - 
assert.Contains(t, s, "kind: user") - assert.Contains(t, s, "kind: role") + for _, validateGetAll := range tctlGetAllValidations { + validateGetAll(t, s) + } } func testCreateGithubConnector(t *testing.T, clt *authclient.Client) { @@ -2342,18 +2368,21 @@ version: v1 _, err = runResourceCommand(t, clt, []string{"create", resourceYAMLPath}) require.NoError(t, err) - // Get the resource buf, err := runResourceCommand(t, clt, []string{"get", types.KindAutoUpdateConfig, "--format=json"}) require.NoError(t, err) - resources := mustDecodeJSON[[]*autoupdate.AutoUpdateConfig](t, buf) - require.Len(t, resources, 1) + + rawResources := mustDecodeJSON[[]services.UnknownResource](t, buf) + require.Len(t, rawResources, 1) + var resource autoupdate.AutoUpdateConfig + require.NoError(t, protojson.Unmarshal(rawResources[0].Raw, &resource)) var expected autoupdate.AutoUpdateConfig - require.NoError(t, yaml.Unmarshal([]byte(resourceYAML), &expected)) + expectedJSON := mustTranscodeYAMLToJSON(t, bytes.NewReader([]byte(resourceYAML))) + require.NoError(t, protojson.Unmarshal(expectedJSON, &expected)) require.Empty(t, cmp.Diff( - []*autoupdate.AutoUpdateConfig{&expected}, - resources, + &expected, + &resource, protocmp.IgnoreFields(&headerv1.Metadata{}, "revision"), protocmp.Transform(), )) @@ -2384,18 +2413,21 @@ version: v1 _, err = runResourceCommand(t, clt, []string{"create", resourceYAMLPath}) require.NoError(t, err) - // Get the resource buf, err := runResourceCommand(t, clt, []string{"get", types.KindAutoUpdateVersion, "--format=json"}) require.NoError(t, err) - resources := mustDecodeJSON[[]*autoupdate.AutoUpdateVersion](t, buf) - require.Len(t, resources, 1) + + rawResources := mustDecodeJSON[[]services.UnknownResource](t, buf) + require.Len(t, rawResources, 1) + var resource autoupdate.AutoUpdateVersion + require.NoError(t, protojson.Unmarshal(rawResources[0].Raw, &resource)) var expected autoupdate.AutoUpdateVersion - require.NoError(t, yaml.Unmarshal([]byte(resourceYAML), &expected)) + expectedJSON := mustTranscodeYAMLToJSON(t, bytes.NewReader([]byte(resourceYAML))) + require.NoError(t, protojson.Unmarshal(expectedJSON, &expected)) require.Empty(t, cmp.Diff( - []*autoupdate.AutoUpdateVersion{&expected}, - resources, + &expected, + &resource, protocmp.IgnoreFields(&headerv1.Metadata{}, "revision"), protocmp.Transform(), )) @@ -2407,6 +2439,62 @@ version: v1 require.ErrorContains(t, err, "autoupdate_version \"autoupdate-version\" doesn't exist") } +func testCreateAutoUpdateAgentRollout(t *testing.T, clt *authclient.Client) { + const resourceYAML = `kind: autoupdate_agent_rollout +metadata: + name: autoupdate-agent-rollout + revision: 3a43b44a-201e-4d7f-aef1-ae2f6d9811ed +spec: + start_version: 1.2.3 + target_version: 1.2.3 + autoupdate_mode: "suspended" + schedule: "regular" + strategy: "halt-on-error" +status: + groups: + - name: my-group + state: 1 + config_days: ["*"] + config_start_hour: 12 + config_wait_hours: 0 +version: v1 +` + _, err := runResourceCommand(t, clt, []string{"get", types.KindAutoUpdateAgentRollout, "--format=json"}) + require.ErrorContains(t, err, "doesn't exist") + + // Create the resource. 
+ resourceYAMLPath := filepath.Join(t.TempDir(), "resource.yaml") + require.NoError(t, os.WriteFile(resourceYAMLPath, []byte(resourceYAML), 0644)) + _, err = runResourceCommand(t, clt, []string{"create", resourceYAMLPath}) + require.NoError(t, err) + + // Get the resource + buf, err := runResourceCommand(t, clt, []string{"get", types.KindAutoUpdateAgentRollout, "--format=json"}) + require.NoError(t, err) + + rawResources := mustDecodeJSON[[]services.UnknownResource](t, buf) + require.Len(t, rawResources, 1) + var resource autoupdate.AutoUpdateAgentRollout + require.NoError(t, protojson.Unmarshal(rawResources[0].Raw, &resource)) + + var expected autoupdate.AutoUpdateAgentRollout + expectedJSON := mustTranscodeYAMLToJSON(t, bytes.NewReader([]byte(resourceYAML))) + require.NoError(t, protojson.Unmarshal(expectedJSON, &expected)) + + require.Empty(t, cmp.Diff( + &expected, + &resource, + protocmp.IgnoreFields(&headerv1.Metadata{}, "revision"), + protocmp.Transform(), + )) + + // Delete the resource + _, err = runResourceCommand(t, clt, []string{"rm", types.KindAutoUpdateAgentRollout}) + require.NoError(t, err) + _, err = runResourceCommand(t, clt, []string{"get", types.KindAutoUpdateAgentRollout}) + require.ErrorContains(t, err, "autoupdate_agent_rollout \"autoupdate-agent-rollout\" doesn't exist") +} + func testCreateDynamicWindowsDesktop(t *testing.T, clt *authclient.Client) { const resourceYAML = `kind: dynamic_windows_desktop metadata: