-
Notifications
You must be signed in to change notification settings - Fork 910
Enhanced worker crash handling with integrated crash telemetry #5412
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
base: master
Are you sure you want to change the base?
Changes from all commits
40302fe
7a5bb52
c05c057
f2f4138
95aedec
f0a3af4
e18c663
425e29b
af10a40
1d3c339
1944d29
eedd2e4
ea914c3
50fe7d7
8e617cf
f2699ce
e67f638
0448035
b56bfc8
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
| Original file line number | Diff line number | Diff line change |
|---|---|---|
| @@ -0,0 +1,49 @@ | ||
| // Copyright (c) Microsoft Corporation. | ||
| // Licensed under the MIT License. | ||
|
|
||
| using System; | ||
| using System.Collections.Generic; | ||
| using System.Threading.Tasks; | ||
| using Microsoft.VisualStudio.Services.Agent.Util; | ||
| using Microsoft.TeamFoundation.DistributedTask.WebApi; | ||
| using Newtonsoft.Json; | ||
|
|
||
| namespace Microsoft.VisualStudio.Services.Agent.Listener.Telemetry | ||
| { | ||
/// <summary>
/// Agent service contract for reporting a worker crash as telemetry.
/// The service locator resolves it to <see cref="WorkerCrashTelemetryPublisher"/> by default.
/// </summary>
[ServiceLocator(Default = typeof(WorkerCrashTelemetryPublisher))]
public interface IWorkerCrashTelemetryPublisher : IAgentService
{
    /// <summary>
    /// Publishes worker crash telemetry for a job.
    /// </summary>
    /// <param name="hostContext">Host context handed to the implementation.</param>
    /// <param name="jobId">Identifier of the job the telemetry is reported for.</param>
    /// <param name="exitCode">Exit code to report alongside the job id.</param>
    /// <returns>A task that completes when the publish attempt has finished.</returns>
    Task PublishWorkerCrashTelemetryAsync(IHostContext hostContext, Guid jobId, int exitCode);
}
|
|
||
/// <summary>
/// Publishes a "WorkerCrash" telemetry event through the listener telemetry publisher.
/// </summary>
public sealed class WorkerCrashTelemetryPublisher : AgentService, IWorkerCrashTelemetryPublisher
{
    /// <summary>
    /// Resolves the listener telemetry publisher from <paramref name="hostContext"/>, builds a
    /// telemetry command carrying the job id and exit code, and publishes it.
    /// Publishing is best-effort: any exception is traced as a warning and not rethrown.
    /// </summary>
    /// <param name="hostContext">Host context used to resolve the telemetry publisher and passed to it.</param>
    /// <param name="jobId">Job id written to the telemetry payload as "JobId".</param>
    /// <param name="exitCode">Exit code written to the telemetry payload as "ExitCode".</param>
    /// <returns>A task that completes once the publish attempt has finished or failed.</returns>
    public async Task PublishWorkerCrashTelemetryAsync(IHostContext hostContext, Guid jobId, int exitCode)
    {
        try
        {
            var listenerTelemetry = hostContext.GetService<IAgenetListenerTelemetryPublisher>();
            Command crashCommand = CreateCrashCommand(jobId, exitCode);

            await listenerTelemetry.PublishEvent(hostContext, crashCommand);
            Trace.Info($"Published worker crash telemetry for job {jobId} with exit code {exitCode}");
        }
        catch (Exception ex)
        {
            // Telemetry must never take the caller down; trace the failure and continue.
            Trace.Warning($"Failed to publish worker crash telemetry: {ex}");
        }
    }

    // Builds the "telemetry publish" command whose data is the JSON-serialized crash payload.
    private static Command CreateCrashCommand(Guid jobId, int exitCode)
    {
        var payload = new Dictionary<string, object>();
        payload["JobId"] = jobId.ToString();
        payload["ExitCode"] = exitCode.ToString();

        var crashCommand = new Command("telemetry", "publish");
        crashCommand.Data = JsonConvert.SerializeObject(payload);
        crashCommand.Properties.Add("area", "AzurePipelinesAgent");
        crashCommand.Properties.Add("feature", "WorkerCrash");
        return crashCommand;
    }
}
| } |
| Original file line number | Diff line number | Diff line change |
|---|---|---|
|
|
@@ -715,6 +715,12 @@ public class AgentKnobs | |
| new EnvironmentKnobSource("FAIL_JOB_WHEN_AGENT_DIES"), | ||
| new BuiltInDefaultKnobSource("false")); | ||
|
|
||
// Toggle for enhanced worker crash handling. Sources, in the order listed: the
// AZP_ENHANCED_WORKER_CRASH_HANDLING environment knob source, then a built-in default of "false".
public static readonly Knob EnhancedWorkerCrashHandling =
    new Knob(
        nameof(EnhancedWorkerCrashHandling),
        "If true, enables enhanced worker crash handling with forced completion for Plan v8+ scenarios where worker crashes cannot send completion events",
        new EnvironmentKnobSource("AZP_ENHANCED_WORKER_CRASH_HANDLING"),
        new BuiltInDefaultKnobSource("false"));
|
Contributor
Is it intentional that this knob does not have a RuntimeKnobSource?
Contributor
Author
On the Listener side, enabling RuntimeKnobSource is not possible once the listener has started. Instead, I will replace this mechanism with a server API call, removing the dependency on the Agent knob.
Contributor
Author
Updated: runtime control is now implemented in the Agent.cs file. |
||
|
|
||
| public static readonly Knob AllowWorkDirectoryRepositories = new Knob( | ||
| nameof(AllowWorkDirectoryRepositories), | ||
| "Allows repositories to be checked out below work directory level on self hosted agents.", | ||
|
|
||
Uh oh!
There was an error while loading. Please reload this page.