Commit cde47ae
[8.19] (backport #9386) Enhancement/5235 upgrade cleans up downloads and extracted agent (#10037)
* Enhancement/5235 upgrade cleans up downloads and extracted agent (#9386) * enhancement(5235): added comments describing updates in error handling. returning path in error cases if the download is completed * enhancement(5235): added wrapper var for verifier constructor in download step to improve testability * enhancement(5235): added test for downloadArtifact verifier errors * enhancement(5235): addded cleanupPaths slice. added deferred call to remove paths in the cleanupPaths slice * enhancement(5235): add archive and hash path to cleanup paths after artifact download * enhancement(5235): update error handling after unpack. add new versioned home to cleanup paths enhancement(5235): move newHome declaration * enhancement(5235): added upgrade cleanup test * enhancement(5235): added changelog fragment enhancement(5235): updated changelog fragment * enhancement(5235): fix errors in step_download after rebase * enhancement(5235): fix error in step_download_test after rebase * enhancement(5235): refactor disk space error check and path cleanup into one deferred call * enhancement(5235): refactored verifier factory and updated relevant download tests * enhancement(5235): removed separate test for cleanup, added cleanup checks in error handling test * enhancement(5235): changed function name in verifier * enhancement(5235): using AddHashExtension instead of hard coded hash extension string in upgrader (cherry picked from commit ab23962) # Conflicts: # internal/pkg/agent/application/upgrade/upgrade.go # internal/pkg/agent/application/upgrade/upgrade_test.go * resolved merge conflicts --------- Co-authored-by: Kaan Yalti <[email protected]>
1 parent f2be06f commit cde47ae

File tree

6 files changed, +304 -73 lines changed

(new changelog fragment)

Lines changed: 32 additions & 0 deletions

@@ -0,0 +1,32 @@
+# Kind can be one of:
+# - breaking-change: a change to previously-documented behavior
+# - deprecation: functionality that is being removed in a later release
+# - bug-fix: fixes a problem in a previous version
+# - enhancement: extends functionality but does not break or fix existing behavior
+# - feature: new functionality
+# - known-issue: problems that we are aware of in a given version
+# - security: impacts on the security of a product or a user’s deployment.
+# - upgrade: important information for someone upgrading from a prior version
+# - other: does not fit into any of the other categories
+kind: enhancement
+
+# Change summary; a 80ish characters long description of the change.
+summary: agent cleans up downloads directory and the new versioned home if upgrade fails
+
+# Long description; in case the summary is not enough to describe the change
+# this field accommodate a description without length limits.
+# NOTE: This field will be rendered only for breaking-change and known-issue kinds at the moment.
+#description:
+
+# Affected component; usually one of "elastic-agent", "fleet-server", "filebeat", "metricbeat", "auditbeat", "all", etc.
+component: "elastic-agent"
+
+# PR URL; optional; the PR number that added the changeset.
+# If not present is automatically filled by the tooling finding the PR where this changelog fragment has been added.
+# NOTE: the tooling supports backports, so it's able to fill the original PR number instead of the backport PR number.
+# Please provide it if you are adding a fragment for a different PR.
+pr: https://github.com/elastic/elastic-agent/pull/9386
+
+# Issue URL; optional; the GitHub issue related to this changeset (either closes or is part of).
+# If not present is automatically filled by the tooling with the issue linked to the PR number.
+issue: https://github.com/elastic/elastic-agent/issues/5235

internal/pkg/agent/application/upgrade/artifact/download/verifier.go

Lines changed: 8 additions & 8 deletions
@@ -106,12 +106,12 @@ func VerifySHA512HashWithCleanup(log infoWarnLogger, filename string) error {
 		}
 	} else if err != nil && !errors.Is(err, os.ErrNotExist) {
 		// it's not a simple hash mismatch, probably something is wrong with the hash file
-		hashFileName := getHashFileName(filename)
+		hashFileName := AddHashExtension(filename)
 		hashFileBytes, readErr := os.ReadFile(hashFileName)
 		if readErr != nil {
-			log.Warnf("error verifying the package using hash file %q, unable do read contents for logging: %v", getHashFileName(filename), readErr)
+			log.Warnf("error verifying the package using hash file %q, unable do read contents for logging: %v", AddHashExtension(filename), readErr)
 		} else {
-			log.Warnf("error verifying the package using hash file %q, contents: %q", getHashFileName(filename), string(hashFileBytes))
+			log.Warnf("error verifying the package using hash file %q, contents: %q", AddHashExtension(filename), string(hashFileBytes))
 		}
 	}

@@ -121,20 +121,20 @@ func VerifySHA512HashWithCleanup(log infoWarnLogger, filename string) error {
 	return nil
 }

-func getHashFileName(filename string) string {
+func AddHashExtension(file string) string {
 	const hashFileExt = ".sha512"
-	if strings.HasSuffix(filename, hashFileExt) {
-		return filename
+	if strings.HasSuffix(file, hashFileExt) {
+		return file
 	}
-	return filename + hashFileExt
+	return file + hashFileExt
 }

 // VerifySHA512Hash checks that a sidecar file containing a sha512 checksum
 // exists and that the checksum in the sidecar file matches the checksum of
 // the file. It returns an error if validation fails.
 func VerifySHA512Hash(filename string) error {
 	hasher := sha512.New()
-	checksumFileName := getHashFileName(filename)
+	checksumFileName := AddHashExtension(filename)
 	return VerifyChecksum(hasher, filename, checksumFileName)
 }
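The rename from getHashFileName to the exported AddHashExtension lets the upgrader (further down in upgrade.go) derive the sidecar checksum path for a downloaded archive instead of hard coding the ".sha512" suffix. Below is a minimal, standalone sketch of the helper's behavior; the function body mirrors the diff above, and the artifact names are invented for illustration only.

package main

import (
	"fmt"
	"strings"
)

// addHashExtension mirrors AddHashExtension from the diff: it returns the
// sidecar checksum filename for an artifact path and is idempotent when the
// path already ends in ".sha512".
func addHashExtension(file string) string {
	const hashFileExt = ".sha512"
	if strings.HasSuffix(file, hashFileExt) {
		return file
	}
	return file + hashFileExt
}

func main() {
	// Hypothetical artifact names, for illustration only.
	fmt.Println(addHashExtension("elastic-agent-8.19.0-linux-x86_64.tar.gz"))
	// -> elastic-agent-8.19.0-linux-x86_64.tar.gz.sha512
	fmt.Println(addHashExtension("elastic-agent-8.19.0-linux-x86_64.tar.gz.sha512"))
	// -> unchanged
}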

internal/pkg/agent/application/upgrade/step_download.go

Lines changed: 12 additions & 5 deletions
@@ -41,16 +41,21 @@ type downloaderFactory func(*agtversion.ParsedSemVer, *logger.Logger, *artifact.

 type downloader func(context.Context, downloaderFactory, *agtversion.ParsedSemVer, *artifact.Config, *details.Details) (string, error)

+// abstraction for testability for newVerifier
+type verifierFactory func(*agtversion.ParsedSemVer, *logger.Logger, *artifact.Config) (download.Verifier, error)
+
 type artifactDownloader struct {
 	log            *logger.Logger
 	settings       *artifact.Config
 	fleetServerURI string
+	newVerifier    verifierFactory
 }

 func newArtifactDownloader(settings *artifact.Config, log *logger.Logger) *artifactDownloader {
 	return &artifactDownloader{
-		log:      log,
-		settings: settings,
+		log:         log,
+		settings:    settings,
+		newVerifier: newVerifier,
 	}
 }

@@ -123,19 +128,21 @@ func (a *artifactDownloader) downloadArtifact(ctx context.Context, parsedVersion
 		return "", fmt.Errorf("failed download of agent binary: %w", err)
 	}

+	// If there are errors in the following steps, we return the path so that we
+	// can cleanup the downloaded files.
 	if skipVerifyOverride {
 		return path, nil
 	}

 	if verifier == nil {
-		verifier, err = newVerifier(parsedVersion, a.log, &settings)
+		verifier, err = a.newVerifier(parsedVersion, a.log, &settings)
 		if err != nil {
-			return "", errors.New(err, "initiating verifier")
+			return path, errors.New(err, "initiating verifier")
 		}
 	}

 	if err := verifier.Verify(ctx, agentArtifact, *parsedVersion, skipDefaultPgp, pgpBytes...); err != nil {
-		return "", errors.New(err, "failed verification of agent binary")
+		return path, errors.New(err, "failed verification of agent binary")
 	}
 	return path, nil
 }
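The new verifierFactory field turns the verifier constructor into injectable behavior: newArtifactDownloader wires in the real newVerifier, and tests simply overwrite the field with a stub (see the test file below). The self-contained sketch that follows shows the same function-field injection pattern with invented widgetMaker and service names; none of these identifiers exist in elastic-agent.

package main

import (
	"errors"
	"fmt"
)

// widgetMaker plays the role of verifierFactory: a constructor the service
// depends on, stored as a struct field so tests can swap it out.
type widgetMaker func(name string) (string, error)

type service struct {
	makeWidget widgetMaker
}

// newService wires in the production constructor by default,
// much like newArtifactDownloader assigns newVerifier.
func newService() *service {
	return &service{
		makeWidget: func(name string) (string, error) {
			return "real-" + name, nil
		},
	}
}

func (s *service) run(name string) (string, error) {
	w, err := s.makeWidget(name)
	if err != nil {
		// Like downloadArtifact, return what we have so the caller can clean up.
		return w, fmt.Errorf("initiating widget: %w", err)
	}
	return w, nil
}

func main() {
	s := newService()
	fmt.Println(s.run("agent")) // real-agent <nil>

	// A test would overwrite the field with a failing stub:
	s.makeWidget = func(string) (string, error) { return "", errors.New("boom") }
	fmt.Println(s.run("agent")) // "" initiating widget: boom
}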

internal/pkg/agent/application/upgrade/step_download_test.go

Lines changed: 83 additions & 0 deletions
@@ -8,13 +8,16 @@ import (
 	"context"
 	"encoding/json"
 	"fmt"
+	"net/http"
+	"net/http/httptest"
 	"strings"
 	"testing"
 	"time"

 	"github.com/stretchr/testify/require"

 	"github.com/elastic/elastic-agent-libs/transport/httpcommon"
+	"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
 	"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact"
 	"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download"
 	downloadErrors "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download/errors"

@@ -303,6 +306,86 @@ func TestDownloadWithRetries(t *testing.T) {
 	})
 }

+type mockVerifier struct {
+	called      bool
+	returnError error
+}
+
+func (mv *mockVerifier) Name() string {
+	return ""
+}
+
+func (mv *mockVerifier) Verify(ctx context.Context, a artifact.Artifact, version agtversion.ParsedSemVer, skipDefaultPgp bool, pgpBytes ...string) error {
+	mv.called = true
+	return mv.returnError
+}
+
+func TestDownloadArtifact(t *testing.T) {
+	testLogger, _ := loggertest.New("TestDownloadArtifact")
+	tempConfig := &artifact.Config{} // used only to get os and arch, runtime.GOARCH returns amd64 which is not a valid arch when used in GetArtifactName
+
+	parsedVersion, err := agtversion.ParseVersion("8.9.0")
+	require.NoError(t, err)
+
+	upgradeDeatils := details.NewDetails(parsedVersion.String(), details.StateRequested, "")
+
+	mockContent := []byte("mock content")
+
+	testServer := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		w.WriteHeader(http.StatusOK)
+		_, err := w.Write(mockContent)
+		require.NoError(t, err)
+	}))
+	defer testServer.Close()
+
+	testError := errors.New("test error")
+
+	type testCase struct {
+		mockNewVerifierFactory verifierFactory
+		expectedError          error
+	}
+
+	testCases := map[string]testCase{
+		"should return path if verifier constructor fails": {
+			mockNewVerifierFactory: func(version *agtversion.ParsedSemVer, log *logger.Logger, settings *artifact.Config) (download.Verifier, error) {
+				return nil, testError
+			},
+			expectedError: testError,
+		},
+		"should return path if verifier fails": {
+			mockNewVerifierFactory: func(version *agtversion.ParsedSemVer, log *logger.Logger, settings *artifact.Config) (download.Verifier, error) {
+				return &mockVerifier{returnError: testError}, nil
+			},
+			expectedError: testError,
+		},
+	}
+
+	for name, tc := range testCases {
+		t.Run(name, func(t *testing.T) {
+			paths.SetTop(t.TempDir())
+
+			artifactPath, err := artifact.GetArtifactPath(agentArtifact, *parsedVersion, tempConfig.OS(), tempConfig.Arch(), paths.Downloads())
+			require.NoError(t, err)
+
+			settings := artifact.Config{
+				RetrySleepInitDuration: 20 * time.Millisecond,
+				HTTPTransportSettings: httpcommon.HTTPTransportSettings{
+					Timeout: 2 * time.Second,
+				},
+				SourceURI:       testServer.URL,
+				TargetDirectory: paths.Downloads(),
+			}
+
+			a := newArtifactDownloader(&settings, testLogger)
+			a.newVerifier = tc.mockNewVerifierFactory
+
+			path, err := a.downloadArtifact(t.Context(), parsedVersion, testServer.URL, upgradeDeatils, false, true)
+			require.ErrorIs(t, err, tc.expectedError)
+			require.Equal(t, artifactPath, path)
+		})
+	}
+}
+
 // mockUpgradeDetails returns a *details.Details value that has an observer registered on it for inspecting
 // certain properties of the object being set and unset. It also returns:
 // - a *time.Time value, which will be not nil if Metadata.RetryUntil is set on the mock value,
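Both cases assert that downloadArtifact still returns the on-disk artifact path even when verification fails, which is what lets the upgrader register it for cleanup. To exercise only this table-driven test locally, an invocation along these lines should work (package path taken from the file headers above; the test uses t.Context(), which requires a recent Go toolchain):

go test ./internal/pkg/agent/application/upgrade/ -run TestDownloadArtifact -v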

internal/pkg/agent/application/upgrade/upgrade.go

Lines changed: 35 additions & 7 deletions
@@ -23,6 +23,7 @@ import (
 	"github.com/elastic/elastic-agent/internal/pkg/agent/application/paths"
 	"github.com/elastic/elastic-agent/internal/pkg/agent/application/reexec"
 	"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact"
+	"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download"
 	upgradeErrors "github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/artifact/download/errors"
 	"github.com/elastic/elastic-agent/internal/pkg/agent/application/upgrade/details"
 	"github.com/elastic/elastic-agent/internal/pkg/agent/configuration"

@@ -241,14 +242,23 @@ func checkUpgrade(log *logger.Logger, currentVersion, newVersion agentVersion, m
 // Upgrade upgrades running agent, function returns shutdown callback that must be called by reexec.
 func (u *Upgrader) Upgrade(ctx context.Context, version string, sourceURI string, action *fleetapi.ActionUpgrade, det *details.Details, skipVerifyOverride bool, skipDefaultPgp bool, pgpBytes ...string) (_ reexec.ShutdownCallbackFn, err error) {
 	u.log.Infow("Upgrading agent", "version", version, "source_uri", sourceURI)
-
+	cleanupPaths := []string{}
 	defer func() {
 		if err != nil {
 			// Add the disk space error to the error chain if it is a disk space error
 			// so that we can use errors.Is to check for it
 			if u.isDiskSpaceErrorFunc(err) {
 				err = goerrors.Join(err, upgradeErrors.ErrInsufficientDiskSpace)
 			}
+			// If there is an error, we need to clean up downloads and any
+			// extracted agent files.
+			for _, path := range cleanupPaths {
+				rmErr := os.RemoveAll(path)
+				if rmErr != nil {
+					u.log.Errorw("error removing path during upgrade cleanup", "error.message", rmErr, "path", path)
+					err = goerrors.Join(err, rmErr)
+				}
+			}
 		}
 	}()

@@ -293,6 +303,15 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, sourceURI string
 	}

 	archivePath, err := u.artifactDownloader.downloadArtifact(ctx, parsedVersion, sourceURI, det, skipVerifyOverride, skipDefaultPgp, pgpBytes...)
+
+	// If the artifactPath is not empty, then the artifact was downloaded.
+	// There may still be an error in the download process, so we need to add
+	// the archive and hash path to the cleanup slice.
+	if archivePath != "" {
+		archiveHashPath := download.AddHashExtension(archivePath)
+		cleanupPaths = append(cleanupPaths, archivePath, archiveHashPath)
+	}
+
 	if err != nil {
 		// Run the same pre-upgrade cleanup task to get rid of any newly downloaded files
 		// This may have an issue if users are upgrading to the same version number.

@@ -317,7 +336,22 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, sourceURI string

 	u.log.Infow("Unpacking agent package", "version", newVersion)

+	// Nice to have: add check that no archive files end up in the current versioned home
 	unpackRes, err := u.unpacker.unpack(version, archivePath, paths.Data())
+
+	// If VersionedHome is empty then unpack has not started unpacking the
+	// archive yet. There's nothing to clean up. Return the error.
+	if unpackRes.VersionedHome == "" {
+		return nil, goerrors.Join(err, fmt.Errorf("versionedhome is empty: %v", unpackRes))
+	}
+
+	// If VersionedHome is not empty, it means that the unpack function has
+	// started extracting the archive. It may have failed while extracting.
+	// Setup newHome to be cleanedup.
+	newHome := filepath.Join(paths.Top(), unpackRes.VersionedHome)
+
+	cleanupPaths = append(cleanupPaths, newHome)
+
 	if err != nil {
 		return nil, err
 	}

@@ -327,12 +361,6 @@ func (u *Upgrader) Upgrade(ctx context.Context, version string, sourceURI string
 		return nil, errors.New("unknown hash")
 	}

-	if unpackRes.VersionedHome == "" {
-		return nil, fmt.Errorf("versionedhome is empty: %v", unpackRes)
-	}
-
-	newHome := filepath.Join(paths.Top(), unpackRes.VersionedHome)
-
 	if err := u.copyActionStore(u.log, newHome); err != nil {
 		return nil, fmt.Errorf("failed to copy action store: %w", err)
 	}