Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion shim/restore.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (
// Sentinel errors for the restore path. They are meant to be matched with
// errors.Is, including when wrapped with additional context via %w.
var (
	// ErrAlreadyRestored reports that the container is already restored.
	ErrAlreadyRestored = errors.New("container is already restored")

	// ErrRestoreRequestFailed reports that a restore request failed.
	ErrRestoreRequestFailed = errors.New("restore request failed")

	// ErrRestoreDial reports that connecting to the node socket failed.
	// MigrationRestore wraps it together with the underlying dial error.
	ErrRestoreDial = errors.New("failed to connect to node socket")
)

func (c *Container) Restore(ctx context.Context) (*runc.Container, process.Process, error) {
Expand Down Expand Up @@ -199,7 +200,7 @@ func createContainerLoggers(ctx context.Context, logPath string, tty bool) (stdo
func MigrationRestore(ctx context.Context, r *task.CreateTaskRequest, cfg *Config) (skipStart bool, err error) {
conn, err := net.Dial("unix", nodev1.SocketPath)
if err != nil {
return false, fmt.Errorf("dialing node service: %w", err)
return false, fmt.Errorf("%w: dialing node service: %w", ErrRestoreDial, err)
}
log.G(ctx).Infof("creating restore request for container: %s", cfg.ContainerName)

Expand Down
14 changes: 9 additions & 5 deletions shim/task/service_zeropod.go
Original file line number Diff line number Diff line change
Expand Up @@ -159,13 +159,17 @@ func (w *wrapper) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ *
if cfg.AnyMigrationEnabled() {
skipStart, err := zshim.MigrationRestore(ctx, r, cfg)
if err != nil {
if !errors.Is(err, zshim.ErrRestoreRequestFailed) {
if errors.Is(err, zshim.ErrRestoreRequestFailed) ||
errors.Is(err, zshim.ErrRestoreDial) {
// if the restore fails with ErrRestoreRequestFailed it's very
// likely it simply did not find a matching migration. Equally,
// if the shim can't manage to dial the node service there's no
// chance it can be restored. We log it and create the container
// from scratch.
log.G(ctx).Errorf("restore request failed: %s", err)
} else {
return nil, err
}
// if the restore fails with ErrRestoreRequestFailed it's very
// likely it simply did not find a matching migration. We log it and
// create the container from scratch.
log.G(ctx).Errorf("restore request failed: %s", err)
}
zeropodContainer.SetSkipStart(skipStart)
}
Expand Down