diff --git a/shim/restore.go b/shim/restore.go index df03ad0..43f06a5 100644 --- a/shim/restore.go +++ b/shim/restore.go @@ -30,6 +30,7 @@ import ( var ( ErrAlreadyRestored = errors.New("container is already restored") ErrRestoreRequestFailed = errors.New("restore request failed") + ErrRestoreDial = errors.New("failed to connect to node socket") ) func (c *Container) Restore(ctx context.Context) (*runc.Container, process.Process, error) { @@ -199,7 +200,7 @@ func createContainerLoggers(ctx context.Context, logPath string, tty bool) (stdo func MigrationRestore(ctx context.Context, r *task.CreateTaskRequest, cfg *Config) (skipStart bool, err error) { conn, err := net.Dial("unix", nodev1.SocketPath) if err != nil { - return false, fmt.Errorf("dialing node service: %w", err) + return false, fmt.Errorf("%w: dialing node service: %w", ErrRestoreDial, err) } log.G(ctx).Infof("creating restore request for container: %s", cfg.ContainerName) diff --git a/shim/task/service_zeropod.go b/shim/task/service_zeropod.go index 25ae2ba..b867a92 100644 --- a/shim/task/service_zeropod.go +++ b/shim/task/service_zeropod.go @@ -159,13 +159,17 @@ func (w *wrapper) Create(ctx context.Context, r *taskAPI.CreateTaskRequest) (_ * if cfg.AnyMigrationEnabled() { skipStart, err := zshim.MigrationRestore(ctx, r, cfg) if err != nil { - if !errors.Is(err, zshim.ErrRestoreRequestFailed) { + if errors.Is(err, zshim.ErrRestoreRequestFailed) || + errors.Is(err, zshim.ErrRestoreDial) { + // if the restore fails with ErrRestoreRequestFailed it's very + // likely it simply did not find a matching migration. Equally, + // if the shim can't manage to dial the node service there's no + // chance it can be restored. We log it and create the container + // from scratch. + log.G(ctx).Errorf("restore request failed: %s", err) + } else { return nil, err } - // if the restore fails with ErrRestoreRequestFailed it's very - // likely it simply did not find a matching migration. We log it and - // create the container from scratch. - log.G(ctx).Errorf("restore request failed: %s", err) } zeropodContainer.SetSkipStart(skipStart) }