12 changes: 6 additions & 6 deletions .github/workflows/ci.yml
@@ -11,12 +11,12 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: "1.23"
go-version: "1.24"

- uses: dominikh/staticcheck-action@v1.3.1
- uses: dominikh/staticcheck-action@v1.4.0
with:
install-go: false
version: "2024.1"
version: "2025.1"

test:
runs-on: ${{ matrix.os }}
@@ -29,7 +29,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: "1.23"
go-version: "1.24"

- name: test
run: sudo --preserve-env make test
@@ -47,7 +47,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: "1.23"
go-version: "1.24"

- name: Install protoc-gen-go
run: |
@@ -84,7 +84,7 @@ jobs:
- name: Set up Go
uses: actions/setup-go@v4
with:
go-version: "1.23"
go-version: "1.24"

- name: e2e
run: make test-e2e
15 changes: 13 additions & 2 deletions README.md
@@ -6,7 +6,7 @@ the last TCP connection. While in scaled down state, it will listen on the same
port the application inside the container was listening on and will restore the
container on the first incoming connection. Depending on the memory size of the
checkpointed program this happens in tens to a few hundred milliseconds,
virtually unnoticable to the user. As all the memory contents are stored to disk
virtually unnoticeable to the user. As all the memory contents are stored to disk
during checkpointing, all state of the application is restored. [It adjusts
resource requests](#in-place-resource-scaling) in scaled down state in-place if
the cluster supports it. To prevent huge resource usage spikes when draining a
@@ -283,7 +283,7 @@ zeropod.ctrox.dev/ports-map: "nginx=80,81;sidecar=8080"
### `zeropod.ctrox.dev/scaledown-duration`

Configures how long to wait before scaling down again after the last
connnection. The duration is reset whenever a connection happens. Setting it to
connection. The duration is reset whenever a connection happens. Setting it to
0 disables scaling down. If unset it defaults to 1 minute.

```yaml
@@ -354,6 +354,17 @@ to be enabled in the host kernel (`CONFIG_USERFAULTFD`).
zeropod.ctrox.dev/live-migrate: "nginx"
```

### `zeropod.ctrox.dev/disable-migrate-data`

When migrating a pod (whether live or scaled down), data that has been
written to the container's file system is copied over. This is done by
copying the upper layer of the container's overlayfs. Data migration is
enabled by default but can be disabled with this annotation.

```yaml
zeropod.ctrox.dev/disable-migrate-data: "true"
```
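
For illustration, the data copy described above amounts to replicating the
contents of the container's overlayfs upper directory on the target node. The
sketch below shows the general shape of such a copy in Go; it is not zeropod's
actual implementation — a real one would also have to preserve ownership,
extended attributes, and overlayfs whiteouts — and all paths are hypothetical.

```go
package main

import (
	"io"
	"io/fs"
	"log"
	"os"
	"path/filepath"
)

// copyUpper recursively copies the contents of an overlayfs upper
// directory into dst, preserving file modes. Symlinks and special
// files are skipped to keep the sketch short.
func copyUpper(src, dst string) error {
	return filepath.WalkDir(src, func(p string, d fs.DirEntry, err error) error {
		if err != nil {
			return err
		}
		rel, err := filepath.Rel(src, p)
		if err != nil {
			return err
		}
		target := filepath.Join(dst, rel)
		info, err := d.Info()
		if err != nil {
			return err
		}
		if d.IsDir() {
			return os.MkdirAll(target, info.Mode().Perm())
		}
		if !info.Mode().IsRegular() {
			return nil // skip symlinks and device files in this sketch
		}
		in, err := os.Open(p)
		if err != nil {
			return err
		}
		defer in.Close()
		out, err := os.OpenFile(target, os.O_CREATE|os.O_TRUNC|os.O_WRONLY, info.Mode().Perm())
		if err != nil {
			return err
		}
		defer out.Close()
		_, err = io.Copy(out, in)
		return err
	})
}

func main() {
	// Hypothetical source and destination paths.
	if err := copyUpper("/var/lib/zeropod/i/2a5c9e/snapshot/upper", "/tmp/upper-copy"); err != nil {
		log.Fatal(err)
	}
}
```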

### `io.containerd.runc.v2.group`

It's possible to reduce the resource usage further by grouping multiple pods
5 changes: 5 additions & 0 deletions api/node/v1/meta.go
@@ -8,6 +8,7 @@ const (
SocketPath = runPath + "node.sock"
imagesPath = varPath + "i/"
SnapshotSuffix = "snapshot"
UpperSuffix = "upper"
WorkDirSuffix = "work"
MigrateAnnotationKey = "zeropod.ctrox.dev/migrate"
LiveMigrateAnnotationKey = "zeropod.ctrox.dev/live-migrate"
@@ -28,6 +29,10 @@ func SnapshotPath(id string) string {
return filepath.Join(ImagePath(id), SnapshotSuffix)
}

func UpperPath(id string) string {
return filepath.Join(ImagePath(id), SnapshotSuffix, UpperSuffix)
}

func LazyPagesSocket(id string) string {
return filepath.Join(runPath, id+".sock")
}
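
Worth noting: the new `UpperPath` nests the upper directory inside the
snapshot directory rather than next to it. A small usage sketch follows; the
module path and the `/var/lib/zeropod` prefix shown in the comments are
assumptions, since the real prefix comes from the unexported `varPath`
constant.

```go
package main

import (
	"fmt"

	v1 "github.com/ctrox/zeropod/api/node/v1" // assumed module path
)

func main() {
	id := "2a5c9e" // hypothetical container ID
	fmt.Println(v1.SnapshotPath(id)) // e.g. /var/lib/zeropod/i/2a5c9e/snapshot
	fmt.Println(v1.UpperPath(id))    // e.g. /var/lib/zeropod/i/2a5c9e/snapshot/upper
}
```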
2 changes: 1 addition & 1 deletion cmd/freezer/Dockerfile
@@ -10,6 +10,6 @@ COPY cmd/freezer cmd/freezer
ARG TARGETARCH
RUN CGO_ENABLED=0 GOOS=linux GOARCH=$TARGETARCH GO111MODULE=on go build -ldflags "-s -w" -a -o freezer cmd/freezer/main.go

FROM gcr.io/distroless/static-debian12
FROM gcr.io/distroless/static-debian12:debug
COPY --from=builder /workspace/freezer /
ENTRYPOINT ["/freezer"]
14 changes: 13 additions & 1 deletion cmd/freezer/main.go
@@ -1,13 +1,16 @@
package main

import (
"context"
"encoding/json"
"flag"
"fmt"
"io"
"log/slog"
"math/rand/v2"
"net"
"net/http"
"os"
"time"
)

@@ -68,7 +71,16 @@ func main() {
fmt.Fprintf(w, "%s", freezeJSON)
})

go http.ListenAndServe(":8080", nil)
lc := net.ListenConfig{}
// disable multipath TCP for now since it's not supported by CRIU
lc.SetMultipathTCP(false)

ln, err := lc.Listen(context.Background(), "tcp", ":8080")
if err != nil {
slog.Error("tcp listen", "err", err)
os.Exit(1)
}
go http.Serve(ln, nil)
for {
since := time.Since(f.LastObservation)
if since > time.Millisecond*50 {
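
The switch from `http.ListenAndServe` to an explicit `net.ListenConfig` is
what makes it possible to opt out of multipath TCP here; this is presumably
needed because Go 1.24, which this PR upgrades to, enables MPTCP by default
for listening sockets on Linux, and CRIU cannot checkpoint MPTCP sockets.
Below is a standalone sketch of the same pattern, extended with a check that
an accepted connection really negotiated plain TCP:

```go
package main

import (
	"context"
	"fmt"
	"log"
	"net"
)

func main() {
	// Explicitly disable multipath TCP before binding, since CRIU
	// cannot checkpoint MPTCP sockets.
	var lc net.ListenConfig
	lc.SetMultipathTCP(false)

	ln, err := lc.Listen(context.Background(), "tcp", ":8080")
	if err != nil {
		log.Fatalf("tcp listen: %v", err)
	}
	defer ln.Close()

	conn, err := ln.Accept()
	if err != nil {
		log.Fatalf("accept: %v", err)
	}
	defer conn.Close()

	// Sanity check: the accepted connection should not be MPTCP.
	if tc, ok := conn.(*net.TCPConn); ok {
		mptcp, err := tc.MultipathTCP()
		fmt.Printf("multipath TCP in use: %v (err: %v)\n", mptcp, err)
	}
}
```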
2 changes: 2 additions & 0 deletions cmd/installer/main.go
@@ -86,6 +86,7 @@ network-lock skip
"zeropod.ctrox.dev/live-migrate",
"zeropod.ctrox.dev/disable-probe-detection",
"zeropod.ctrox.dev/probe-buffer-size",
"zeropod.ctrox.dev/disable-migrate-data",
"io.containerd.runc.v2.group"
]

@@ -108,6 +109,7 @@ network-lock skip
"zeropod.ctrox.dev/live-migrate",
"zeropod.ctrox.dev/disable-probe-detection",
"zeropod.ctrox.dev/probe-buffer-size",
"zeropod.ctrox.dev/disable-migrate-data",
"io.containerd.runc.v2.group"
]

2 changes: 1 addition & 1 deletion cmd/manager/Dockerfile
@@ -1,5 +1,5 @@
ARG CRIU_IMAGE_NAME=ghcr.io/ctrox/zeropod-criu
ARG CRIU_VERSION=v4.0
ARG CRIU_VERSION=v4.1

FROM --platform=$BUILDPLATFORM golang:1.24 AS builder

2 changes: 1 addition & 1 deletion e2e/Dockerfile
@@ -1,4 +1,4 @@
FROM docker:28.0-cli
FROM docker:28.3.3-cli

RUN apk add --update go make iptables
WORKDIR /app
129 changes: 85 additions & 44 deletions e2e/migration_test.go
@@ -2,7 +2,9 @@ package e2e

import (
"context"
"fmt"
"path"
"strings"
"testing"
"time"

@@ -15,22 +17,23 @@ import (
corev1 "k8s.io/api/core/v1"
)

type testCase struct {
deploy *appsv1.Deployment
svc *corev1.Service
sameNode bool
migrationCount int
liveMigration bool
expectDataNotMigrated bool
beforeMigration func(t *testing.T)
afterMigration func(t *testing.T, tc testCase)
}

func TestMigration(t *testing.T) {
if testing.Short() {
t.Skip("skipping e2e test")
}
e2e := setupOnce(t)
ctx := context.Background()

type testCase struct {
deploy *appsv1.Deployment
svc *corev1.Service
sameNode bool
migrationCount int
liveMigration bool
beforeMigration func(t *testing.T)
afterMigration func(t *testing.T)
}
cases := map[string]testCase{
"same-node live migration": {
deploy: freezerDeployment("same-node-live-migration", "default", 256, liveMigrateAnnotation("freezer")),
@@ -54,27 +57,33 @@
migrationCount: 1,
},
"same-node non-live migration": {
deploy: freezerDeployment("same-node-migration", "default", 1, migrateAnnotation("freezer"), scaleDownAfter(time.Second*5)),
svc: testService(8080),
sameNode: true,
liveMigration: false,
migrationCount: 1,
beforeMigration: nonLiveBeforeMigration,
afterMigration: nonLiveAfterMigration,
deploy: freezerDeployment("same-node-migration", "default", 1, migrateAnnotation("freezer"), scaleDownAfter(time.Second)),
svc: testService(8080),
sameNode: true,
liveMigration: false,
migrationCount: 1,
afterMigration: nonLiveAfterMigration,
},
"cross-node non-live migration": {
deploy: freezerDeployment("same-node-migration", "default", 1, migrateAnnotation("freezer"), scaleDownAfter(time.Second*5)),
svc: testService(8080),
sameNode: false,
liveMigration: false,
migrationCount: 1,
beforeMigration: nonLiveBeforeMigration,
afterMigration: nonLiveAfterMigration,
deploy: freezerDeployment("cross-node-migration", "default", 1, migrateAnnotation("freezer"), scaleDownAfter(time.Second)),
svc: testService(8080),
sameNode: false,
liveMigration: false,
migrationCount: 1,
afterMigration: nonLiveAfterMigration,
},
"data migration disabled": {
deploy: freezerDeployment("data-migration-disabled", "default", 1, liveMigrateAnnotation("freezer"), disableDataMigration()),
svc: testService(8080),
sameNode: true,
liveMigration: true,
expectDataNotMigrated: true,
migrationCount: 1,
},
}

migrate := func(t *testing.T, ctx context.Context, e2e *e2eConfig, tc testCase) {
pods := podsOfDeployment(t, ctx, e2e.client, tc.deploy)
pods := podsOfDeployment(t, e2e.client, tc.deploy)
if len(pods) < 1 {
t.Fatal("expected at least one pod in the deployment")
}
@@ -87,10 +96,12 @@
uncordon := cordonNode(t, ctx, e2e.client, pod.Spec.NodeName)
defer uncordon()
}

assert.NoError(t, e2e.client.Delete(ctx, &pod))
if !tc.liveMigration {
waitUntilScaledDown(t, ctx, e2e.client, &pod)
}
require.NoError(t, e2e.client.Delete(ctx, &pod))
assert.Eventually(t, func() bool {
pods := podsOfDeployment(t, ctx, e2e.client, tc.deploy)
pods := podsOfDeployment(t, e2e.client, tc.deploy)
if len(pods) != 1 {
return false
}
@@ -133,6 +144,7 @@
for range tc.migrationCount {
tc.beforeMigration(t)
checkCtx, cancel := context.WithCancel(ctx)
writePodData(t, migrationPod(t, tc.deploy))
defer cancel()
if tc.liveMigration {
go func() {
Expand All @@ -142,12 +154,52 @@ func TestMigration(t *testing.T) {
}
migrate(t, ctx, e2e, tc)
cancel()
tc.afterMigration(t)
tc.afterMigration(t, tc)
if tc.expectDataNotMigrated {
_, err := readPodData(t, migrationPod(t, tc.deploy))
assert.Error(t, err)
} else {
data, err := readPodDataEventually(t, migrationPod(t, tc.deploy))
assert.NoError(t, err)
assert.Equal(t, t.Name(), strings.TrimSpace(data))
}
}
})
}
}

func migrationPod(t testing.TB, deploy *appsv1.Deployment) *corev1.Pod {
pods := podsOfDeployment(t, e2e.client, deploy)
if len(pods) != 1 {
// Fatalf instead of Errorf: indexing pods[0] below would panic otherwise.
t.Fatalf("expected exactly one pod of deployment %s, got %d", deploy.Name, len(pods))
}
return &pods[0]
}

func writePodData(t testing.TB, pod *corev1.Pod) {
assert.Eventually(t, func() bool {
_, _, err := podExec(e2e.cfg, pod, fmt.Sprintf("echo %s > /containerdata", t.Name()))
return err == nil
}, time.Second*10, time.Second)
}

func readPodData(t testing.TB, pod *corev1.Pod) (string, error) {
data, _, err := podExec(e2e.cfg, pod, "cat /containerdata")
return data, err
}

func readPodDataEventually(t testing.TB, pod *corev1.Pod) (string, error) {
var data string
var err error
if !assert.Eventually(t, func() bool {
data, err = readPodData(t, pod)
return err == nil
}, time.Second*10, time.Second) {
return "", err
}
return data, nil
}

func defaultBeforeMigration(t *testing.T) {
assert.Eventually(t, func() bool {
if err := freezerWrite(t.Name(), e2e.port); err != nil {
@@ -161,28 +213,17 @@ func defaultBeforeMigration(t *testing.T) {
}, time.Second*10, time.Second)
}

func defaultAfterMigration(t *testing.T) {
func defaultAfterMigration(t *testing.T, _ testCase) {
f, err := freezerRead(e2e.port)
require.NoError(t, err)
t.Logf("freeze duration: %s", f.LastFreezeDuration)
assert.Equal(t, t.Name(), f.Data, "freezer memory has persisted migration")
assert.Less(t, f.LastFreezeDuration, time.Second, "freeze duration")
}

func nonLiveBeforeMigration(t *testing.T) {
defaultBeforeMigration(t)
require.Eventually(t, func() bool {
pods := podsOfDeployment(t, context.Background(), e2e.client, freezerDeployment("same-node-migration", "default", 1))
if len(pods) == 0 {
return false
}
return pods[0].Labels[path.Join(manager.StatusLabelKeyPrefix, "freezer")] == shimv1.ContainerPhase_SCALED_DOWN.String()
}, time.Second*30, time.Second, "container is scaled down before migration")
}

func nonLiveAfterMigration(t *testing.T) {
func nonLiveAfterMigration(t *testing.T, tc testCase) {
require.Never(t, func() bool {
pods := podsOfDeployment(t, context.Background(), e2e.client, freezerDeployment("same-node-migration", "default", 1))
pods := podsOfDeployment(t, e2e.client, tc.deploy)
if len(pods) == 0 {
return true
}
@@ -192,5 +233,5 @@ func nonLiveAfterMigration(t *testing.T) {
}
return status != shimv1.ContainerPhase_SCALED_DOWN.String()
}, time.Second*30, time.Second, "container is scaled down after migration")
defaultAfterMigration(t)
defaultAfterMigration(t, tc)
}