diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/SandboxManager.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/SandboxManager.kt
index c98ee38b7..b80589447 100644
--- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/SandboxManager.kt
+++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/SandboxManager.kt
@@ -19,6 +19,8 @@ package com.alibaba.opensandbox.sandbox
 import com.alibaba.opensandbox.sandbox.config.ConnectionConfig
 import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
 import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxException
+import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxReadyTimeoutException
+import com.alibaba.opensandbox.sandbox.domain.exceptions.SnapshotFailedException
 import com.alibaba.opensandbox.sandbox.domain.models.diagnostics.DiagnosticContent
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSandboxInfos
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSnapshotInfos
@@ -27,6 +29,7 @@ import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SnapshotFilter
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SnapshotInfo
+import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SnapshotState
 import com.alibaba.opensandbox.sandbox.domain.services.Diagnostics
 import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
 import com.alibaba.opensandbox.sandbox.infrastructure.factory.AdapterFactory
@@ -224,6 +227,113 @@ class SandboxManager internal constructor(
 
     fun deleteSnapshot(snapshotId: String) = sandboxService.deleteSnapshot(snapshotId)
 
+    /**
+     * Waits for a snapshot to reach the [SnapshotState.READY] state, polling at a fixed interval.
+     *
+     * Snapshot creation is asynchronous: [createSnapshot] returns as soon as the snapshot record
+     * exists (typically in the [SnapshotState.CREATING] state). This helper polls [getSnapshot]
+     * until the snapshot becomes ready, fails, or the timeout elapses. Every other state
+     * (including values unknown to this SDK) counts as "not ready yet" and is polled again.
+     *
+     * Elapsed time is measured with the monotonic [System.nanoTime] clock, so wall-clock
+     * adjustments (NTP steps, manual changes) can neither shorten nor extend the wait. The
+     * calling thread blocks in [Thread.sleep] between polls.
+     *
+     * @param snapshotId Unique identifier of the snapshot to wait for
+     * @param timeout Maximum time to wait for the snapshot to become ready. Defaults to 900s to
+     * cover the server's `snapshot_create_timeout_seconds` (Kubernetes deployments may take up to
+     * the controller `commitJobTimeout`, 10m by default, before a snapshot is Ready or Failed).
+     * A zero or negative value times out before the first poll; a value too large to express in
+     * nanoseconds is treated as the longest representable wait
+     * @param pollingInterval Time between successive [getSnapshot] polls
+     * @return The ready [SnapshotInfo]
+     * @throws SnapshotFailedException if the snapshot reaches the [SnapshotState.FAILED] state
+     * @throws SandboxReadyTimeoutException if the snapshot is not ready within [timeout]
+     * @throws InvalidArgumentException if [pollingInterval] is not positive
+     */
+    @JvmOverloads
+    fun waitForSnapshotReady(
+        snapshotId: String,
+        timeout: Duration = Duration.ofSeconds(900),
+        pollingInterval: Duration = Duration.ofSeconds(2),
+    ): SnapshotInfo {
+        if (pollingInterval.isNegative || pollingInterval.isZero) {
+            throw InvalidArgumentException("Polling interval must be positive, got: $pollingInterval")
+        }
+        logger.info("Waiting for snapshot {} to become ready (timeout: {}s)", snapshotId, timeout.seconds)
+
+        val nanosPerMilli = 1_000_000L
+        val timeoutNanos = saturatedNanos(timeout)
+        val intervalNanos = saturatedNanos(pollingInterval)
+        val startNanos = System.nanoTime()
+        var attempt = 0
+
+        // nanoTime values are only meaningful as differences, so the budget is tracked as
+        // "timeout minus elapsed" rather than as an absolute deadline.
+        fun remainingNanos(): Long = timeoutNanos - (System.nanoTime() - startNanos)
+
+        fun timeoutFailure() =
+            SandboxReadyTimeoutException(
+                "Snapshot $snapshotId did not become ready within ${timeout.seconds}s ($attempt attempts)",
+            )
+
+        while (true) {
+            // Enforce the deadline before each poll so a snapshot that only turns Ready after the
+            // timeout is reported as a timeout rather than a late success.
+            if (remainingNanos() <= 0) {
+                throw timeoutFailure()
+            }
+            attempt++
+            val snapshot = getSnapshot(snapshotId)
+            when (snapshot.status.state) {
+                SnapshotState.READY -> {
+                    // getSnapshot itself may block past the deadline on a slow server; only accept
+                    // READY if we are still within the timeout, otherwise surface a timeout.
+                    if (remainingNanos() <= 0) {
+                        throw timeoutFailure()
+                    }
+                    logger.info("Snapshot {} is ready after {} attempts", snapshotId, attempt)
+                    return snapshot
+                }
+                SnapshotState.FAILED -> {
+                    val detail = snapshot.status.message ?: snapshot.status.reason ?: "no detail provided"
+                    throw SnapshotFailedException("Snapshot $snapshotId failed: $detail")
+                }
+                else ->
+                    logger.debug(
+                        "Snapshot {} not ready yet (state: {}, attempt #{})",
+                        snapshotId,
+                        snapshot.status.state,
+                        attempt,
+                    )
+            }
+
+            // Sleep for at most the remaining window so we keep polling until the real deadline
+            // instead of giving up a full interval early. The sleep is rounded up to a whole
+            // millisecond so waking a hair early cannot trigger an extra poll at the deadline.
+            val sleepNanos = minOf(intervalNanos, remainingNanos())
+            if (sleepNanos > 0) {
+                val wholeMillis = sleepNanos / nanosPerMilli
+                Thread.sleep(if (sleepNanos % nanosPerMilli == 0L) wholeMillis else wholeMillis + 1)
+            }
+        }
+    }
+
+    /**
+     * Converts [duration] to nanoseconds without throwing: negative durations become 0 and
+     * durations beyond the `Long` nanosecond range become [Long.MAX_VALUE].
+     */
+    private fun saturatedNanos(duration: Duration): Long =
+        if (duration.isNegative) {
+            0L
+        } else {
+            try {
+                duration.toNanos()
+            } catch (e: ArithmeticException) {
+                Long.MAX_VALUE
+            }
+        }
+
     /**
      * Closes this resource, relinquishing any underlying resources.
*
diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt
index 25092de02..0732f4b66 100644
--- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt
+++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/exceptions/SandboxException.kt
@@ -92,6 +92,18 @@ class SandboxReadyTimeoutException(
         error = SandboxError(SandboxError.READY_TIMEOUT, message),
     )
 
+/**
+ * Thrown when a snapshot being waited on reports the `Failed` state; carries [SandboxError.SNAPSHOT_FAILED].
+ */
+class SnapshotFailedException(
+    message: String? = null,
+    cause: Throwable? = null,
+) : SandboxException(
+        message = message,
+        cause = cause,
+        error = SandboxError(SandboxError.SNAPSHOT_FAILED, message),
+    )
+
 /**
  * Thrown when an invalid argument is provided to an SDK method.
  * Similar to [IllegalArgumentException] but within the SDK's exception hierarchy.
@@ -180,6 +192,9 @@ data class SandboxError(
         const val INVALID_ARGUMENT = "INVALID_ARGUMENT"
         const val UNEXPECTED_RESPONSE = "UNEXPECTED_RESPONSE"
 
+        /** Code carried by [SnapshotFailedException]: a snapshot reached the `Failed` state while being waited on. */
+        const val SNAPSHOT_FAILED = "SNAPSHOT_FAILED"
+
         /** The requested file or directory does not exist (server responds with HTTP 404).
*/
         const val FILE_NOT_FOUND = "FILE_NOT_FOUND"
 
diff --git a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/sandboxes/SandboxModels.kt b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/sandboxes/SandboxModels.kt
index 69a0ca419..cbc547ba8 100644
--- a/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/sandboxes/SandboxModels.kt
+++ b/sdks/sandbox/kotlin/sandbox/src/main/kotlin/com/alibaba/opensandbox/sandbox/domain/models/sandboxes/SandboxModels.kt
@@ -753,6 +753,30 @@ class SandboxCreateResponse(
     val platform: PlatformSpec? = null,
 )
 
+/**
+ * String constants for the lifecycle state of a snapshot, as carried in [SnapshotStatus.state].
+ *
+ * Common state values:
+ * - Creating: Snapshot is being captured from the sandbox
+ * - Ready: Snapshot has been captured and can be used to restore a sandbox
+ * - Failed: Snapshot capture encountered a critical error
+ * - Deleting: Snapshot is being deleted
+ *
+ * State transitions:
+ * - Creating → Ready (capture completes successfully)
+ * - Creating → Failed (on error)
+ *
+ * Note: New state values may be added in future versions; [UNKNOWN] is declared here as well.
+ * Clients should handle unrecognized state values gracefully.
+ */
+object SnapshotState {
+    const val CREATING = "Creating"
+    const val READY = "Ready"
+    const val FAILED = "Failed"
+    const val DELETING = "Deleting"
+    const val UNKNOWN = "Unknown"
+}
+
 class SnapshotStatus(
     val state: String,
     val reason: String?,
diff --git a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxManagerTest.kt b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxManagerTest.kt
index f2a3210d8..f96039ea9 100644
--- a/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxManagerTest.kt
+++ b/sdks/sandbox/kotlin/sandbox/src/test/kotlin/com/alibaba/opensandbox/sandbox/SandboxManagerTest.kt
@@ -16,6 +16,9 @@
 
 package com.alibaba.opensandbox.sandbox
 
+import com.alibaba.opensandbox.sandbox.domain.exceptions.InvalidArgumentException
+import com.alibaba.opensandbox.sandbox.domain.exceptions.SandboxReadyTimeoutException
+import com.alibaba.opensandbox.sandbox.domain.exceptions.SnapshotFailedException
 import com.alibaba.opensandbox.sandbox.domain.models.diagnostics.DiagnosticContent
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PagedSandboxInfos
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.PaginationInfo
@@ -25,6 +28,9 @@ import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxInfo
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxRenewResponse
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxState
 import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SandboxStatus
+import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SnapshotInfo
+import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SnapshotState
+import com.alibaba.opensandbox.sandbox.domain.models.sandboxes.SnapshotStatus
 import com.alibaba.opensandbox.sandbox.domain.services.Diagnostics
 import com.alibaba.opensandbox.sandbox.domain.services.Sandboxes
 import io.mockk.Runs
@@ -36,6 +42,7 @@
import io.mockk.mockk
 import io.mockk.verify
 import org.junit.jupiter.api.Assertions.assertEquals
 import org.junit.jupiter.api.Assertions.assertSame
+import org.junit.jupiter.api.Assertions.assertThrows
 import org.junit.jupiter.api.BeforeEach
 import org.junit.jupiter.api.Test
 import org.junit.jupiter.api.extension.ExtendWith
@@ -190,4 +197,107 @@ class SandboxManagerTest {
 
         verify { httpClientProvider.close() }
     }
+
+    private fun snapshot(state: String): SnapshotInfo =
+        SnapshotInfo(
+            id = "snapshot-id",
+            sandboxId = "sandbox-id",
+            name = "snap",
+            status = SnapshotStatus(state = state, reason = null, message = null, lastTransitionAt = null),
+            createdAt = OffsetDateTime.now(),
+        )
+
+    @Test
+    fun `waitForSnapshotReady returns once the snapshot becomes ready`() {
+        val sequence = listOf(snapshot(SnapshotState.CREATING), snapshot(SnapshotState.READY))
+        var index = 0
+        every { sandboxService.getSnapshot("snapshot-id") } answers { sequence[index++] }
+
+        val result =
+            sandboxManager.waitForSnapshotReady(
+                "snapshot-id",
+                Duration.ofSeconds(5),
+                Duration.ofMillis(10),
+            )
+
+        assertEquals(SnapshotState.READY, result.status.state)
+        verify(exactly = 2) { sandboxService.getSnapshot("snapshot-id") }
+    }
+
+    @Test
+    fun `waitForSnapshotReady throws SnapshotFailedException when the snapshot fails`() {
+        every { sandboxService.getSnapshot("snapshot-id") } returns snapshot(SnapshotState.FAILED)
+
+        assertThrows(SnapshotFailedException::class.java) {
+            sandboxManager.waitForSnapshotReady("snapshot-id", Duration.ofSeconds(5), Duration.ofMillis(10))
+        }
+    }
+
+    @Test
+    fun `waitForSnapshotReady throws SandboxReadyTimeoutException when it never becomes ready`() {
+        every { sandboxService.getSnapshot("snapshot-id") } returns snapshot(SnapshotState.CREATING)
+
+        assertThrows(SandboxReadyTimeoutException::class.java) {
+            sandboxManager.waitForSnapshotReady("snapshot-id", Duration.ofMillis(30), Duration.ofMillis(10))
+        }
+    }
+
+    @Test
+    fun `waitForSnapshotReady rejects a non-positive polling interval`() {
+        assertThrows(InvalidArgumentException::class.java) {
+            sandboxManager.waitForSnapshotReady("snapshot-id", Duration.ofSeconds(5), Duration.ZERO)
+        }
+    }
+
+    @Test
+    fun `waitForSnapshotReady keeps polling within the window instead of giving up early`() {
+        // Two CREATING polls spaced 20ms apart stay well inside the 1s window, so the third poll's READY wins.
+        val sequence =
+            listOf(
+                snapshot(SnapshotState.CREATING),
+                snapshot(SnapshotState.CREATING),
+                snapshot(SnapshotState.READY),
+            )
+        var index = 0
+        every { sandboxService.getSnapshot("snapshot-id") } answers { sequence[index++] }
+
+        val result =
+            sandboxManager.waitForSnapshotReady(
+                "snapshot-id",
+                Duration.ofSeconds(1),
+                Duration.ofMillis(20),
+            )
+
+        assertEquals(SnapshotState.READY, result.status.state)
+        verify(exactly = 3) { sandboxService.getSnapshot("snapshot-id") }
+    }
+
+    @Test
+    fun `waitForSnapshotReady does not accept a snapshot that turns ready only after the deadline`() {
+        // The interval (100ms) outlasts the timeout (80ms), so the one sleep after the CREATING poll runs
+        // up to the deadline; READY would only come from a second poll, so the call must time out instead.
+        val sequence = listOf(snapshot(SnapshotState.CREATING), snapshot(SnapshotState.READY))
+        var index = 0
+        every { sandboxService.getSnapshot("snapshot-id") } answers { sequence[index++] }
+
+        assertThrows(SandboxReadyTimeoutException::class.java) {
+            sandboxManager.waitForSnapshotReady("snapshot-id", Duration.ofMillis(80), Duration.ofMillis(100))
+        }
+    }
+
+    @Test
+    fun `waitForSnapshotReady rejects a READY response that blocks past the deadline`() {
+        // Each poll blocks ~80ms, so after the CREATING poll and a short sleep at most one more poll can
+        // start before the 100ms deadline, and its READY answer arrives too late: expect a timeout.
+        val sequence = listOf(snapshot(SnapshotState.CREATING), snapshot(SnapshotState.READY))
+        var index = 0
+        every { sandboxService.getSnapshot("snapshot-id") } answers {
+            Thread.sleep(80)
+            sequence[index++]
+        }
+
+        assertThrows(SandboxReadyTimeoutException::class.java) {
+            sandboxManager.waitForSnapshotReady("snapshot-id", Duration.ofMillis(100), Duration.ofMillis(10))
+        }
+    }
 }