1313# limitations under the License.
1414
1515import logging
16- from kubernetes import client
16+ import os
17+ import uuid
18+ from datetime import datetime
19+ from typing import Any
20+ from dataclasses import dataclass
21+ from kubernetes import client , watch
1722from kubernetes .client import ApiException
1823from ..sandbox_client import SandboxClient
1924from ..constants import (
2025 PODSNAPSHOT_API_GROUP ,
2126 PODSNAPSHOT_API_VERSION ,
2227 PODSNAPSHOT_API_KIND ,
28+ PODSNAPSHOTMANUALTRIGGER_API_KIND ,
29+ PODSNAPSHOTMANUALTRIGGER_PLURAL ,
2330)
2431
32+ SNAPSHOT_SUCCESS_CODE = 0
33+ SNAPSHOT_ERROR_CODE = 1
34+
2535logger = logging .getLogger (__name__ )
2636
2737
38+ @dataclass
39+ class SnapshotResult :
40+ """Result of a snapshot processing operation."""
41+
42+ snapshot_uid : str
43+ snapshot_timestamp : str
44+
45+
46+ @dataclass
47+ class SnapshotResponse :
48+ """Structured response for snapshot operations."""
49+
50+ success : bool
51+ trigger_name : str
52+ snapshot_uid : str
53+ error_reason : str
54+ error_code : int
55+
56+
2857class PodSnapshotSandboxClient (SandboxClient ):
2958 """
3059 A specialized Sandbox client for interacting with the GKE Pod Snapshot Controller.
31-
32- TODO: This class enables users to take a snapshot of their sandbox and restore from the taken snapshot.
60+ This class enables users to take a manual trigger snapshot of their sandbox and restore from the taken snapshot.
3361 """
3462
3563 def __init__ (
3664 self ,
3765 template_name : str ,
66+ podsnapshot_timeout : int = 180 ,
3867 ** kwargs ,
3968 ):
4069 super ().__init__ (template_name , ** kwargs )
4170
4271 self .snapshot_crd_installed = False
4372 self .core_v1_api = client .CoreV1Api ()
73+ self .podsnapshot_timeout = podsnapshot_timeout
74+
75+ self .created_manual_triggers = []
4476
4577 def __enter__ (self ) -> "PodSnapshotSandboxClient" :
4678 try :
@@ -61,7 +93,9 @@ def __enter__(self) -> "PodSnapshotSandboxClient":
6193
6294 def _check_snapshot_crd_installed (self ) -> bool :
6395 """
64- Checks if the PodSnapshot CRD is installed in the cluster.
96+ Checks if the PodSnapshot CRD is installed and available in the cluster.
97+ Returns:
98+ bool: True if the CRD is installed, False otherwise.
6599 """
66100
67101 if self .snapshot_crd_installed :
@@ -87,8 +121,172 @@ def _check_snapshot_crd_installed(self) -> bool:
87121 return False
88122 raise
89123
124+ def _parse_created_snapshot_info (self , obj : dict [str , Any ]) -> SnapshotResult :
125+ """Parses the object to extract snapshot details."""
126+ status = obj .get ("status" , {})
127+ conditions = status .get ("conditions" , [])
128+ for condition in conditions :
129+ if (
130+ condition .get ("type" ) == "Triggered"
131+ and condition .get ("status" ) == "True"
132+ and condition .get ("reason" ) == "Complete"
133+ ):
134+ snapshot_uid = status .get ("snapshotCreated" , {}).get ("name" )
135+ snapshot_timestamp = condition .get ("lastTransitionTime" )
136+ return SnapshotResult (
137+ snapshot_uid = snapshot_uid ,
138+ snapshot_timestamp = snapshot_timestamp ,
139+ )
140+ raise ValueError ("Snapshot is not yet complete." )
141+
142+ def _wait_for_snapshot_to_be_completed (
143+ self , trigger_name : str , resource_version : str | None = None
144+ ) -> SnapshotResult :
145+ """
146+ Waits for the PodSnapshotManualTrigger to be processed and returns SnapshotResult.
147+ """
148+ w = watch .Watch ()
149+ logger .info (
150+ f"Waiting for snapshot manual trigger '{ trigger_name } ' to be processed..."
151+ )
152+
153+ kwargs = {}
154+ if resource_version :
155+ kwargs ["resource_version" ] = resource_version
156+
157+ try :
158+ for event in w .stream (
159+ func = self .custom_objects_api .list_namespaced_custom_object ,
160+ namespace = self .namespace ,
161+ group = PODSNAPSHOT_API_GROUP ,
162+ version = PODSNAPSHOT_API_VERSION ,
163+ plural = PODSNAPSHOTMANUALTRIGGER_PLURAL ,
164+ field_selector = f"metadata.name={ trigger_name } " ,
165+ timeout_seconds = self .podsnapshot_timeout ,
166+ ** kwargs ,
167+ ):
168+ if event ["type" ] in ["ADDED" , "MODIFIED" ]:
169+ obj = event ["object" ]
170+ try :
171+ result = self ._parse_created_snapshot_info (obj )
172+ logger .info (
173+ f"Snapshot manual trigger '{ trigger_name } ' processed successfully. Created Snapshot UID: { result .snapshot_uid } "
174+ )
175+ return result
176+ except ValueError :
177+ # Continue watching if snapshot is not yet complete
178+ continue
179+ except Exception as e :
180+ logger .error (f"Error watching snapshot: { e } " )
181+ raise
182+ finally :
183+ w .stop ()
184+
185+ raise TimeoutError (
186+ f"Snapshot manual trigger '{ trigger_name } ' was not processed within { self .podsnapshot_timeout } seconds."
187+ )
188+
189+ def snapshot (self , trigger_name : str ) -> SnapshotResponse :
190+ """
191+ Triggers a snapshot of the specified pod by creating a PodSnapshotManualTrigger resource.
192+ The trigger_name will be suffixed with a timestamp and random hex string.
193+ Returns:
194+ SnapshotResponse: The result of the operation.
195+ """
196+ timestamp = datetime .now ().strftime ("%Y%m%d-%H%M%S" )
197+ suffix = uuid .uuid4 ().hex [:8 ]
198+ trigger_name = f"{ trigger_name } -{ timestamp } -{ suffix } "
199+
200+ if not self .snapshot_crd_installed :
201+ return SnapshotResponse (
202+ success = False ,
203+ trigger_name = trigger_name ,
204+ snapshot_uid = None ,
205+ error_reason = "Snapshot CRD is not installed. Ensure it is installed and running." ,
206+ error_code = SNAPSHOT_ERROR_CODE ,
207+ )
208+ if not self .pod_name :
209+ return SnapshotResponse (
210+ success = False ,
211+ trigger_name = trigger_name ,
212+ snapshot_uid = None ,
213+ error_reason = "Sandbox pod name not found. Ensure sandbox is created." ,
214+ error_code = SNAPSHOT_ERROR_CODE ,
215+ )
216+
217+ manifest = {
218+ "apiVersion" : f"{ PODSNAPSHOT_API_GROUP } /{ PODSNAPSHOT_API_VERSION } " ,
219+ "kind" : f"{ PODSNAPSHOTMANUALTRIGGER_API_KIND } " ,
220+ "metadata" : {"name" : trigger_name , "namespace" : self .namespace },
221+ "spec" : {"targetPod" : self .pod_name },
222+ }
223+
224+ try :
225+ created_obj = self .custom_objects_api .create_namespaced_custom_object (
226+ group = PODSNAPSHOT_API_GROUP ,
227+ version = PODSNAPSHOT_API_VERSION ,
228+ namespace = self .namespace ,
229+ plural = PODSNAPSHOTMANUALTRIGGER_PLURAL ,
230+ body = manifest ,
231+ )
232+ self .created_manual_triggers .append (trigger_name )
233+
234+ # Start watching from the version we just created to avoid missing updates
235+ resource_version = created_obj .get ("metadata" , {}).get ("resourceVersion" )
236+ snapshot_result = self ._wait_for_snapshot_to_be_completed (
237+ trigger_name , resource_version
238+ )
239+
240+ return SnapshotResponse (
241+ success = True ,
242+ trigger_name = trigger_name ,
243+ snapshot_uid = snapshot_result .snapshot_uid ,
244+ error_reason = "" ,
245+ error_code = SNAPSHOT_SUCCESS_CODE ,
246+ )
247+ except ApiException as e :
248+ logger .exception (
249+ f"Failed to create PodSnapshotManualTrigger '{ trigger_name } ': { e } "
250+ )
251+ return SnapshotResponse (
252+ success = False ,
253+ trigger_name = trigger_name ,
254+ snapshot_uid = None ,
255+ error_reason = f"Failed to create PodSnapshotManualTrigger: { e } " ,
256+ error_code = SNAPSHOT_ERROR_CODE ,
257+ )
258+ except TimeoutError as e :
259+ logger .exception (
260+ f"Snapshot creation timed out for trigger '{ trigger_name } ': { e } "
261+ )
262+ return SnapshotResponse (
263+ success = False ,
264+ trigger_name = trigger_name ,
265+ snapshot_uid = None ,
266+ error_reason = f"Snapshot creation timed out: { e } " ,
267+ error_code = SNAPSHOT_ERROR_CODE ,
268+ )
269+
90270 def __exit__ (self , exc_type , exc_val , exc_tb ):
91271 """
272+ Cleans up the PodSnapshotManualTrigger Resources.
92273 Automatically cleans up the Sandbox.
93274 """
275+ for trigger_name in self .created_manual_triggers :
276+ try :
277+ self .custom_objects_api .delete_namespaced_custom_object (
278+ group = PODSNAPSHOT_API_GROUP ,
279+ version = PODSNAPSHOT_API_VERSION ,
280+ namespace = self .namespace ,
281+ plural = PODSNAPSHOTMANUALTRIGGER_PLURAL ,
282+ name = trigger_name ,
283+ )
284+ logger .info (f"Deleted PodSnapshotManualTrigger '{ trigger_name } '" )
285+ except ApiException as e :
286+ if e .status == 404 :
287+ # Ignore if the resource is already deleted
288+ continue
289+ logger .error (
290+ f"Failed to delete PodSnapshotManualTrigger '{ trigger_name } ': { e } "
291+ )
94292 super ().__exit__ (exc_type , exc_val , exc_tb )
0 commit comments