feat: GA Context Cache Python SDK

happy-qiao · copybara-github · commit 6bad0eecd89a · 2025-01-23T12:05:01.000-08:00
FUTURE_COPYBARA_INTEGRATE_REVIEW=#4861 from googleapis:release-please--branches--main 039f2cb PiperOrigin-RevId: 718931779
diff --git a/google/cloud/aiplatform/compat/__init__.py b/google/cloud/aiplatform/compat/__init__.py
@@ -181,8 +181,7 @@
         services.featurestore_online_serving_service_client_v1
     )
     services.featurestore_service_client = services.featurestore_service_client_v1
-    # TODO(b/342585299): Temporary code. Switch to v1 once v1 is available.
-    services.gen_ai_cache_service_client = services.gen_ai_cache_service_client_v1beta1
+    services.gen_ai_cache_service_client = services.gen_ai_cache_service_client_v1
     services.job_service_client = services.job_service_client_v1
     services.model_garden_service_client = services.model_garden_service_client_v1
     services.model_service_client = services.model_service_client_v1
@@ -203,8 +202,7 @@
     types.annotation_spec = types.annotation_spec_v1
     types.artifact = types.artifact_v1
     types.batch_prediction_job = types.batch_prediction_job_v1
-    # TODO(b/342585299): Temporary code. Switch to v1 once v1 is available.
-    types.cached_content = types.cached_content_v1beta1
+    types.cached_content = types.cached_content_v1
     types.completion_stats = types.completion_stats_v1
     types.context = types.context_v1
     types.custom_job = types.custom_job_v1
diff --git a/google/cloud/aiplatform/compat/services/__init__.py b/google/cloud/aiplatform/compat/services/__init__.py
@@ -137,6 +137,9 @@
 from google.cloud.aiplatform_v1.services.featurestore_service import (
     client as featurestore_service_client_v1,
 )
+from google.cloud.aiplatform_v1.services.gen_ai_cache_service import (
+    client as gen_ai_cache_service_client_v1,
+)
 from google.cloud.aiplatform_v1.services.index_service import (
     client as index_service_client_v1,
 )
diff --git a/google/cloud/aiplatform/compat/types/__init__.py b/google/cloud/aiplatform/compat/types/__init__.py
@@ -118,6 +118,7 @@
     annotation_spec as annotation_spec_v1,
     artifact as artifact_v1,
     batch_prediction_job as batch_prediction_job_v1,
+    cached_content as cached_content_v1,
     completion_stats as completion_stats_v1,
     context as context_v1,
     custom_job as custom_job_v1,
diff --git a/google/cloud/aiplatform/utils/__init__.py b/google/cloud/aiplatform/utils/__init__.py
@@ -77,6 +77,7 @@
     feature_registry_service_client_v1,
     featurestore_online_serving_service_client_v1,
     featurestore_service_client_v1,
+    gen_ai_cache_service_client_v1,
     index_service_client_v1,
     index_endpoint_service_client_v1,
     job_service_client_v1,
@@ -805,8 +806,7 @@ class GenAiCacheServiceClientWithOverride(ClientWithOverride):
     _version_map = (
         (
             compat.V1,
-            # TODO(b/342585299): Temporary code. Switch to v1 once v1 is available.
-            gen_ai_cache_service_client_v1beta1.GenAiCacheServiceClient,
+            gen_ai_cache_service_client_v1.GenAiCacheServiceClient,
         ),
         (
             compat.V1BETA1,
diff --git a/tests/unit/vertexai/test_caching.py b/tests/unit/vertexai/test_caching.py
@@ -22,7 +22,7 @@
 import json
 import mock
 import pytest
-from vertexai.preview import caching
+from vertexai.caching import _caching
 from google.cloud.aiplatform import initializer
 import vertexai
 from google.cloud.aiplatform_v1beta1.types.cached_content import (
@@ -35,9 +35,16 @@
 from google.cloud.aiplatform_v1beta1.types.tool import (
     ToolConfig as GapicToolConfig,
 )
-from google.cloud.aiplatform_v1beta1.services import (
+from google.cloud.aiplatform_v1.services import (
     gen_ai_cache_service,
 )
+from vertexai.generative_models._generative_models import (
+    Content,
+    PartsType,
+    Tool,
+    ToolConfig,
+    ContentsType,
+)
 
 
 _TEST_PROJECT = "test-project"
@@ -141,7 +148,7 @@ def list_cached_contents(self, request):
 
 @pytest.mark.usefixtures("google_auth_mock")
 class TestCaching:
-    """Unit tests for caching.CachedContent."""
+    """Unit tests for _caching.CachedContent."""
 
     def setup_method(self):
         vertexai.init(
@@ -156,7 +163,7 @@ def test_constructor_with_full_resource_name(self, mock_get_cached_content):
         full_resource_name = (
             "projects/123/locations/europe-west1/cachedContents/contents-id"
         )
-        cache = caching.CachedContent(
+        cache = _caching.CachedContent(
             cached_content_name=full_resource_name,
         )
 
@@ -166,7 +173,7 @@ def test_constructor_with_full_resource_name(self, mock_get_cached_content):
     def test_constructor_with_only_content_id(self, mock_get_cached_content):
         partial_resource_name = "contents-id"
 
-        cache = caching.CachedContent(
+        cache = _caching.CachedContent(
             cached_content_name=partial_resource_name,
         )
 
@@ -179,7 +186,7 @@ def test_constructor_with_only_content_id(self, mock_get_cached_content):
     def test_get_with_content_id(self, mock_get_cached_content):
         partial_resource_name = "contents-id"
 
-        cache = caching.CachedContent.get(
+        cache = _caching.CachedContent.get(
             cached_content_name=partial_resource_name,
         )
 
@@ -192,7 +199,7 @@ def test_get_with_content_id(self, mock_get_cached_content):
     def test_create_with_real_payload(
         self, mock_create_cached_content, mock_get_cached_content
     ):
-        cache = caching.CachedContent.create(
+        cache = _caching.CachedContent.create(
             model_name="model-name",
             system_instruction=GapicContent(
                 role="system", parts=[GapicPart(text="system instruction")]
@@ -219,7 +226,7 @@ def test_create_with_real_payload(
     def test_create_with_real_payload_and_wrapped_type(
         self, mock_create_cached_content, mock_get_cached_content
     ):
-        cache = caching.CachedContent.create(
+        cache = _caching.CachedContent.create(
             model_name="model-name",
             system_instruction="Please answer my questions with cool",
             tools=[],
@@ -239,15 +246,15 @@ def test_create_with_real_payload_and_wrapped_type(
         assert cache.display_name == _TEST_DISPLAY_NAME
 
     def test_list(self, mock_list_cached_contents):
-        cached_contents = caching.CachedContent.list()
+        cached_contents = _caching.CachedContent.list()
         for i, cached_content in enumerate(cached_contents):
             assert cached_content.name == f"cached_content{i + 1}_from_list_request"
             assert cached_content.model_name == f"model-name{i + 1}"
 
     def test_print_a_cached_content(
         self, mock_create_cached_content, mock_get_cached_content
     ):
-        cached_content = caching.CachedContent.create(
+        cached_content = _caching.CachedContent.create(
             model_name="model-name",
             system_instruction="Please answer my questions with cool",
             tools=[],
diff --git a/tests/unit/vertexai/test_generative_models.py b/tests/unit/vertexai/test_generative_models.py
@@ -39,14 +39,14 @@
     gapic_content_types,
     gapic_tool_types,
 )
-from google.cloud.aiplatform_v1beta1.types.cached_content import (
+from google.cloud.aiplatform_v1.types.cached_content import (
     CachedContent as GapicCachedContent,
 )
-from google.cloud.aiplatform_v1beta1.services import (
+from google.cloud.aiplatform_v1.services import (
     gen_ai_cache_service,
 )
 from vertexai.generative_models import _function_calling_utils
-from vertexai.preview import caching
+from vertexai.caching import _caching
 
 
 _TEST_PROJECT = "test-project"
@@ -655,11 +655,11 @@ def test_generative_model_from_cached_content(
         project_location_prefix = (
             f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/"
         )
-        cached_content = caching.CachedContent(
+        cached_content = _caching.CachedContent(
             "cached-content-id-in-from-cached-content-test"
         )
 
-        model = preview_generative_models.GenerativeModel.from_cached_content(
+        model = generative_models.GenerativeModel.from_cached_content(
             cached_content=cached_content
         )
 
@@ -690,7 +690,7 @@ def test_generative_model_from_cached_content_with_resource_name(
             f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/"
         )
 
-        model = preview_generative_models.GenerativeModel.from_cached_content(
+        model = generative_models.GenerativeModel.from_cached_content(
             cached_content="cached-content-id-in-from-cached-content-test"
         )
 
@@ -848,7 +848,7 @@ def test_generate_content(
         assert response5.text
 
     @mock.patch.object(
-        target=prediction_service.PredictionServiceClient,
+        target=prediction_service_v1.PredictionServiceClient,
         attribute="generate_content",
         new=lambda self, request: gapic_prediction_service_types.GenerateContentResponse(
             candidates=[
@@ -870,11 +870,11 @@ def test_generate_content_with_cached_content(
         self,
         mock_get_cached_content_fixture,
     ):
-        cached_content = caching.CachedContent(
+        cached_content = _caching.CachedContent(
             "cached-content-id-in-from-cached-content-test"
         )
 
-        model = preview_generative_models.GenerativeModel.from_cached_content(
+        model = generative_models.GenerativeModel.from_cached_content(
             cached_content=cached_content
         )
 
diff --git a/vertexai/caching/__init__.py b/vertexai/caching/__init__.py
@@ -0,0 +1,25 @@
+# Copyright 2025 Google LLC
+#
+# Licensed under the Apache License, Version 2.0 (the "License");
+# you may not use this file except in compliance with the License.
+# You may obtain a copy of the License at
+#
+#     http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+#
+"""Classes for working with cached content for the Gemini models."""
+
+# We just want to re-export certain classes
+# pylint: disable=g-multiple-import,g-importing-member
+from vertexai.caching._caching import (
+    CachedContent,
+)
+
+__all__ = [
+    "CachedContent",
+]
diff --git a/vertexai/caching/_caching.py b/vertexai/caching/_caching.py
@@ -24,7 +24,7 @@
 from google.cloud.aiplatform.compat.types import (
     cached_content_v1beta1 as gca_cached_content,
 )
-from google.cloud.aiplatform_v1beta1.services import gen_ai_cache_service
+from google.cloud.aiplatform_v1.services import gen_ai_cache_service as gen_ai_cache_service_v1
 from google.cloud.aiplatform_v1beta1.types.cached_content import (
     CachedContent as GapicCachedContent,
 )
@@ -36,6 +36,7 @@
     GetCachedContentRequest,
     UpdateCachedContentRequest,
 )
+from google.cloud.aiplatform_v1 import types as types_v1
 from vertexai.generative_models import _generative_models
 from vertexai.generative_models._generative_models import (
     Content,
@@ -89,7 +90,7 @@ def _prepare_create_request(
     if ttl and expire_time:
         raise ValueError("Only one of ttl and expire_time can be set.")
 
-    request = CreateCachedContentRequest(
+    request_v1beta1 = CreateCachedContentRequest(
         parent=f"projects/{project}/locations/{location}",
         cached_content=GapicCachedContent(
             model=model_name,
@@ -102,11 +103,32 @@ def _prepare_create_request(
             display_name=display_name,
         ),
     )
-    return request
+    serialized_message_v1beta1 = type(request_v1beta1).serialize(request_v1beta1)
+    try:
+        request_v1 = types_v1.CreateCachedContentRequest.deserialize(
+            serialized_message_v1beta1
+        )
+    except Exception as ex:
+        raise ValueError(
+            "Failed to convert CreateCachedContentRequest from v1beta1 to v1:\n"
+            f"{serialized_message_v1beta1}"
+        ) from ex
+    return request_v1
 
 
 def _prepare_get_cached_content_request(name: str) -> GetCachedContentRequest:
-    return GetCachedContentRequest(name=name)
+    request_v1beta1 = GetCachedContentRequest(name=name)
+    serialized_message_v1beta1 = type(request_v1beta1).serialize(request_v1beta1)
+    try:
+        request_v1 = types_v1.GetCachedContentRequest.deserialize(
+            serialized_message_v1beta1
+        )
+    except Exception as ex:
+        raise ValueError(
+            "Failed to convert GetCachedContentRequest from v1beta1 to v1:\n"
+            f"{serialized_message_v1beta1}"
+        ) from ex
+    return request_v1
 
 
 class CachedContent(aiplatform_base._VertexAiResourceNounPlus):
@@ -122,7 +144,7 @@ class CachedContent(aiplatform_base._VertexAiResourceNounPlus):
     client_class = aiplatform_utils.GenAiCacheServiceClientWithOverride
 
     _gen_ai_cache_service_client_value: Optional[
-        gen_ai_cache_service.GenAiCacheServiceClient
+        gen_ai_cache_service_v1.GenAiCacheServiceClient
     ] = None
 
     def __init__(self, cached_content_name: str):
@@ -253,15 +275,25 @@ def update(
             update_mask.append("expire_time")
 
         update_mask = field_mask_pb2.FieldMask(paths=update_mask)
-        request = UpdateCachedContentRequest(
+        request_v1beta1 = UpdateCachedContentRequest(
             cached_content=GapicCachedContent(
                 name=self.resource_name,
                 expire_time=expire_time,
                 ttl=ttl,
             ),
             update_mask=update_mask,
         )
-        self.api_client.update_cached_content(request)
+        serialized_message_v1beta1 = type(request_v1beta1).serialize(request_v1beta1)
+        try:
+            request_v1 = types_v1.UpdateCachedContentRequest.deserialize(
+                serialized_message_v1beta1
+            )
+        except Exception as ex:
+            raise ValueError(
+                "Failed to convert UpdateCachedContentRequest from v1beta1 to v1:\n"
+                f"{serialized_message_v1beta1}"
+            ) from ex
+        self.api_client.update_cached_content(request_v1)
 
     @property
     def expire_time(self) -> datetime.datetime:
diff --git a/vertexai/generative_models/_generative_models.py b/vertexai/generative_models/_generative_models.py

Original file line number	Diff line number	Diff line change
`@@ -137,6 +137,9 @@`
`137`	`137`	`from google.cloud.aiplatform_v1.services.featurestore_service import (`
`138`	`138`	`client as featurestore_service_client_v1,`
`139`	`139`	`)`
	`140`	`+from google.cloud.aiplatform_v1.services.gen_ai_cache_service import (`
	`141`	`+ client as gen_ai_cache_service_client_v1,`
	`142`	`+)`
`140`	`143`	`from google.cloud.aiplatform_v1.services.index_service import (`
`141`	`144`	`client as index_service_client_v1,`
`142`	`145`	`)`
Original file line number	Diff line number	Diff line change
`@@ -39,14 +39,14 @@`
`39`	`39`	`gapic_content_types,`
`40`	`40`	`gapic_tool_types,`
`41`	`41`	`)`
`42`		`-from google.cloud.aiplatform_v1beta1.types.cached_content import (`
	`42`	`+from google.cloud.aiplatform_v1.types.cached_content import (`
`43`	`43`	`CachedContent as GapicCachedContent,`
`44`	`44`	`)`
`45`		`-from google.cloud.aiplatform_v1beta1.services import (`
	`45`	`+from google.cloud.aiplatform_v1.services import (`
`46`	`46`	`gen_ai_cache_service,`
`47`	`47`	`)`
`48`	`48`	`from vertexai.generative_models import _function_calling_utils`
`49`		`-from vertexai.preview import caching`
	`49`	`+from vertexai.caching import _caching`
`50`	`50`
`51`	`51`
`52`	`52`	`_TEST_PROJECT = "test-project"`
`@@ -655,11 +655,11 @@ def test_generative_model_from_cached_content(`
`655`	`655`	`project_location_prefix = (`
`656`	`656`	`f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/"`
`657`	`657`	`)`
`658`		`- cached_content = caching.CachedContent(`
	`658`	`+ cached_content = _caching.CachedContent(`
`659`	`659`	`"cached-content-id-in-from-cached-content-test"`
`660`	`660`	`)`
`661`	`661`
`662`		`- model = preview_generative_models.GenerativeModel.from_cached_content(`
	`662`	`+ model = generative_models.GenerativeModel.from_cached_content(`
`663`	`663`	`cached_content=cached_content`
`664`	`664`	`)`
`665`	`665`
`@@ -690,7 +690,7 @@ def test_generative_model_from_cached_content_with_resource_name(`
`690`	`690`	`f"projects/{_TEST_PROJECT}/locations/{_TEST_LOCATION}/"`
`691`	`691`	`)`
`692`	`692`
`693`		`- model = preview_generative_models.GenerativeModel.from_cached_content(`
	`693`	`+ model = generative_models.GenerativeModel.from_cached_content(`
`694`	`694`	`cached_content="cached-content-id-in-from-cached-content-test"`
`695`	`695`	`)`
`696`	`696`
`@@ -848,7 +848,7 @@ def test_generate_content(`
`848`	`848`	`assert response5.text`
`849`	`849`
`850`	`850`	`@mock.patch.object(`
`851`		`- target=prediction_service.PredictionServiceClient,`
	`851`	`+ target=prediction_service_v1.PredictionServiceClient,`
`852`	`852`	`attribute="generate_content",`
`853`	`853`	`new=lambda self, request: gapic_prediction_service_types.GenerateContentResponse(`
`854`	`854`	`candidates=[`
`@@ -870,11 +870,11 @@ def test_generate_content_with_cached_content(`
`870`	`870`	`self,`
`871`	`871`	`mock_get_cached_content_fixture,`
`872`	`872`	`):`
`873`		`- cached_content = caching.CachedContent(`
	`873`	`+ cached_content = _caching.CachedContent(`
`874`	`874`	`"cached-content-id-in-from-cached-content-test"`
`875`	`875`	`)`
`876`	`876`
`877`		`- model = preview_generative_models.GenerativeModel.from_cached_content(`
	`877`	`+ model = generative_models.GenerativeModel.from_cached_content(`
`878`	`878`	`cached_content=cached_content`
`879`	`879`	`)`
`880`	`880`