feat: [texttospeech] StreamingSynthesize now supports opus (#5926)

* feat: StreamingSynthesize now supports opus

  PiperOrigin-RevId: 712905945
  Source-Link: googleapis/googleapis@bd72915
  Source-Link: googleapis/googleapis-gen@fc8216c
  Copy-Tag: eyJwIjoicGFja2FnZXMvZ29vZ2xlLWNsb3VkLXRleHR0b3NwZWVjaC8uT3dsQm90LnlhbWwiLCJoIjoiZmM4MjE2Y2ZkOGEzMjcxM2Y1MjAyZmViMzJmMTliOTI3YTEzOTljNCJ9

* 🦉 Updates from OwlBot post-processor

  See https://github.com/googleapis/repo-automation-bots/blob/main/packages/owl-bot/README.md

---------

Co-authored-by: Owl Bot <gcf-owl-bot[bot]@users.noreply.github.com>
Co-authored-by: d-goog <[email protected]>
googleapis · Jan 11, 2025 · d8d7c9a · d8d7c9a
1 parent 2838dd5
commit d8d7c9a
Show file tree

Hide file tree

Showing 4 changed files with 466 additions and 3 deletions.
diff --git a/packages/google-cloud-texttospeech/protos/google/cloud/texttospeech/v1/cloud_tts.proto b/packages/google-cloud-texttospeech/protos/google/cloud/texttospeech/v1/cloud_tts.proto
@@ -112,6 +112,11 @@ enum AudioEncoding {
   // 8-bit samples that compand 14-bit audio samples using G.711 PCMU/A-law.
   // Audio content returned as ALAW also contains a WAV header.
   ALAW = 6;
+
+  // Uncompressed 16-bit signed little-endian samples (Linear PCM).
+  // Note that as opposed to LINEAR16, audio will not be wrapped in a WAV (or
+  // any other) header.
+  PCM = 7;
 }
 
 // The top-level message sent by the client for the `ListVoices` method.
@@ -400,10 +405,25 @@ message SynthesizeSpeechResponse {
   bytes audio_content = 1;
 }
 
+// Describes the desired output audio for a StreamingSynthesize request.
+message StreamingAudioConfig {
+  // Required. The format of the audio byte stream.
+  // For now, streaming only supports PCM and OGG_OPUS. Requests that specify
+  // any other encoding will return an error.
+  AudioEncoding audio_encoding = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. The synthesis sample rate, in hertz, of the output audio.
+  int32 sample_rate_hertz = 2 [(google.api.field_behavior) = OPTIONAL];
+}
+
 // Provides configuration information for the StreamingSynthesize request.
 message StreamingSynthesizeConfig {
   // Required. The desired voice of the synthesized audio.
   VoiceSelectionParams voice = 1 [(google.api.field_behavior) = REQUIRED];
+
+  // Optional. Output settings (encoding, sample rate) for the streamed audio.
+  StreamingAudioConfig streaming_audio_config = 4
+      [(google.api.field_behavior) = OPTIONAL];
 }
 
 // Input to be synthesized.

diff --git a/packages/google-cloud-texttospeech/protos/protos.d.ts b/packages/google-cloud-texttospeech/protos/protos.d.ts