diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/Language.java b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/Language.java similarity index 97% rename from modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/Language.java rename to modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/Language.java index 8377f970..349623dd 100644 --- a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/Language.java +++ b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/Language.java @@ -2,7 +2,7 @@ * Copyright IBM Corp. 2025 - 2025 * SPDX-License-Identifier: Apache-2.0 */ -package com.ibm.watsonx.ai.textprocessing; +package com.ibm.watsonx.ai.core; /** * Enum representing supported languages with their corresponding ISO 639 language codes. @@ -85,4 +85,5 @@ public enum Language { public String isoCode() { return isoCode; } + } diff --git a/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/http/HttpRequestMultipartBody.java b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/http/HttpRequestMultipartBody.java new file mode 100644 index 00000000..91447d11 --- /dev/null +++ b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/http/HttpRequestMultipartBody.java @@ -0,0 +1,119 @@ +/* + * Copyright IBM Corp. 2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.core.http; + +import static java.util.Objects.isNull; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * Utility for constructing multipart/form-data HTTP request bodies. + *
+ * Example usage: + * + *
{@code
+ * HttpRequestMultipartBody body = HttpRequestMultipartBody.builder()
+ * .addPart("model", "openai/whisper-tiny")
+ * .addPart("language", "it")
+ * .addInputStream("file", is)
+ * .build();
+ *
+ * HttpRequest request = HttpRequest.newBuilder()
+ * .uri(URI.create(endpoint))
+ * .header("Content-Type", body.contentType())
+ * .POST(BodyPublishers.ofByteArray(body.body()))
+ * .build();
+ * }
+ *
+ */
+public final class HttpRequestMultipartBody {
+
+ private static final String BOUNDARY = "----watsonx-ai-sdk";
+
+ private final byte[] bytes;
+
+ /**
+  * Wraps an already-encoded multipart payload. Instances are obtained through {@link #builder()}.
+  *
+  * @param bytes the encoded multipart body; stored as-is, without copying
+  */
+ private HttpRequestMultipartBody(byte[] bytes) {
+     this.bytes = bytes;
+ }
+
+ /**
+  * Returns the multipart boundary token, the same value that {@link #contentType()} advertises.
+  *
+  * @return the boundary string
+  */
+ public String boundary() {
+     return BOUNDARY;
+ }
+
+ /**
+  * Returns the value to send in the {@code Content-Type} header for this body.
+  *
+  * @return {@code multipart/form-data} followed by the boundary parameter
+  */
+ public String contentType() {
+     final String headerValue = "multipart/form-data; boundary=" + BOUNDARY;
+     return headerValue;
+ }
+
+ /**
+  * Returns the encoded multipart payload.
+  * <p>
+  * The internal array is returned directly (no defensive copy), so callers should treat it as read-only.
+  *
+  * @return the encoded body bytes
+  */
+ public byte[] body() {
+     return this.bytes;
+ }
+
+ /**
+  * Creates an empty {@link Builder} for assembling a multipart body.
+  *
+  * @return a new {@link Builder} instance
+  */
+ public static Builder builder() {
+     final Builder fresh = new Builder();
+     return fresh;
+ }
+
+ public static final class Builder {
+ private static final String CRLF = "\r\n";
+ private final List+ * Example usage: + * + *
{@code
+ * TranscriptionRequest request =
+ * TranscriptionRequest.builder()
+ * .file("path-to-file")
+ * .language(Language.ITALIAN)
+ * .build();
+ * }
+ */
+public final class TranscriptionRequest extends WatsonxParameters {
+ private String modelId;
+ private final InputStream is;
+ private final String language;
+
+ /**
+  * Creates a request from the values collected by the builder.
+  *
+  * @param builder the populated builder
+  * @throws NullPointerException if no file/stream was supplied
+  */
+ private TranscriptionRequest(Builder builder) {
+     super(builder);
+     this.modelId = builder.modelId;
+     this.is = requireNonNull(builder.file, "file cannot be null");
+     // English is the fallback when the builder carries no language.
+     if (isNull(builder.language)) {
+         this.language = "en";
+     } else {
+         this.language = builder.language;
+     }
+ }
+
+ /**
+  * Returns the model identifier copied from the builder.
+  *
+  * @return the model id
+  */
+ public String modelId() {
+     return this.modelId;
+ }
+
+ /**
+  * Returns the stream supplied as the file to transcribe.
+  *
+  * @return the input stream; never {@code null}, as enforced by the constructor
+  */
+ public InputStream inputStream() {
+     return this.is;
+ }
+
+ /**
+  * Returns the language code for the transcription.
+  *
+  * @return the configured language code, or {@code "en"} when none was supplied
+  */
+ public String language() {
+     return this.language;
+ }
+
+ /**
+ * Returns a new {@link Builder} instance.
+ * + * Example usage: + * + *
{@code
+ * TranscriptionRequest request =
+ * TranscriptionRequest.builder()
+ * .file("path-to-file")
+ * .language(Language.ITALIAN)
+ * .build();
+ * }
+ *
+ * @return {@link Builder} instance.
+ */
+ public static Builder builder() {
+     // Each call hands out an independent, empty builder.
+     final Builder fresh = new Builder();
+     return fresh;
+ }
+
+ /**
+ * Builder class for constructing {@link TranscriptionRequest} instances with configurable parameters.
+ */
+ public final static class Builder extends WatsonxParameters.Builder+ * If not provided, the default transcription language is English ({@code "en"}). + * + * @param language ISO language code representing the target transcription language + */ + public Builder language(Language language) { + return language(language.isoCode()); + } + + /** + * Sets the target transcription language. + *
+ * If not provided, the default transcription language is English ({@code "en"}). + * + * @param language ISO language code representing the target transcription language + */ + public Builder language(String language) { + this.language = language; + return this; + } + + /** + * Builds a {@link TranscriptionRequest} instance using the configured parameters. + * + * @return a new instance of {@link TranscriptionRequest} + */ + public TranscriptionRequest build() { + return new TranscriptionRequest(this); + } + } +} diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRestClient.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRestClient.java new file mode 100644 index 00000000..d89910d8 --- /dev/null +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRestClient.java @@ -0,0 +1,50 @@ +/* + * Copyright IBM Corp. 2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.transcription; + +import java.util.ServiceLoader; +import java.util.function.Supplier; +import com.ibm.watsonx.ai.WatsonxRestClient; + +/** + * Abstraction of a REST client for interacting with the IBM watsonx.ai Transcribe Audio APIs. + */ +public abstract class TranscriptionRestClient extends WatsonxRestClient { + + protected TranscriptionRestClient(Builder builder) { + super(builder); + } + + /** + * Sends an audio transcription request + * + * @param request the {@link TranscriptionRequest} containing all parameters for the transcription + * @return a {@link TranscriptionResult} containing the transcription output + */ + public abstract TranscriptionResult transcribe(TranscriptionRequest request); + + /** + * Creates a new {@link Builder} using the first available {@link TranscriptionRestClientBuilderFactory} discovered via {@link ServiceLoader}. + *
+ * If no factory is found, falls back to the default {@link DefaultRestClient}.
+ */
+ static TranscriptionRestClient.Builder builder() {
+     // orElseGet defers creating the default builder until it is known that no
+     // factory was discovered; orElse would construct it on every call.
+     return ServiceLoader.load(TranscriptionRestClientBuilderFactory.class)
+         .findFirst()
+         .map(Supplier::get)
+         .orElseGet(DefaultRestClient::builder);
+ }
+
+ /**
+ * Builder abstract class for constructing {@link TranscriptionRestClient} instances with configurable parameters.
+ */
+ public abstract static class Builder extends WatsonxRestClient.Builder
+ * This allows frameworks to provide their own client implementations.
+ */
+ public interface TranscriptionRestClientBuilderFactory extends Supplier
+ * Example usage:
+ *
+ *
+ * Example usage:
+ *
+ * {@code
+ * TranscriptionService transcriptionService = TranscriptionService.builder()
+ * .baseUrl("https://...") // or use CloudRegion
+ * .apiKey("my-api-key") // creates an IBM Cloud Authenticator
+ * .projectId("my-project-id")
+ * .modelId("openai/whisper-tiny")
+ * .build();
+ *
+ * TranscriptionResult response = transcriptionService.transcribe(
+ * TranscriptionRequest.builder()
+ * .file("path-to-file")
+ * .language(Language.ITALIAN)
+ * .build()
+ * );
+ * }
+ *
+ * To use a custom authentication mechanism, configure it explicitly with {@code authenticator(Authenticator)}.
+ *
+ * @see Authenticator
+ */
+public final class TranscriptionService extends ModelService {
+
+ private final TranscriptionRestClient client;
+
+ /**
+  * Creates the service and the REST client it delegates to.
+  *
+  * @param builder the populated builder
+  * @throws NullPointerException if the builder has no authenticator
+  */
+ private TranscriptionService(Builder builder) {
+     super(builder);
+
+     // Fail fast: the REST client cannot be built without an authenticator.
+     requireNonNull(builder.authenticator(), "authenticator cannot be null");
+
+     // Forward the connection settings held by this service to the REST client.
+     this.client = TranscriptionRestClient.builder()
+         .baseUrl(baseUrl)
+         .version(version)
+         .logRequests(logRequests)
+         .logResponses(logResponses)
+         .timeout(timeout)
+         .httpClient(httpClient)
+         .authenticator(builder.authenticator())
+         .build();
+ }
+
+ /**
+ * Transcribes the audio file located at the given path.
+ *
+ * @param path the path to the audio file
+ * @param language the target {@link Language} for transcription
+ * @return the resulting {@link TranscriptionResult}
+ */
+ public TranscriptionResult transcribe(String path, Language language) {
+ return transcribe(Paths.get(path).toFile(), language);
+ }
+
+ /**
+ * Transcribes the given audio {@link File}.
+ *
+ * @param file the audio file to transcribe
+ * @param language the target {@link Language} for transcription
+ * @return the resulting {@link TranscriptionResult}
+ */
+ public TranscriptionResult transcribe(File file, Language language) {
+     // This method opens the stream, so it is also responsible for closing it.
+     // try-with-resources releases the file handle once the call returns, whether
+     // it succeeds or throws. Previously the FileInputStream was never closed.
+     // NOTE(review): assumes the stream is fully consumed before transcribe(...) returns.
+     try (InputStream is = new FileInputStream(file)) {
+         return transcribe(is, language);
+     } catch (FileNotFoundException e) {
+         // Unchanged behaviour: a missing/unreadable file surfaces as a RuntimeException.
+         throw new RuntimeException(e);
+     } catch (java.io.IOException e) {
+         // Only reachable when closing the stream fails.
+         throw new java.io.UncheckedIOException(e);
+     }
+ }
+
+ /**
+ * Transcribes audio data from an {@link InputStream}.
+ *
+ * @param is the {@link InputStream} containing audio data
+ * @param language the target {@link Language} for transcription
+ * @return the resulting {@link TranscriptionResult}
+ */
+ public TranscriptionResult transcribe(InputStream is, Language language) {
+     var requestBuilder = TranscriptionRequest.builder().file(is);
+
+     // Builder.language(Language) dereferences its argument, so a null language used
+     // to fail with a NullPointerException. Leaving it unset lets the request fall
+     // back to its documented default instead.
+     if (nonNull(language)) {
+         requestBuilder.language(language);
+     }
+
+     return transcribe(requestBuilder.build());
+ }
+
+ /**
+ * Executes an audio transcription request using the provided {@link TranscriptionRequest}.
+ *
+ * @param request the {@link TranscriptionRequest}
+ * @return the resulting {@link TranscriptionResult}
+ */
+ public TranscriptionResult transcribe(TranscriptionRequest request) {
+     var projectSpace = resolveProjectSpace(request);
+
+     // Rebuild the request so that service-level values fill in what the caller left
+     // unset: the model id falls back to the service's model, and project/space come
+     // from the resolved pair. The transaction id is forwarded unchanged; the former
+     // ternary compared the request's value with a copy of itself.
+     var resolvedRequest = TranscriptionRequest.builder()
+         .file(request.inputStream())
+         .language(request.language())
+         .modelId(nonNull(request.modelId()) ? request.modelId() : modelId)
+         .projectId(projectSpace.projectId())
+         .spaceId(projectSpace.spaceId())
+         .transactionId(request.transactionId())
+         .build();
+
+     return client.transcribe(resolvedRequest);
+ }
+
+ /**
+ * Returns a new {@link Builder} instance.
+ * {@code
+ * TranscriptionService transcriptionService = TranscriptionService.builder()
+ * .baseUrl("https://...") // or use CloudRegion
+ * .apiKey("my-api-key") // creates an IBM Cloud Authenticator
+ * .projectId("my-project-id")
+ * .modelId("openai/whisper-tiny")
+ * .build();
+ *
+ * TranscriptionResult response = transcriptionService.transcribe(
+ * TranscriptionRequest.builder()
+ * .file("path-to-file")
+ * .language(Language.ITALIAN)
+ * .build()
+ * );
+ * }
+ *
+ * @return {@link Builder} instance.
+ */
+ public static Builder builder() {
+     // Each call hands out an independent, empty builder.
+     final Builder fresh = new Builder();
+     return fresh;
+ }
+
+ /**
+ * Builder class for constructing {@link TranscriptionService} instances with configurable parameters.
+ */
+ public final static class Builder extends ModelService.Builder