diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/Language.java b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/Language.java similarity index 97% rename from modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/Language.java rename to modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/Language.java index 8377f970..349623dd 100644 --- a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/Language.java +++ b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/Language.java @@ -2,7 +2,7 @@ * Copyright IBM Corp. 2025 - 2025 * SPDX-License-Identifier: Apache-2.0 */ -package com.ibm.watsonx.ai.textprocessing; +package com.ibm.watsonx.ai.core; /** * Enum representing supported languages with their corresponding ISO 639 language codes. @@ -85,4 +85,5 @@ public enum Language { public String isoCode() { return isoCode; } + } diff --git a/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/http/HttpRequestMultipartBody.java b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/http/HttpRequestMultipartBody.java new file mode 100644 index 00000000..91447d11 --- /dev/null +++ b/modules/watsonx-ai-core/src/main/java/com/ibm/watsonx/ai/core/http/HttpRequestMultipartBody.java @@ -0,0 +1,119 @@ +/* + * Copyright IBM Corp. 2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.core.http; + +import static java.util.Objects.isNull; +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; +import java.util.ArrayList; +import java.util.List; + +/** + * Utility for constructing multipart/form-data HTTP request bodies. + *

+ * Example usage: + * + *

{@code
+ * HttpRequestMultipartBody body = HttpRequestMultipartBody.builder()
+ *     .addPart("model", "openai/whisper-tiny")
+ *     .addPart("language", "it")
+ *     .addInputStream("file", is)
+ *     .build();
+ *
+ * HttpRequest request = HttpRequest.newBuilder()
+ *     .uri(URI.create(endpoint))
+ *     .header("Content-Type", body.contentType())
+ *     .POST(BodyPublishers.ofByteArray(body.body()))
+ *     .build();
+ * }
+ *
+ */
+public final class HttpRequestMultipartBody {
+
+    private static final String BOUNDARY = "----watsonx-ai-sdk";
+
+    private final byte[] bytes;
+
+    private HttpRequestMultipartBody(byte[] bytes) {
+        this.bytes = bytes;
+    }
+
+    /**
+     * Returns the boundary token that delimits the parts of this body.
+     *
+     * @return the boundary, without the leading {@code "--"}
+     */
+    public String boundary() {
+        return BOUNDARY;
+    }
+
+    /**
+     * Returns the value to send as the {@code Content-Type} request header.
+     *
+     * @return {@code multipart/form-data} followed by the boundary parameter
+     */
+    public String contentType() {
+        return "multipart/form-data; boundary=" + BOUNDARY;
+    }
+
+    /**
+     * Returns the encoded multipart body.
+     *
+     * @return the internal byte array (not a copy)
+     */
+    public byte[] body() {
+        return bytes;
+    }
+
+    /**
+     * Returns a new {@link Builder} instance.
+     *
+     * @return {@link Builder} instance.
+     */
+    public static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Collects the parts of a multipart body and encodes them, in insertion order, when {@link #build()} is called.
+     */
+    public static final class Builder {
+        private static final String CRLF = "\r\n";
+        private final List<Part> parts = new ArrayList<>();
+
+        // NOTE(review): every part is currently created with fileName == null, so the "filename" attribute is never emitted -
+        // confirm the target endpoint accepts file parts without it.
+        private record Part(String fieldName, String fileName, String contentType, byte[] content) {}
+
+        /**
+         * Adds a UTF-8 text part.
+         * <p>
+         * The call is a no-op when {@code name} or {@code value} is {@code null}, so optional fields can be passed through unconditionally.
+         *
+         * @param name the form field name
+         * @param value the field value
+         */
+        public Builder addPart(String name, String value) {
+            if (isNull(name) || isNull(value)) {
+                return this;
+            }
+
+            parts.add(new Part(name, null, "text/plain; charset=UTF-8", value.getBytes(StandardCharsets.UTF_8)));
+            return this;
+        }
+
+        /**
+         * Adds a binary part ({@code application/octet-stream}) whose content is read from the given stream.
+         * <p>
+         * The stream is read to its end and buffered in memory. It is not closed by this method.
+         *
+         * @param name the form field name
+         * @param is the stream providing the part content
+         * @throws NullPointerException if {@code name} or {@code is} is {@code null}
+         * @throws RuntimeException wrapping the {@link IOException} raised while reading the stream
+         */
+        public Builder addInputStream(String name, InputStream is) {
+            // Fail fast with a clear message instead of writing name="null" or failing inside readAllBytes().
+            java.util.Objects.requireNonNull(name, "name cannot be null");
+            java.util.Objects.requireNonNull(is, "is cannot be null");
+            try {
+                parts.add(new Part(name, null, "application/octet-stream", is.readAllBytes()));
+                return this;
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        /**
+         * Encodes all the added parts into a single multipart body.
+         *
+         * @return the encoded {@link HttpRequestMultipartBody}
+         * @throws IllegalStateException if no part has been added
+         */
+        public HttpRequestMultipartBody build() {
+            if (parts.isEmpty()) {
+                throw new IllegalStateException("Cannot build multipart body with no parts");
+            }
+
+            try (var out = new ByteArrayOutputStream()) {
+                for (Part part : parts) {
+                    writeBoundary(out, BOUNDARY);
+                    writePart(out, part);
+                }
+                // The closing delimiter carries a trailing "--".
+                out.write(("--" + BOUNDARY + "--" + CRLF).getBytes(StandardCharsets.UTF_8));
+                return new HttpRequestMultipartBody(out.toByteArray());
+            } catch (IOException e) {
+                throw new RuntimeException(e);
+            }
+        }
+
+        private void writeBoundary(OutputStream out, String boundary) throws IOException {
+            out.write(("--" + boundary + CRLF).getBytes(StandardCharsets.UTF_8));
+        }
+
+        private void writePart(OutputStream out, Part part) throws IOException {
+            out.write(("Content-Disposition: form-data; name=\"" + escape(part.fieldName()) + "\"").getBytes(StandardCharsets.UTF_8));
+            if (part.fileName() != null) {
+                out.write(("; filename=\"" + escape(part.fileName()) + "\"").getBytes(StandardCharsets.UTF_8));
+            }
+            out.write((CRLF).getBytes(StandardCharsets.UTF_8));
+            out.write(("Content-Type: " + part.contentType() + CRLF + CRLF).getBytes(StandardCharsets.UTF_8));
+            out.write(part.content());
+            out.write(CRLF.getBytes(StandardCharsets.UTF_8));
+        }
+
+        // Percent-encodes the characters that would otherwise terminate the quoted value or the header line.
+        private static String escape(String value) {
+            return value.replace("\"", "%22").replace("\r", "%0D").replace("\n", "%0A");
+        }
+    }
+}
diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/WatsonxService.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/WatsonxService.java index 2be328d7..96580ed0 100644 --- a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/WatsonxService.java +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/WatsonxService.java @@ -25,6 +25,7 @@ import com.ibm.watsonx.ai.timeseries.TimeSeriesService; import com.ibm.watsonx.ai.tokenization.TokenizationService; import com.ibm.watsonx.ai.tool.ToolService; +import com.ibm.watsonx.ai.transcription.TranscriptionRestClient; /** * This class provides common functionality and shared configuration used across various service-specific clients (e.g., {@code ChatService}, @@ -42,6 +43,7 @@ * @see FoundationModelService * @see ToolService * @see DetectionService + * @see TranscriptionRestClient */ public abstract class WatsonxService { diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationParameters.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationParameters.java index 02dcdecb..32bc3563 100644 --- a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationParameters.java +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationParameters.java @@ -14,8 +14,8 @@ import java.util.Map; import java.util.stream.Stream; import com.ibm.watsonx.ai.WatsonxParameters; +import
com.ibm.watsonx.ai.core.Language; import com.ibm.watsonx.ai.textprocessing.CosReference; -import com.ibm.watsonx.ai.textprocessing.Language; import com.ibm.watsonx.ai.textprocessing.OcrMode; /** diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionParameters.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionParameters.java index 1f6f9fd7..d18cf23e 100644 --- a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionParameters.java +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionParameters.java @@ -14,8 +14,8 @@ import java.util.Map; import java.util.stream.Stream; import com.ibm.watsonx.ai.WatsonxParameters; +import com.ibm.watsonx.ai.core.Language; import com.ibm.watsonx.ai.textprocessing.CosReference; -import com.ibm.watsonx.ai.textprocessing.Language; import com.ibm.watsonx.ai.textprocessing.OcrMode; /** diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/DefaultRestClient.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/DefaultRestClient.java new file mode 100644 index 00000000..eeaf0f21 --- /dev/null +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/DefaultRestClient.java @@ -0,0 +1,86 @@ +/* + * Copyright IBM Corp. 
2025 - 2025
+ * SPDX-License-Identifier: Apache-2.0
+ */
+package com.ibm.watsonx.ai.transcription;
+
+import static com.ibm.watsonx.ai.core.Json.fromJson;
+import static java.util.Objects.nonNull;
+import static java.util.Objects.requireNonNull;
+import java.io.IOException;
+import java.net.URI;
+import java.net.http.HttpRequest;
+import java.net.http.HttpRequest.BodyPublishers;
+import java.net.http.HttpResponse.BodyHandlers;
+import com.ibm.watsonx.ai.core.factory.HttpClientFactory;
+import com.ibm.watsonx.ai.core.http.HttpRequestMultipartBody;
+import com.ibm.watsonx.ai.core.http.SyncHttpClient;
+import com.ibm.watsonx.ai.core.http.interceptors.LoggerInterceptor.LogMode;
+
+/**
+ * Default implementation of the {@link TranscriptionRestClient} abstract class.
+ */
+final class DefaultRestClient extends TranscriptionRestClient {
+
+    private final SyncHttpClient syncHttpClient;
+
+    DefaultRestClient(Builder builder) {
+        super(builder);
+        requireNonNull(authenticator, "authenticator is mandatory");
+        syncHttpClient = HttpClientFactory.createSync(authenticator, httpClient, LogMode.of(logRequests, logResponses));
+    }
+
+    /**
+     * Sends the audio as a {@code multipart/form-data} POST to {@code /ml/v1/audio/transcriptions} and parses the JSON response.
+     *
+     * @param request the transcription request; its model, language, project/space ids and input stream become form parts
+     * @return the {@link TranscriptionResult} deserialized from the response body
+     * @throws RuntimeException wrapping the {@link IOException} or {@link InterruptedException} raised while sending the request
+     */
+    @Override
+    public TranscriptionResult transcribe(TranscriptionRequest request) {
+        requireNonNull(request, "request cannot be null");
+
+        var multiPartRequest = HttpRequestMultipartBody.builder()
+            .addPart("model", request.modelId())
+            .addPart("language", request.language())
+            .addPart("project_id", request.projectId())
+            .addPart("space_id", request.spaceId())
+            .addInputStream("file", request.inputStream())
+            .build();
+
+        var httpRequest = HttpRequest.newBuilder(URI.create(baseUrl + "/ml/v1/audio/transcriptions?version=%s".formatted(version)))
+            .header("Content-Type", multiPartRequest.contentType())
+            .header("Accept", "application/json")
+            .POST(BodyPublishers.ofByteArray(multiPartRequest.body()))
+            .timeout(timeout);
+
+        if (nonNull(request.transactionId())) {
+            httpRequest.header(TRANSACTION_ID_HEADER, request.transactionId());
+        }
+
+        try {
+
+            var httpResponse = syncHttpClient.send(httpRequest.build(), BodyHandlers.ofString());
+            return fromJson(httpResponse.body(), TranscriptionResult.class);
+
+        } catch (IOException e) {
+            throw new RuntimeException(e);
+        } catch (InterruptedException e) {
+            // Restore the interrupt flag so callers further up the stack can still observe the interruption.
+            Thread.currentThread().interrupt();
+            throw new RuntimeException(e);
+        }
+    }
+
+    /**
+     * Returns a new {@link Builder} instance.
+     */
+    static Builder builder() {
+        return new Builder();
+    }
+
+    /**
+     * Builder class for constructing {@link DefaultRestClient} instances with configurable parameters.
+     */
+    public static final class Builder extends TranscriptionRestClient.Builder {
+
+        private Builder() {}
+
+        /**
+         * Builds a {@link DefaultRestClient} instance using the configured parameters.
+         *
+         * @return a new instance of {@link DefaultRestClient}
+         */
+        public DefaultRestClient build() {
+            return new DefaultRestClient(this);
+        }
+    }
+}
diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRequest.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRequest.java new file mode 100644 index 00000000..c85d343b --- /dev/null +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRequest.java @@ -0,0 +1,157 @@ +/* + * Copyright IBM Corp. 2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.transcription; + +import static java.util.Objects.isNull; +import static java.util.Objects.requireNonNull; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.nio.file.Path; +import com.ibm.watsonx.ai.WatsonxParameters; +import com.ibm.watsonx.ai.core.Language; + +/** + * Represents a request to the watsonx.ai Transcribe Audio API. + *

+ * Example usage: + * + *

{@code
+ * TranscriptionRequest request =
+ *     TranscriptionRequest.builder()
+ *         .file("path-to-file")
+ *         .language(Language.ITALIAN)
+ *         .build();
+ * }
+ */ +public final class TranscriptionRequest extends WatsonxParameters { + private String modelId; + private final InputStream is; + private final String language; + + private TranscriptionRequest(Builder builder) { + super(builder); + modelId = builder.modelId; + is = requireNonNull(builder.file, "file cannot be null"); + language = isNull(builder.language) ? "en" : builder.language; + } + + public String modelId() { + return modelId; + } + + public InputStream inputStream() { + return is; + } + + public String language() { + return language; + } + + /** + * Returns a new {@link Builder} instance. + *

+ * Example usage: + * + *

{@code
+     * TranscriptionRequest request =
+     *     TranscriptionRequest.builder()
+     *         .file("path-to-file")
+     *         .language(Language.ITALIAN)
+     *         .build();
+     * }
+ * + * @return {@link Builder} instance. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder class for constructing {@link TranscriptionRequest} instances with configurable parameters. + */ + public final static class Builder extends WatsonxParameters.Builder { + private String modelId; + private InputStream file; + private String language; + + private Builder() {} + + /** + * Sets the model to use for audio transcription. + * + * @param modelId the model identifier (for example, {@code "openai/whisper-tiny"}) + */ + public Builder modelId(String modelId) { + this.modelId = modelId; + return this; + } + + /** + * Sets the input audio file to be transcribed. + * + * @param path the path to a local audio file + */ + public Builder file(String path) { + return file(Path.of(path).toFile()); + } + + /** + * Sets the input audio file to be transcribed. + * + * @param file the audio file to transcribe + */ + public Builder file(File file) { + try { + return file(new FileInputStream(file)); + } catch (FileNotFoundException e) { + throw new RuntimeException(e); + } + } + + /** + * Sets the input audio to be transcribed. + * + * @param file the input stream containing the audio data + */ + public Builder file(InputStream file) { + this.file = file; + return this; + } + + /** + * Sets the target transcription language. + *

+         * If not provided (or {@code null}), the default transcription language is English ({@code "en"}).
+         *
+         * @param language the {@link Language} whose ISO code is used as the target transcription language; may be {@code null}
+         */
+        public Builder language(Language language) {
+            return language(isNull(language) ? null : language.isoCode());
+        }
+
+        /**
+         * Sets the target transcription language.
+         *

+ * If not provided, the default transcription language is English ({@code "en"}). + * + * @param language ISO language code representing the target transcription language + */ + public Builder language(String language) { + this.language = language; + return this; + } + + /** + * Builds a {@link TranscriptionRequest} instance using the configured parameters. + * + * @return a new instance of {@link TranscriptionRequest} + */ + public TranscriptionRequest build() { + return new TranscriptionRequest(this); + } + } +} diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRestClient.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRestClient.java new file mode 100644 index 00000000..d89910d8 --- /dev/null +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionRestClient.java @@ -0,0 +1,50 @@ +/* + * Copyright IBM Corp. 2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.transcription; + +import java.util.ServiceLoader; +import java.util.function.Supplier; +import com.ibm.watsonx.ai.WatsonxRestClient; + +/** + * Abstraction of a REST client for interacting with the IBM watsonx.ai Transcribe Audio APIs. + */ +public abstract class TranscriptionRestClient extends WatsonxRestClient { + + protected TranscriptionRestClient(Builder builder) { + super(builder); + } + + /** + * Sends an audio transcription request + * + * @param request the {@link TranscriptionRequest} containing all parameters for the transcription + * @return a {@link TranscriptionResult} containing the transcription output + */ + public abstract TranscriptionResult transcribe(TranscriptionRequest request); + + /** + * Creates a new {@link Builder} using the first available {@link TranscriptionRestClientBuilderFactory} discovered via {@link ServiceLoader}. + *

+     * If no factory is found, falls back to the default {@link DefaultRestClient} builder, which is only created in that case.
+     */
+    static TranscriptionRestClient.Builder builder() {
+        return ServiceLoader.load(TranscriptionRestClientBuilderFactory.class).findFirst()
+            .map(Supplier::get)
+            .orElseGet(DefaultRestClient::builder);
+    }
+
+    /**
+     * Builder abstract class for constructing {@link TranscriptionRestClient} instances with configurable parameters.
+     */
+    public abstract static class Builder extends WatsonxRestClient.Builder {}
+
+    /**
+     * Service Provider Interface for supplying custom {@link Builder} implementations.
+     *

+ * This allows frameworks to provide their own client implementations. + */ + public interface TranscriptionRestClientBuilderFactory extends Supplier {} +} diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionResult.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionResult.java new file mode 100644 index 00000000..db58fba7 --- /dev/null +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionResult.java @@ -0,0 +1,15 @@ +/* + * Copyright IBM Corp. 2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.transcription; + +/** + * Represents the result returned by the watsonx.ai Transcribe Audio API. + * + * @param model The model used for audio transcription. + * @param text The text generated by transcribing the input audio. + * @param createdAt The ISO 8601 timestamp indicating when the transcription result was created. + * @param tokenCount The number of estimated tokens in the returned text. + */ +public record TranscriptionResult(String model, String text, String createdAt, int tokenCount) {} diff --git a/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionService.java b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionService.java new file mode 100644 index 00000000..dca84c9c --- /dev/null +++ b/modules/watsonx-ai/src/main/java/com/ibm/watsonx/ai/transcription/TranscriptionService.java @@ -0,0 +1,166 @@ +/* + * Copyright IBM Corp. 
2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.transcription; + +import static java.util.Objects.nonNull; +import static java.util.Objects.requireNonNull; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileNotFoundException; +import java.io.InputStream; +import java.nio.file.Paths; +import com.ibm.watsonx.ai.WatsonxService.ModelService; +import com.ibm.watsonx.ai.core.Language; +import com.ibm.watsonx.ai.core.auth.Authenticator; + +/** + * Service class to interact with IBM watsonx.ai Transcribe Audio APIs. + *

+ * Example usage: + * + *

{@code
+ * TranscriptionService transcriptionService = TranscriptionService.builder()
+ *     .baseUrl("https://...")  // or use CloudRegion
+ *     .apiKey("my-api-key")    // creates an IBM Cloud Authenticator
+ *     .projectId("my-project-id")
+ *     .modelId("openai/whisper-tiny")
+ *     .build();
+ *
+ * TranscriptionResult response = transcriptionService.transcribe(
+ *     TranscriptionRequest.builder()
+ *         .file("path-to-file")
+ *         .language(Language.ITALIAN)
+ *         .build()
+ * );
+ * }
+ *
+ * To use a custom authentication mechanism, configure it explicitly with {@code authenticator(Authenticator)}.
+ *
+ * @see Authenticator
+ */
+public final class TranscriptionService extends ModelService {
+
+    private final TranscriptionRestClient client;
+
+    private TranscriptionService(Builder builder) {
+        super(builder);
+        requireNonNull(builder.authenticator(), "authenticator cannot be null");
+        client = TranscriptionRestClient.builder()
+            .baseUrl(baseUrl)
+            .version(version)
+            .logRequests(logRequests)
+            .logResponses(logResponses)
+            .timeout(timeout)
+            .httpClient(httpClient)
+            .authenticator(builder.authenticator())
+            .build();
+    }
+
+    /**
+     * Transcribes the audio file located at the given path.
+     *
+     * @param path the path to the audio file
+     * @param language the target {@link Language} for transcription
+     * @return the resulting {@link TranscriptionResult}
+     */
+    public TranscriptionResult transcribe(String path, Language language) {
+        return transcribe(Paths.get(path).toFile(), language);
+    }
+
+    /**
+     * Transcribes the given audio {@link File}.
+     * <p>
+     * The file is opened by this method and closed again once the transcription call has returned.
+     *
+     * @param file the audio file to transcribe
+     * @param language the target {@link Language} for transcription
+     * @return the resulting {@link TranscriptionResult}
+     * @throws RuntimeException wrapping the {@link FileNotFoundException} raised when the file cannot be opened
+     */
+    public TranscriptionResult transcribe(File file, Language language) {
+        // try-with-resources: the stream is owned by this method, so it must not outlive the call.
+        try (var is = new FileInputStream(file)) {
+            return transcribe(is, language);
+        } catch (FileNotFoundException e) {
+            throw new RuntimeException(e);
+        } catch (java.io.IOException e) {
+            // Only reachable when closing the stream fails.
+            throw new java.io.UncheckedIOException(e);
+        }
+    }
+
+    /**
+     * Transcribes audio data from an {@link InputStream}.
+     *
+     * @param is the {@link InputStream} containing audio data
+     * @param language the target {@link Language} for transcription
+     * @return the resulting {@link TranscriptionResult}
+     */
+    public TranscriptionResult transcribe(InputStream is, Language language) {
+        return transcribe(TranscriptionRequest.builder()
+            .file(is)
+            .language(language)
+            .build());
+    }
+
+    /**
+     * Executes an audio transcription request using the provided {@link TranscriptionRequest}.
+     * <p>
+     * The model id configured on this service is used when the request does not specify one.
+     *
+     * @param request the {@link TranscriptionRequest}
+     * @return the resulting {@link TranscriptionResult}
+     */
+    public TranscriptionResult transcribe(TranscriptionRequest request) {
+        requireNonNull(request, "request cannot be null");
+        var projectSpace = resolveProjectSpace(request);
+
+        // Rebuild the request so the resolved project/space and the fallback model id are what the client sends.
+        var resolvedRequest = TranscriptionRequest.builder()
+            .file(request.inputStream())
+            .language(request.language())
+            .modelId(nonNull(request.modelId()) ? request.modelId() : modelId)
+            .projectId(projectSpace.projectId())
+            .spaceId(projectSpace.spaceId())
+            .transactionId(request.transactionId())
+            .build();
+
+        return client.transcribe(resolvedRequest);
+    }
+
+    /**
+     * Returns a new {@link Builder} instance.
+     *

+ * Example usage: + * + *

{@code
+     * TranscriptionService transcriptionService = TranscriptionService.builder()
+     *     .baseUrl("https://...")  // or use CloudRegion
+     *     .apiKey("my-api-key")    // creates an IBM Cloud Authenticator
+     *     .projectId("my-project-id")
+     *     .modelId("openai/whisper-tiny")
+     *     .build();
+     *
+     * TranscriptionResult response = transcriptionService.transcribe(
+     *     TranscriptionRequest.builder()
+     *         .file("path-to-file")
+     *         .language(Language.ITALIAN)
+     *         .build()
+     * );
+     * }
+ * + * @return {@link Builder} instance. + */ + public static Builder builder() { + return new Builder(); + } + + /** + * Builder class for constructing {@link TranscriptionService} instances with configurable parameters. + */ + public final static class Builder extends ModelService.Builder { + + private Builder() {} + + /** + * Builds a {@link TranscriptionService} instance using the configured parameters. + * + * @return a new instance of {@link TranscriptionService} + */ + public TranscriptionService build() { + return new TranscriptionService(this); + } + } +} diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/ContextDepedencyInjectionTest.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/ContextDepedencyInjectionTest.java index 63a72644..cc12d738 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/ContextDepedencyInjectionTest.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/ContextDepedencyInjectionTest.java @@ -26,6 +26,7 @@ import com.ibm.watsonx.ai.tool.builtin.WeatherTool; import com.ibm.watsonx.ai.tool.builtin.WebCrawlerTool; import com.ibm.watsonx.ai.tool.builtin.WikipediaTool; +import com.ibm.watsonx.ai.transcription.TranscriptionService; import jakarta.enterprise.context.ApplicationScoped; import jakarta.enterprise.inject.Produces; import jakarta.inject.Inject; @@ -71,6 +72,9 @@ public class ContextDepedencyInjectionTest { @Inject TimeSeriesService timeSeriesService; + @Inject + TranscriptionService transcriptionService; + @Inject ToolService toolService; @@ -142,6 +146,11 @@ void should_inject_time_series_service() { assertNotNull(timeSeriesService); } + @Test + void should_inject_transcription_service() { + assertNotNull(transcriptionService); + } + @Test void should_inject_tool_service() { assertNotNull(toolService); @@ -252,6 +261,16 @@ public TimeSeriesService produceTimeSeriesService() { .build(); } + @Produces + public TranscriptionService produceTranscriptionService() { + return 
TranscriptionService.builder() + .baseUrl("https://example.com") + .apiKey("api-key") + .projectId("project-id") + .modelId("model-id") + .build(); + } + @Produces public ToolService produceToolService() { return ToolService.builder() diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/CustomHttpClientTest.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/CustomHttpClientTest.java index ba0375eb..6cff1631 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/CustomHttpClientTest.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/CustomHttpClientTest.java @@ -22,6 +22,7 @@ import com.ibm.watsonx.ai.timeseries.TimeSeriesService; import com.ibm.watsonx.ai.tokenization.TokenizationService; import com.ibm.watsonx.ai.tool.ToolService; +import com.ibm.watsonx.ai.transcription.TranscriptionService; public class CustomHttpClientTest { @@ -555,6 +556,47 @@ void should_use_custom_http_client_for_tool_service() throws Exception { assertNotEquals(HttpClientProvider.httpClient(), getFieldValue(syncHttpClient, "delegate")); } + @Test + void should_use_custom_http_client_for_transcription_service() throws Exception { + + HttpClient customClient = HttpClient.newHttpClient(); + TranscriptionService transcriptionService = TranscriptionService.builder() + .baseUrl("https://localhost") + .modelId("modelId") + .apiKey("apiKey") + .projectId("projectId") + .httpClient(customClient) + .build(); + + Object restclient = getFieldValue(transcriptionService, "client"); + assertEquals(customClient, getFieldValue(restclient, "httpClient")); + assertNotEquals(HttpClientProvider.httpClient(), getFieldValue(restclient, "httpClient")); + + Object syncHttpClient = getFieldValue(restclient, "syncHttpClient"); + assertEquals(customClient, getFieldValue(syncHttpClient, "delegate")); + assertNotEquals(HttpClientProvider.httpClient(), getFieldValue(syncHttpClient, "delegate")); + } + + @Test + void should_use_default_http_client_for_transcription_service() 
throws Exception { + + HttpClient customClient = HttpClient.newHttpClient(); + TranscriptionService transcriptionService = TranscriptionService.builder() + .baseUrl("https://localhost") + .modelId("modelId") + .apiKey("apiKey") + .projectId("projectId") + .build(); + + Object restclient = getFieldValue(transcriptionService, "client"); + assertNotEquals(customClient, getFieldValue(restclient, "httpClient")); + assertEquals(HttpClientProvider.httpClient(), getFieldValue(restclient, "httpClient")); + + Object syncHttpClient = getFieldValue(restclient, "syncHttpClient"); + assertNotEquals(customClient, getFieldValue(syncHttpClient, "delegate")); + assertEquals(HttpClientProvider.httpClient(), getFieldValue(syncHttpClient, "delegate")); + } + @Test void should_use_default_http_client_for_tool_service() throws Exception { diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/TranscriptionServiceTest.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/TranscriptionServiceTest.java new file mode 100644 index 00000000..c119d8f7 --- /dev/null +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/TranscriptionServiceTest.java @@ -0,0 +1,169 @@ +/* + * Copyright IBM Corp. 
2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai; + +import static com.github.tomakehurst.wiremock.client.WireMock.aResponse; +import static com.github.tomakehurst.wiremock.client.WireMock.containing; +import static com.github.tomakehurst.wiremock.client.WireMock.equalTo; +import static com.github.tomakehurst.wiremock.client.WireMock.post; +import static org.junit.jupiter.api.Assertions.assertEquals; +import static org.junit.jupiter.api.Assertions.assertThrows; +import static org.junit.jupiter.api.Assertions.assertThrowsExactly; +import static org.mockito.ArgumentMatchers.any; +import static org.mockito.Mockito.when; +import java.io.ByteArrayInputStream; +import java.io.File; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.nio.file.Files; +import org.junit.jupiter.api.BeforeEach; +import org.junit.jupiter.api.Test; +import org.junit.jupiter.api.extension.ExtendWith; +import org.mockito.junit.jupiter.MockitoExtension; +import org.mockito.junit.jupiter.MockitoSettings; +import org.mockito.quality.Strictness; +import com.ibm.watsonx.ai.core.Language; +import com.ibm.watsonx.ai.transcription.TranscriptionRequest; +import com.ibm.watsonx.ai.transcription.TranscriptionService; + +@ExtendWith(MockitoExtension.class) +public class TranscriptionServiceTest extends AbstractWatsonxTest { + + @BeforeEach + void setup() { + when(mockAuthenticator.token()).thenReturn("token"); + } + + @Test + void should_transcribe_audio_successfully() throws Exception { + + var file = Files.createTempFile("", ""); + file.toFile().deleteOnExit(); + Files.writeString(file, "the ending was terrific."); + + wireMock.stubFor(post("/ml/v1/audio/transcriptions?version=%s".formatted(API_VERSION)) + .withHeader("Authorization", equalTo("Bearer token")) + .withHeader("Content-Type", containing("multipart/form-data; boundary=----watsonx-ai-sdk")) + .withRequestBody(containing("name=\"model\"")) + 
.withRequestBody(containing("openai/whisper-tiny")) + .withRequestBody(containing("name=\"language\"")) + .withRequestBody(containing("it")) + .withRequestBody(containing("name=\"project_id\"")) + .withRequestBody(containing("pid")) + .withRequestBody(containing("the ending was terrific.")) + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", "application/json") + .withBody(""" + { + "model": "openai/whisper-tiny", + "text": "the ending was terrific.", + "created_at": "2023-07-21T16:52:32.190Z", + "token_count": 8 + }"""))); + + TranscriptionService service = TranscriptionService.builder() + .authenticator(mockAuthenticator) + .baseUrl("http://localhost:".concat(String.valueOf(wireMock.getPort()))) + .projectId("pid") + .modelId("openai/whisper-tiny") + .build(); + + var result = service.transcribe(file.toAbsolutePath().toString(), Language.ITALIAN); + assertEquals("openai/whisper-tiny", result.model()); + assertEquals("the ending was terrific.", result.text()); + assertEquals(8, result.tokenCount()); + assertEquals("2023-07-21T16:52:32.190Z", result.createdAt()); + } + + @Test + void should_transcribe_audio_successfully_overriding_parameters() throws Exception { + + var file = Files.createTempFile("", ""); + file.toFile().deleteOnExit(); + Files.writeString(file, "the ending was terrific."); + + wireMock.stubFor(post("/ml/v1/audio/transcriptions?version=%s".formatted(API_VERSION)) + .withHeader("Authorization", equalTo("Bearer token")) + .withHeader("Content-Type", containing("multipart/form-data; boundary=----watsonx-ai-sdk")) + .withHeader(TRANSACTION_ID_HEADER, equalTo("transaction-id")) + .withRequestBody(containing("name=\"model\"")) + .withRequestBody(containing("my-openai/whisper-tiny")) + .withRequestBody(containing("name=\"language\"")) + .withRequestBody(containing("en")) + .withRequestBody(containing("name=\"project_id\"")) + .withRequestBody(containing("mid")) + .withRequestBody(containing("name=\"space_id\"")) + 
.withRequestBody(containing("sid")) + .withRequestBody(containing("the ending was terrific.")) + .willReturn(aResponse() + .withStatus(200) + .withHeader("Content-Type", "application/json") + .withBody(""" + { + "model": "openai/whisper-tiny", + "text": "the ending was terrific.", + "created_at": "2023-07-21T16:52:32.190Z", + "token_count": 8 + }"""))); + + TranscriptionService service = TranscriptionService.builder() + .authenticator(mockAuthenticator) + .baseUrl("http://localhost:".concat(String.valueOf(wireMock.getPort()))) + .projectId("pid") + .modelId("openai/whisper-tiny") + .build(); + + var result = service.transcribe( + TranscriptionRequest.builder() + .projectId("mid") + .spaceId("sid") + .modelId("my-openai/whisper-tiny") + .file(file.toAbsolutePath().toString()) + .transactionId("transaction-id") + .build()); + + assertEquals("openai/whisper-tiny", result.model()); + assertEquals("the ending was terrific.", result.text()); + assertEquals(8, result.tokenCount()); + assertEquals("2023-07-21T16:52:32.190Z", result.createdAt()); + } + + /** Checks that a nonexistent file surfaces as a RuntimeException whose cause class is exactly FileNotFoundException: first while building a TranscriptionRequest with file(String), then when calling transcribe(File, Language) on a configured service. NOTE(review): lenient strictness presumably tolerates stubs from shared setup that go unused here; confirm. */ @Test + @MockitoSettings(strictness = Strictness.LENIENT) + void should_throw_file_not_found_exception() { + var ex = assertThrowsExactly(RuntimeException.class, () -> TranscriptionRequest.builder().file("/fileNotFound/file.txt").build()); + assertEquals(FileNotFoundException.class, ex.getCause().getClass()); + + TranscriptionService service = TranscriptionService.builder() + .authenticator(mockAuthenticator) + .baseUrl("http://localhost:".concat(String.valueOf(wireMock.getPort()))) + .projectId("pid") + .modelId("openai/whisper-tiny") + .build(); + + ex = assertThrowsExactly(RuntimeException.class, () -> service.transcribe(new File("/fileNotFound/file.txt"), Language.ENGLISH)); + assertEquals(FileNotFoundException.class, ex.getCause().getClass()); + } + + /** Stubs mockHttpClient.send to throw IOException and, inside withWatsonxServiceMock, expects transcribe(InputStream, Language) to fail with a RuntimeException whose cause class is exactly IOException. */ @Test + void should_throw_io_exception() throws Exception { + + when(mockHttpClient.send(any(), any())).thenThrow(IOException.class); + + withWatsonxServiceMock(() -> { + 
+ TranscriptionService service = TranscriptionService.builder() + .authenticator(mockAuthenticator) + .baseUrl("http://localhost:".concat(String.valueOf(wireMock.getPort()))) + .projectId("pid") + .modelId("openai/whisper-tiny") + .build(); + + var ex = assertThrows(RuntimeException.class, () -> service.transcribe(new ByteArrayInputStream("Hello".getBytes()), Language.ITALIAN)); + assertEquals(IOException.class, ex.getCause().getClass()); + }); + } +} diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/client/CustomRestClientTest.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/client/CustomRestClientTest.java index 65fb1051..35ddc206 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/client/CustomRestClientTest.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/client/CustomRestClientTest.java @@ -25,6 +25,7 @@ import com.ibm.watsonx.ai.client.impl.CustomTimeSeriesRestClient; import com.ibm.watsonx.ai.client.impl.CustomTokenizationRestClient; import com.ibm.watsonx.ai.client.impl.CustomToolRestClient; +import com.ibm.watsonx.ai.client.impl.CustomTranscriptionRestClient; import com.ibm.watsonx.ai.core.auth.Authenticator; import com.ibm.watsonx.ai.core.auth.cp4d.AuthMode; import com.ibm.watsonx.ai.core.auth.cp4d.CP4DAuthenticator; @@ -40,6 +41,7 @@ import com.ibm.watsonx.ai.timeseries.TimeSeriesService; import com.ibm.watsonx.ai.tokenization.TokenizationService; import com.ibm.watsonx.ai.tool.ToolService; +import com.ibm.watsonx.ai.transcription.TranscriptionService; import com.ibm.watsonx.ai.utils.ServiceLoaderUtils; public class CustomRestClientTest { @@ -332,4 +334,22 @@ public void should_use_custom_rest_client_when_building_detection_service() thro var client = clientField.get(detectionService); assertTrue(client instanceof CustomDetectionRestClient); } + + /** Builds a TranscriptionService through its builder and asserts, by reading the declared field named client through reflection, that the instance it holds is a CustomTranscriptionRestClient. */ @Test + // com.ibm.watsonx.ai.transcription.TranscriptionRestClient$TranscriptionRestClientBuilderFactory + public void 
should_use_custom_rest_client_when_building_transcription_service() throws Exception { + + TranscriptionService transcriptionService = TranscriptionService.builder() + .apiKey("test") + .baseUrl("http://localhost") + .modelId("openai/whisper-tiny") + .projectId("project-id") + .build(); + + /* presumably the client field has no public accessor, hence the reflective read; confirm */ Class clazz = TranscriptionService.class; + var clientField = clazz.getDeclaredField("client"); + clientField.setAccessible(true); + var client = clientField.get(transcriptionService); + assertTrue(client instanceof CustomTranscriptionRestClient); + } } diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/client/impl/CustomTranscriptionRestClient.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/client/impl/CustomTranscriptionRestClient.java new file mode 100644 index 00000000..08cce736 --- /dev/null +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/client/impl/CustomTranscriptionRestClient.java @@ -0,0 +1,35 @@ +/* + * Copyright IBM Corp. 2025 - 2025 + * SPDX-License-Identifier: Apache-2.0 + */ +package com.ibm.watsonx.ai.client.impl; + +import com.ibm.watsonx.ai.transcription.TranscriptionRequest; +import com.ibm.watsonx.ai.transcription.TranscriptionRestClient; +import com.ibm.watsonx.ai.transcription.TranscriptionResult; + +/** Test implementation of TranscriptionRestClient whose transcribe method always throws UnsupportedOperationException. It also declares the builder and the builder factory that create it. */ public class CustomTranscriptionRestClient extends TranscriptionRestClient { + + /** Forwards the given builder to the TranscriptionRestClient constructor. */ CustomTranscriptionRestClient(Builder builder) { + super(builder); + } + + /** Always throws UnsupportedOperationException; no transcription request is performed. */ @Override + public TranscriptionResult transcribe(TranscriptionRequest request) { + throw new UnsupportedOperationException("Unimplemented method 'transcribe'"); + } + + /** TranscriptionRestClientBuilderFactory implementation whose get() returns a new CustomTranscriptionRestClient.Builder. */ public static final class CustomTranscriptionRestClientBuilderFactory implements TranscriptionRestClientBuilderFactory { + @Override + public Builder get() { + return new CustomTranscriptionRestClient.Builder(); + } + } + + /** Builder whose build() returns a CustomTranscriptionRestClient created from this builder. */ static final class Builder extends TranscriptionRestClient.Builder { + @Override + public TranscriptionRestClient build() { + return new CustomTranscriptionRestClient(this); + } + } +} 
diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/ClassificationServiceIT.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/ClassificationServiceIT.java index 986b5386..4831eabd 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/ClassificationServiceIT.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/ClassificationServiceIT.java @@ -16,11 +16,11 @@ import java.time.Duration; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import com.ibm.watsonx.ai.core.Language; import com.ibm.watsonx.ai.core.auth.ibmcloud.IBMCloudAuthenticator; import com.ibm.watsonx.ai.core.exception.WatsonxException; import com.ibm.watsonx.ai.textprocessing.KvpFields; import com.ibm.watsonx.ai.textprocessing.KvpFields.KvpField; -import com.ibm.watsonx.ai.textprocessing.Language; import com.ibm.watsonx.ai.textprocessing.Schema; import com.ibm.watsonx.ai.textprocessing.SemanticConfig.SchemaMergeStrategy; import com.ibm.watsonx.ai.textprocessing.textclassification.TextClassificationDeleteParameters; diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/TextExtractionServiceIT.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/TextExtractionServiceIT.java index 95bc86b0..e983e6dd 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/TextExtractionServiceIT.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/it/TextExtractionServiceIT.java @@ -15,9 +15,9 @@ import java.time.Duration; import org.junit.jupiter.api.Test; import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable; +import com.ibm.watsonx.ai.core.Language; import com.ibm.watsonx.ai.core.auth.ibmcloud.IBMCloudAuthenticator; import com.ibm.watsonx.ai.core.exception.WatsonxException; -import com.ibm.watsonx.ai.textprocessing.Language; import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionDeleteParameters; import 
com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionParameters; import com.ibm.watsonx.ai.textprocessing.textextraction.TextExtractionParameters.Mode; diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationTest.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationTest.java index 6c19cab3..9f36aa36 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationTest.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textclassification/TextClassificationTest.java @@ -49,6 +49,7 @@ import com.github.tomakehurst.wiremock.stubbing.Scenario; import com.ibm.watsonx.ai.AbstractWatsonxTest; import com.ibm.watsonx.ai.core.Json; +import com.ibm.watsonx.ai.core.Language; import com.ibm.watsonx.ai.core.auth.Authenticator; import com.ibm.watsonx.ai.core.exception.WatsonxException; import com.ibm.watsonx.ai.core.exception.model.WatsonxError; @@ -60,7 +61,6 @@ import com.ibm.watsonx.ai.textprocessing.KvpFields.KvpField; import com.ibm.watsonx.ai.textprocessing.KvpPage; import com.ibm.watsonx.ai.textprocessing.KvpSlice; -import com.ibm.watsonx.ai.textprocessing.Language; import com.ibm.watsonx.ai.textprocessing.Metadata; import com.ibm.watsonx.ai.textprocessing.OcrMode; import com.ibm.watsonx.ai.textprocessing.Schema; diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionTest.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionTest.java index 4f982c8f..cd61c089 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionTest.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/textprocessing/textextraction/TextExtractionTest.java @@ -62,6 +62,7 @@ import com.github.tomakehurst.wiremock.stubbing.Scenario; import 
com.ibm.watsonx.ai.AbstractWatsonxTest; import com.ibm.watsonx.ai.core.Json; +import com.ibm.watsonx.ai.core.Language; import com.ibm.watsonx.ai.core.auth.Authenticator; import com.ibm.watsonx.ai.core.exception.WatsonxException; import com.ibm.watsonx.ai.core.exception.model.WatsonxError; @@ -75,7 +76,6 @@ import com.ibm.watsonx.ai.textprocessing.KvpFields.KvpField; import com.ibm.watsonx.ai.textprocessing.KvpPage; import com.ibm.watsonx.ai.textprocessing.KvpSlice; -import com.ibm.watsonx.ai.textprocessing.Language; import com.ibm.watsonx.ai.textprocessing.Metadata; import com.ibm.watsonx.ai.textprocessing.OcrMode; import com.ibm.watsonx.ai.textprocessing.Schema; diff --git a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/utils/ServiceLoaderUtils.java b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/utils/ServiceLoaderUtils.java index 8acdc68e..deb53703 100644 --- a/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/utils/ServiceLoaderUtils.java +++ b/modules/watsonx-ai/src/test/java/com/ibm/watsonx/ai/utils/ServiceLoaderUtils.java @@ -86,6 +86,10 @@ public static void setupServiceLoader() throws Exception { "com.ibm.watsonx.ai.detection.DetectionRestClient$DetectionRestClientBuilderFactory", "com.ibm.watsonx.ai.client.impl.CustomDetectionRestClient$CustomDetectionRestClientBuilderFactory"); + createServiceFile(metaInfServices, + "com.ibm.watsonx.ai.transcription.TranscriptionRestClient$TranscriptionRestClientBuilderFactory", + "com.ibm.watsonx.ai.client.impl.CustomTranscriptionRestClient$CustomTranscriptionRestClientBuilderFactory"); + URLClassLoader tempClassLoader = new URLClassLoader( new URL[] { tempDir.toUri().toURL() }, originalClassLoader