feat: Add ElevenLabs Text-to-Speech support
This commit introduces a new `spring-ai-elevenlabs` module to integrate ElevenLabs' TTS service into Spring AI. Key Features: - **New Module:** `spring-ai-elevenlabs` with a Spring Boot starter for auto-configuration. - **Core Classes:** - `ElevenLabsTextToSpeechModel`: Implements `TextToSpeechModel` and `StreamingTextToSpeechModel`. - `ElevenLabsTextToSpeechOptions`: Configurable TTS options (voice, format, speed, etc.). - `ElevenLabsApi` and `ElevenLabsVoicesApi`: Low-level REST clients for ElevenLabs APIs. - DTOs: `Speech`, `TextToSpeechMessage`, `TextToSpeechPrompt`, `TextToSpeechResponse`. - **Auto-configuration:** - `ElevenLabsAutoConfiguration`, `ElevenLabsConnectionProperties`, and `ElevenLabsSpeechProperties`. - **Functionality:** - Text-to-speech conversion with ElevenLabs voices. - Real-time streaming playback support. - Flexible runtime configuration via properties and model options. - **Documentation:** Updated Spring AI reference guide with usage examples. - **Tests:** Includes unit and integration tests for both success and failure scenarios. Note: - Some `tts` package classes will be relocated to the `core` module to support shared TTS abstractions, including upcoming OpenAI Speech API support. - Added metadata support to `TextToSpeechResponse`. - Added tests and updated documentation. Signed-off-by: Alexandros Pappas <apappascs@gmail.com>
This commit is contained in:
committed by
Mark Pollack
parent
2be1e42505
commit
9398850c2b
@@ -0,0 +1,90 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-parent</artifactId>
|
||||
<version>1.1.0-SNAPSHOT</version>
|
||||
<relativePath>../../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<artifactId>spring-ai-autoconfigure-model-elevenlabs</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Spring AI ElevenLabs Auto Configuration</name>
|
||||
<description>Spring AI ElevenLabs Auto Configuration</description>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
|
||||
<scm>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
<connection>git://github.com/spring-projects/spring-ai.git</connection>
|
||||
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
|
||||
</scm>
|
||||
|
||||
|
||||
<dependencies>
|
||||
|
||||
<!-- Spring AI dependencies -->
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-elevenlabs</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<!-- Spring AI auto configurations -->
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-autoconfigure-model-tool</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-autoconfigure-retry</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
|
||||
<!-- Boot dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-configuration-processor</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-autoconfigure-processor</artifactId>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<!-- Test dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-test</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.mockito</groupId>
|
||||
<artifactId>mockito-core</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,79 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.model.elevenlabs.autoconfigure;
|
||||
|
||||
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel;
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration;
|
||||
import org.springframework.beans.factory.ObjectProvider;
|
||||
import org.springframework.boot.autoconfigure.AutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
|
||||
import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.web.reactive.function.client.WebClientAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.retry.support.RetryTemplate;
|
||||
import org.springframework.web.client.ResponseErrorHandler;
|
||||
import org.springframework.web.client.RestClient;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
|
||||
/**
|
||||
* {@link AutoConfiguration Auto-configuration} for ElevenLabs.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@AutoConfiguration(after = { RestClientAutoConfiguration.class, SpringAiRetryAutoConfiguration.class,
|
||||
WebClientAutoConfiguration.class })
|
||||
@ConditionalOnClass(ElevenLabsApi.class)
|
||||
@EnableConfigurationProperties({ ElevenLabsSpeechProperties.class, ElevenLabsConnectionProperties.class })
|
||||
@ConditionalOnProperty(prefix = ElevenLabsSpeechProperties.CONFIG_PREFIX, name = "enabled", havingValue = "true",
|
||||
matchIfMissing = true)
|
||||
@ImportAutoConfiguration(classes = { SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class,
|
||||
WebClientAutoConfiguration.class })
|
||||
public class ElevenLabsAutoConfiguration {
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean
|
||||
public ElevenLabsApi elevenLabsApi(ElevenLabsConnectionProperties connectionProperties,
|
||||
ObjectProvider<RestClient.Builder> restClientBuilderProvider,
|
||||
ObjectProvider<WebClient.Builder> webClientBuilderProvider, ResponseErrorHandler responseErrorHandler) {
|
||||
|
||||
return ElevenLabsApi.builder()
|
||||
.baseUrl(connectionProperties.getBaseUrl())
|
||||
.apiKey(connectionProperties.getApiKey())
|
||||
.restClientBuilder(restClientBuilderProvider.getIfAvailable(RestClient::builder))
|
||||
.webClientBuilder(webClientBuilderProvider.getIfAvailable(WebClient::builder))
|
||||
.responseErrorHandler(responseErrorHandler)
|
||||
.build();
|
||||
}
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean
|
||||
public ElevenLabsTextToSpeechModel elevenLabsSpeechModel(ElevenLabsApi elevenLabsApi,
|
||||
ElevenLabsSpeechProperties speechProperties, RetryTemplate retryTemplate) {
|
||||
|
||||
return ElevenLabsTextToSpeechModel.builder()
|
||||
.elevenLabsApi(elevenLabsApi)
|
||||
.defaultOptions(speechProperties.getOptions())
|
||||
.retryTemplate(retryTemplate)
|
||||
.build();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.model.elevenlabs.autoconfigure;
|
||||
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
/**
|
||||
* Configuration properties for the ElevenLabs API connection.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@ConfigurationProperties(ElevenLabsConnectionProperties.CONFIG_PREFIX)
|
||||
public class ElevenLabsConnectionProperties {
|
||||
|
||||
public static final String CONFIG_PREFIX = "spring.ai.elevenlabs";
|
||||
|
||||
/**
|
||||
* ElevenLabs API access key.
|
||||
*/
|
||||
private String apiKey;
|
||||
|
||||
/**
|
||||
* ElevenLabs API base URL.
|
||||
*/
|
||||
private String baseUrl = ElevenLabsApi.DEFAULT_BASE_URL;
|
||||
|
||||
public String getApiKey() {
|
||||
return this.apiKey;
|
||||
}
|
||||
|
||||
public void setApiKey(String apiKey) {
|
||||
this.apiKey = apiKey;
|
||||
}
|
||||
|
||||
public String getBaseUrl() {
|
||||
return this.baseUrl;
|
||||
}
|
||||
|
||||
public void setBaseUrl(String baseUrl) {
|
||||
this.baseUrl = baseUrl;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,68 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.model.elevenlabs.autoconfigure;
|
||||
|
||||
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechOptions;
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.boot.context.properties.NestedConfigurationProperty;
|
||||
|
||||
/**
|
||||
* Configuration properties for the ElevenLabs Text-to-Speech API.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@ConfigurationProperties(ElevenLabsSpeechProperties.CONFIG_PREFIX)
|
||||
public class ElevenLabsSpeechProperties {
|
||||
|
||||
public static final String CONFIG_PREFIX = "spring.ai.elevenlabs.tts";
|
||||
|
||||
public static final String DEFAULT_MODEL_ID = "eleven_turbo_v2_5";
|
||||
|
||||
private static final String DEFAULT_VOICE_ID = "9BWtsMINqrJLrRacOk9x";
|
||||
|
||||
private static final ElevenLabsApi.OutputFormat DEFAULT_OUTPUT_FORMAT = ElevenLabsApi.OutputFormat.MP3_22050_32;
|
||||
|
||||
/**
|
||||
* Enable ElevenLabs speech model.
|
||||
*/
|
||||
private boolean enabled = true;
|
||||
|
||||
@NestedConfigurationProperty
|
||||
private ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder()
|
||||
.modelId(DEFAULT_MODEL_ID)
|
||||
.voiceId(DEFAULT_VOICE_ID)
|
||||
.outputFormat(DEFAULT_OUTPUT_FORMAT.getValue())
|
||||
.build();
|
||||
|
||||
public ElevenLabsTextToSpeechOptions getOptions() {
|
||||
return this.options;
|
||||
}
|
||||
|
||||
public void setOptions(ElevenLabsTextToSpeechOptions options) {
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
public boolean isEnabled() {
|
||||
return this.enabled;
|
||||
}
|
||||
|
||||
public void setEnabled(boolean enabled) {
|
||||
this.enabled = enabled;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,16 @@
|
||||
#
|
||||
# Copyright 2025-2025 the original author or authors.
|
||||
#
|
||||
# Licensed under the Apache License, Version 2.0 (the "License");
|
||||
# you may not use this file except in compliance with the License.
|
||||
# You may obtain a copy of the License at
|
||||
#
|
||||
# https://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing, software
|
||||
# distributed under the License is distributed on an "AS IS" BASIS,
|
||||
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
# See the License for the specific language governing permissions and
|
||||
# limitations under the License.
|
||||
#
|
||||
org.springframework.ai.model.elevenlabs.autoconfigure.ElevenLabsAutoConfiguration
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.model.elevenlabs.autoconfigure;
|
||||
|
||||
import java.util.Arrays;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
|
||||
|
||||
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel;
|
||||
import org.springframework.boot.autoconfigure.AutoConfigurations;
|
||||
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
|
||||
|
||||
/**
|
||||
* Integration tests for the {@link ElevenLabsAutoConfiguration}.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".*")
|
||||
public class ElevenLabsAutoConfigurationIT {
|
||||
|
||||
private static final org.apache.commons.logging.Log logger = org.apache.commons.logging.LogFactory
|
||||
.getLog(ElevenLabsAutoConfigurationIT.class);
|
||||
|
||||
private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
|
||||
.withPropertyValues("spring.ai.elevenlabs.api-key=" + System.getenv("ELEVEN_LABS_API_KEY"))
|
||||
.withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class));
|
||||
|
||||
@Test
|
||||
void speech() {
|
||||
this.contextRunner.run(context -> {
|
||||
ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class);
|
||||
byte[] response = speechModel.call("H");
|
||||
assertThat(response).isNotNull();
|
||||
assertThat(verifyMp3FrameHeader(response))
|
||||
.withFailMessage("Expected MP3 frame header to be present in the response, but it was not found.")
|
||||
.isTrue();
|
||||
assertThat(response).isNotEmpty();
|
||||
|
||||
logger.debug("Response: " + Arrays.toString(response));
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
void speechStream() {
|
||||
this.contextRunner.run(context -> {
|
||||
ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class);
|
||||
byte[] response = speechModel.call("Hello");
|
||||
assertThat(response).isNotNull();
|
||||
assertThat(verifyMp3FrameHeader(response))
|
||||
.withFailMessage("Expected MP3 frame header to be present in the response, but it was not found.")
|
||||
.isTrue();
|
||||
assertThat(response).isNotEmpty();
|
||||
|
||||
logger.debug("Response: " + Arrays.toString(response));
|
||||
});
|
||||
}
|
||||
|
||||
public boolean verifyMp3FrameHeader(byte[] audioResponse) {
|
||||
if (audioResponse == null || audioResponse.length < 3) {
|
||||
return false;
|
||||
}
|
||||
// Accept ID3 tag (MP3 metadata) or MP3 frame header
|
||||
boolean hasId3 = audioResponse[0] == 'I' && audioResponse[1] == 'D' && audioResponse[2] == '3';
|
||||
boolean hasFrame = (audioResponse[0] & 0xFF) == 0xFF && (audioResponse[1] & 0xE0) == 0xE0;
|
||||
return hasId3 || hasFrame;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,141 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.model.elevenlabs.autoconfigure;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel;
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.boot.autoconfigure.AutoConfigurations;
|
||||
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
|
||||
|
||||
/**
|
||||
* Tests for the {@link ElevenLabsSpeechProperties} and
|
||||
* {@link ElevenLabsConnectionProperties}.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class ElevenLabsPropertiesTests {
|
||||
|
||||
@Test
|
||||
public void connectionProperties() {
|
||||
new ApplicationContextRunner().withPropertyValues(
|
||||
// @formatter:off
|
||||
"spring.ai.elevenlabs.api-key=YOUR_API_KEY",
|
||||
"spring.ai.elevenlabs.base-url=https://custom.api.elevenlabs.io",
|
||||
"spring.ai.elevenlabs.tts.options.model-id=custom-model",
|
||||
"spring.ai.elevenlabs.tts.options.voice=custom-voice",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.style=0.2",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.speed=1.5"
|
||||
// @formatter:on
|
||||
).withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)).run(context -> {
|
||||
var speechProperties = context.getBean(ElevenLabsSpeechProperties.class);
|
||||
var connectionProperties = context.getBean(ElevenLabsConnectionProperties.class);
|
||||
|
||||
assertThat(connectionProperties.getApiKey()).isEqualTo("YOUR_API_KEY");
|
||||
assertThat(connectionProperties.getBaseUrl()).isEqualTo("https://custom.api.elevenlabs.io");
|
||||
|
||||
assertThat(speechProperties.getOptions().getModelId()).isEqualTo("custom-model");
|
||||
assertThat(speechProperties.getOptions().getVoice()).isEqualTo("custom-voice");
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().stability()).isEqualTo(0.6);
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().similarityBoost()).isEqualTo(0.8);
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().style()).isEqualTo(0.2);
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().useSpeakerBoost()).isFalse();
|
||||
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(1.5f);
|
||||
|
||||
// enabled is true by default
|
||||
assertThat(speechProperties.isEnabled()).isTrue();
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void speechOptionsTest() {
|
||||
new ApplicationContextRunner().withPropertyValues(
|
||||
// @formatter:off
|
||||
"spring.ai.elevenlabs.api-key=YOUR_API_KEY",
|
||||
"spring.ai.elevenlabs.tts.options.model-id=custom-model",
|
||||
"spring.ai.elevenlabs.tts.options.voice=custom-voice",
|
||||
"spring.ai.elevenlabs.tts.options.format=pcm_44100",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.style=0.2",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false",
|
||||
"spring.ai.elevenlabs.tts.options.voice-settings.speed=1.2",
|
||||
"spring.ai.elevenlabs.tts.options.language-code=en",
|
||||
"spring.ai.elevenlabs.tts.options.seed=12345",
|
||||
"spring.ai.elevenlabs.tts.options.previous-text=previous",
|
||||
"spring.ai.elevenlabs.tts.options.next-text=next",
|
||||
"spring.ai.elevenlabs.tts.options.apply-text-normalization=ON",
|
||||
"spring.ai.elevenlabs.tts.options.apply-language-text-normalization=true"
|
||||
// @formatter:on
|
||||
).withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class)).run(context -> {
|
||||
var speechProperties = context.getBean(ElevenLabsSpeechProperties.class);
|
||||
|
||||
assertThat(speechProperties.getOptions().getModelId()).isEqualTo("custom-model");
|
||||
assertThat(speechProperties.getOptions().getVoice()).isEqualTo("custom-voice");
|
||||
assertThat(speechProperties.getOptions().getFormat()).isEqualTo("pcm_44100");
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().stability()).isEqualTo(0.6);
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().similarityBoost()).isEqualTo(0.8);
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().style()).isEqualTo(0.2);
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().useSpeakerBoost()).isFalse();
|
||||
assertThat(speechProperties.getOptions().getVoiceSettings().speed()).isEqualTo(1.2);
|
||||
assertThat(speechProperties.getOptions().getSpeed()).isEqualTo(1.2);
|
||||
assertThat(speechProperties.getOptions().getLanguageCode()).isEqualTo("en");
|
||||
assertThat(speechProperties.getOptions().getSeed()).isEqualTo(12345);
|
||||
assertThat(speechProperties.getOptions().getPreviousText()).isEqualTo("previous");
|
||||
assertThat(speechProperties.getOptions().getNextText()).isEqualTo("next");
|
||||
assertThat(speechProperties.getOptions().getApplyTextNormalization())
|
||||
.isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
|
||||
assertThat(speechProperties.getOptions().getApplyLanguageTextNormalization()).isTrue();
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
public void speechActivation() {
|
||||
|
||||
// It is enabled by default
|
||||
new ApplicationContextRunner().withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY")
|
||||
.withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class))
|
||||
.run(context -> {
|
||||
assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty();
|
||||
assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty();
|
||||
});
|
||||
|
||||
// Explicitly enable the text-to-speech autoconfiguration.
|
||||
new ApplicationContextRunner()
|
||||
.withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=true")
|
||||
.withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class))
|
||||
.run(context -> {
|
||||
assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty();
|
||||
assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty();
|
||||
});
|
||||
|
||||
// Explicitly disable the text-to-speech autoconfiguration.
|
||||
new ApplicationContextRunner()
|
||||
.withPropertyValues("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=false")
|
||||
.withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class))
|
||||
.run(context -> {
|
||||
assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isEmpty();
|
||||
assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isEmpty();
|
||||
});
|
||||
}
|
||||
|
||||
}
|
||||
3
models/spring-ai-elevenlabs/README.md
Normal file
3
models/spring-ai-elevenlabs/README.md
Normal file
@@ -0,0 +1,3 @@
|
||||
# Spring AI - ElevenLabs Text-to-Speech
|
||||
|
||||
[ElevenLabs Text-to-Speech Documentation](https://docs.spring.io/spring-ai/reference/api/audio/speech/elevenlabs-speech.html)
|
||||
92
models/spring-ai-elevenlabs/pom.xml
Normal file
92
models/spring-ai-elevenlabs/pom.xml
Normal file
@@ -0,0 +1,92 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-parent</artifactId>
|
||||
<version>1.1.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
|
||||
<artifactId>spring-ai-elevenlabs</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Spring AI Model - ElevenLabs</name>
|
||||
<description>ElevenLabs Text-to-Speech model support</description>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
|
||||
<scm>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
<connection>git://github.com/spring-projects/spring-ai.git</connection>
|
||||
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
|
||||
</scm>
|
||||
|
||||
<properties>
|
||||
<!-- ElevenLabs-specific properties here, if needed -->
|
||||
</properties>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<!-- production dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-model</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-retry</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.rest-assured</groupId>
|
||||
<artifactId>json-path</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-context-support</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-webflux</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- test dependencies -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-test</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.micrometer</groupId>
|
||||
<artifactId>micrometer-observation-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.fasterxml.jackson.dataformat</groupId>
|
||||
<artifactId>jackson-dataformat-xml</artifactId>
|
||||
<version>2.11.1</version>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>io.projectreactor</groupId>
|
||||
<artifactId>reactor-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,219 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
import org.springframework.ai.audio.tts.Speech;
|
||||
import org.springframework.ai.audio.tts.StreamingTextToSpeechModel;
|
||||
import org.springframework.ai.audio.tts.TextToSpeechModel;
|
||||
import org.springframework.ai.audio.tts.TextToSpeechPrompt;
|
||||
import org.springframework.ai.audio.tts.TextToSpeechResponse;
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.ai.retry.RetryUtils;
|
||||
import org.springframework.retry.support.RetryTemplate;
|
||||
import org.springframework.util.Assert;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
import org.springframework.util.MultiValueMap;
|
||||
|
||||
/**
|
||||
* Implementation of the {@link TextToSpeechModel} and {@link StreamingTextToSpeechModel}
|
||||
* interfaces
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class ElevenLabsTextToSpeechModel implements TextToSpeechModel, StreamingTextToSpeechModel {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final ElevenLabsApi elevenLabsApi;
|
||||
|
||||
private final RetryTemplate retryTemplate;
|
||||
|
||||
private final ElevenLabsTextToSpeechOptions defaultOptions;
|
||||
|
||||
/**
 * Creates a model that uses {@link RetryUtils#DEFAULT_RETRY_TEMPLATE} for retries.
 * @param elevenLabsApi the API client to delegate to
 * @param defaultOptions the default speech options
 */
public ElevenLabsTextToSpeechModel(ElevenLabsApi elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions) {
	this(elevenLabsApi, defaultOptions, RetryUtils.DEFAULT_RETRY_TEMPLATE);
}
|
||||
|
||||
/**
 * Creates a model with an explicit retry policy.
 * @param elevenLabsApi the API client to delegate to, must not be {@code null}
 * @param defaultOptions the default speech options, must not be {@code null}
 * @param retryTemplate the retry policy wrapped around API calls, must not be
 * {@code null}
 * @throws IllegalArgumentException if any argument is {@code null}
 */
public ElevenLabsTextToSpeechModel(ElevenLabsApi elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions,
		RetryTemplate retryTemplate) {
	// Validate every argument before touching any field.
	Assert.notNull(elevenLabsApi, "ElevenLabsApi must not be null");
	Assert.notNull(defaultOptions, "ElevenLabsSpeechOptions must not be null");
	Assert.notNull(retryTemplate, "RetryTemplate must not be null");

	this.retryTemplate = retryTemplate;
	this.defaultOptions = defaultOptions;
	this.elevenLabsApi = elevenLabsApi;
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
@Override
|
||||
public TextToSpeechResponse call(TextToSpeechPrompt prompt) {
|
||||
RequestContext requestContext = prepareRequest(prompt);
|
||||
|
||||
byte[] audioData = retryTemplate.execute(context -> {
|
||||
var response = elevenLabsApi.textToSpeech(requestContext.request, requestContext.voiceId,
|
||||
requestContext.queryParameters);
|
||||
if (response.getBody() == null) {
|
||||
logger.warn("No speech response returned for request: {}", requestContext.request);
|
||||
return new byte[0];
|
||||
}
|
||||
return response.getBody();
|
||||
});
|
||||
|
||||
return new TextToSpeechResponse(List.of(new Speech(audioData)));
|
||||
}
|
||||
|
||||
@Override
|
||||
public Flux<TextToSpeechResponse> stream(TextToSpeechPrompt prompt) {
|
||||
RequestContext requestContext = prepareRequest(prompt);
|
||||
|
||||
return retryTemplate.execute(context -> elevenLabsApi
|
||||
.textToSpeechStream(requestContext.request, requestContext.voiceId, requestContext.queryParameters)
|
||||
.map(entity -> new TextToSpeechResponse(List.of(new Speech(entity.getBody())))));
|
||||
}
|
||||
|
||||
private RequestContext prepareRequest(TextToSpeechPrompt prompt) {
|
||||
ElevenLabsApi.SpeechRequest request = createRequest(prompt);
|
||||
ElevenLabsTextToSpeechOptions options = getOptions(prompt);
|
||||
String voiceId = options.getVoice();
|
||||
MultiValueMap<String, String> queryParameters = buildQueryParameters(options);
|
||||
|
||||
return new RequestContext(request, voiceId, queryParameters);
|
||||
}
|
||||
|
||||
private record RequestContext(ElevenLabsApi.SpeechRequest request, String voiceId,
|
||||
MultiValueMap<String, String> queryParameters) {
|
||||
}
|
||||
|
||||
private MultiValueMap<String, String> buildQueryParameters(ElevenLabsTextToSpeechOptions options) {
|
||||
MultiValueMap<String, String> queryParameters = new LinkedMultiValueMap<>();
|
||||
if (options.getEnableLogging() != null) {
|
||||
queryParameters.add("enable_logging", options.getEnableLogging().toString());
|
||||
}
|
||||
if (options.getFormat() != null) {
|
||||
queryParameters.add("output_format", options.getFormat());
|
||||
}
|
||||
return queryParameters;
|
||||
}
|
||||
|
||||
private ElevenLabsApi.SpeechRequest createRequest(TextToSpeechPrompt prompt) {
|
||||
ElevenLabsTextToSpeechOptions options = getOptions(prompt);
|
||||
|
||||
String voiceId = options.getVoice();
|
||||
Assert.notNull(voiceId, "A voiceId must be specified in the ElevenLabsSpeechOptions.");
|
||||
|
||||
String text = prompt.getInstructions().getText();
|
||||
Assert.hasText(text, "Prompt must contain text to convert to speech.");
|
||||
|
||||
return ElevenLabsApi.SpeechRequest.builder()
|
||||
.text(text)
|
||||
.modelId(options.getModelId())
|
||||
.voiceSettings(options.getVoiceSettings())
|
||||
.languageCode(options.getLanguageCode())
|
||||
.pronunciationDictionaryLocators(options.getPronunciationDictionaryLocators())
|
||||
.seed(options.getSeed())
|
||||
.previousText(options.getPreviousText())
|
||||
.nextText(options.getNextText())
|
||||
.previousRequestIds(options.getPreviousRequestIds())
|
||||
.nextRequestIds(options.getNextRequestIds())
|
||||
.applyTextNormalization(options.getApplyTextNormalization())
|
||||
.applyLanguageTextNormalization(options.getApplyLanguageTextNormalization())
|
||||
.build();
|
||||
}
|
||||
|
||||
private ElevenLabsTextToSpeechOptions getOptions(TextToSpeechPrompt prompt) {
|
||||
ElevenLabsTextToSpeechOptions runtimeOptions = (prompt
|
||||
.getOptions() instanceof ElevenLabsTextToSpeechOptions elevenLabsSpeechOptions) ? elevenLabsSpeechOptions
|
||||
: null;
|
||||
return (runtimeOptions != null) ? merge(runtimeOptions, this.defaultOptions) : this.defaultOptions;
|
||||
}
|
||||
|
||||
private ElevenLabsTextToSpeechOptions merge(ElevenLabsTextToSpeechOptions runtimeOptions,
|
||||
ElevenLabsTextToSpeechOptions defaultOptions) {
|
||||
return ElevenLabsTextToSpeechOptions.builder()
|
||||
.modelId(getOrDefault(runtimeOptions.getModelId(), defaultOptions.getModelId()))
|
||||
.voice(getOrDefault(runtimeOptions.getVoice(), defaultOptions.getVoice()))
|
||||
.voiceId(getOrDefault(runtimeOptions.getVoiceId(), defaultOptions.getVoiceId()))
|
||||
.format(getOrDefault(runtimeOptions.getFormat(), defaultOptions.getFormat()))
|
||||
.outputFormat(getOrDefault(runtimeOptions.getOutputFormat(), defaultOptions.getOutputFormat()))
|
||||
.voiceSettings(getOrDefault(runtimeOptions.getVoiceSettings(), defaultOptions.getVoiceSettings()))
|
||||
.languageCode(getOrDefault(runtimeOptions.getLanguageCode(), defaultOptions.getLanguageCode()))
|
||||
.pronunciationDictionaryLocators(getOrDefault(runtimeOptions.getPronunciationDictionaryLocators(),
|
||||
defaultOptions.getPronunciationDictionaryLocators()))
|
||||
.seed(getOrDefault(runtimeOptions.getSeed(), defaultOptions.getSeed()))
|
||||
.previousText(getOrDefault(runtimeOptions.getPreviousText(), defaultOptions.getPreviousText()))
|
||||
.nextText(getOrDefault(runtimeOptions.getNextText(), defaultOptions.getNextText()))
|
||||
.previousRequestIds(
|
||||
getOrDefault(runtimeOptions.getPreviousRequestIds(), defaultOptions.getPreviousRequestIds()))
|
||||
.nextRequestIds(getOrDefault(runtimeOptions.getNextRequestIds(), defaultOptions.getNextRequestIds()))
|
||||
.applyTextNormalization(getOrDefault(runtimeOptions.getApplyTextNormalization(),
|
||||
defaultOptions.getApplyTextNormalization()))
|
||||
.applyLanguageTextNormalization(getOrDefault(runtimeOptions.getApplyLanguageTextNormalization(),
|
||||
defaultOptions.getApplyLanguageTextNormalization()))
|
||||
.build();
|
||||
}
|
||||
|
||||
private <T> T getOrDefault(T runtimeValue, T defaultValue) {
|
||||
return runtimeValue != null ? runtimeValue : defaultValue;
|
||||
}
|
||||
|
||||
@Override
|
||||
public ElevenLabsTextToSpeechOptions getDefaultOptions() {
|
||||
return this.defaultOptions;
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private ElevenLabsApi elevenLabsApi;
|
||||
|
||||
private RetryTemplate retryTemplate = RetryUtils.DEFAULT_RETRY_TEMPLATE;
|
||||
|
||||
private ElevenLabsTextToSpeechOptions defaultOptions = ElevenLabsTextToSpeechOptions.builder().build();
|
||||
|
||||
public Builder elevenLabsApi(ElevenLabsApi elevenLabsApi) {
|
||||
this.elevenLabsApi = elevenLabsApi;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder retryTemplate(RetryTemplate retryTemplate) {
|
||||
this.retryTemplate = retryTemplate;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder defaultOptions(ElevenLabsTextToSpeechOptions defaultOptions) {
|
||||
this.defaultOptions = defaultOptions;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ElevenLabsTextToSpeechModel build() {
|
||||
Assert.notNull(elevenLabsApi, "ElevenLabsApi must not be null");
|
||||
Assert.notNull(defaultOptions, "ElevenLabsSpeechOptions must not be null");
|
||||
return new ElevenLabsTextToSpeechModel(elevenLabsApi, defaultOptions, retryTemplate);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,439 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonIgnore;
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
|
||||
import org.springframework.ai.audio.tts.TextToSpeechOptions;
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
|
||||
/**
|
||||
* Options for ElevenLabs text-to-speech.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class ElevenLabsTextToSpeechOptions implements TextToSpeechOptions {
|
||||
|
||||
@JsonProperty("model_id")
|
||||
private String modelId;
|
||||
|
||||
// Path Params
|
||||
@JsonProperty("voice_id")
|
||||
private String voiceId;
|
||||
|
||||
// End Path Params
|
||||
|
||||
// Query Params
|
||||
@JsonProperty("enable_logging")
|
||||
private Boolean enableLogging;
|
||||
|
||||
@JsonProperty("output_format")
|
||||
private String outputFormat;
|
||||
|
||||
// End Query Params
|
||||
|
||||
@JsonProperty("voice_settings")
|
||||
private ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings;
|
||||
|
||||
@JsonProperty("language_code")
|
||||
private String languageCode;
|
||||
|
||||
@JsonProperty("pronunciation_dictionary_locators")
|
||||
private List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> pronunciationDictionaryLocators;
|
||||
|
||||
@JsonProperty("seed")
|
||||
private Integer seed;
|
||||
|
||||
@JsonProperty("previous_text")
|
||||
private String previousText;
|
||||
|
||||
@JsonProperty("next_text")
|
||||
private String nextText;
|
||||
|
||||
@JsonProperty("previous_request_ids")
|
||||
private List<String> previousRequestIds;
|
||||
|
||||
@JsonProperty("next_request_ids")
|
||||
private List<String> nextRequestIds;
|
||||
|
||||
@JsonProperty("apply_text_normalization")
|
||||
private ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization;
|
||||
|
||||
@JsonProperty("apply_language_text_normalization")
|
||||
private Boolean applyLanguageTextNormalization;
|
||||
|
||||
public static Builder builder() {
|
||||
return new ElevenLabsTextToSpeechOptions.Builder();
|
||||
}
|
||||
|
||||
@Override
|
||||
@JsonIgnore
|
||||
public String getModel() {
|
||||
return getModelId();
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public void setModel(String model) {
|
||||
setModelId(model);
|
||||
}
|
||||
|
||||
public String getModelId() {
|
||||
return this.modelId;
|
||||
}
|
||||
|
||||
public void setModelId(String modelId) {
|
||||
this.modelId = modelId;
|
||||
}
|
||||
|
||||
@Override
|
||||
@JsonIgnore
|
||||
public String getVoice() {
|
||||
return getVoiceId();
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public void setVoice(String voice) {
|
||||
setVoiceId(voice);
|
||||
}
|
||||
|
||||
public String getVoiceId() {
|
||||
return this.voiceId;
|
||||
}
|
||||
|
||||
public void setVoiceId(String voiceId) {
|
||||
this.voiceId = voiceId;
|
||||
}
|
||||
|
||||
public Boolean getEnableLogging() {
|
||||
return this.enableLogging;
|
||||
}
|
||||
|
||||
public void setEnableLogging(Boolean enableLogging) {
|
||||
this.enableLogging = enableLogging;
|
||||
}
|
||||
|
||||
@Override
|
||||
@JsonIgnore
|
||||
public String getFormat() {
|
||||
return getOutputFormat();
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public void setFormat(String format) {
|
||||
setOutputFormat(format);
|
||||
}
|
||||
|
||||
public String getOutputFormat() {
|
||||
return this.outputFormat;
|
||||
}
|
||||
|
||||
public void setOutputFormat(String outputFormat) {
|
||||
this.outputFormat = outputFormat;
|
||||
}
|
||||
|
||||
@Override
|
||||
@JsonIgnore
|
||||
public Double getSpeed() {
|
||||
if (this.getVoiceSettings() != null) {
|
||||
return this.getVoiceSettings().speed();
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
@JsonIgnore
|
||||
public void setSpeed(Double speed) {
|
||||
if (speed != null) {
|
||||
if (this.getVoiceSettings() == null) {
|
||||
this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, speed));
|
||||
}
|
||||
else {
|
||||
this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(),
|
||||
this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(),
|
||||
this.getVoiceSettings().useSpeakerBoost(), speed));
|
||||
}
|
||||
}
|
||||
else {
|
||||
if (this.getVoiceSettings() != null) {
|
||||
this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(),
|
||||
this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(),
|
||||
this.getVoiceSettings().useSpeakerBoost(), null));
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public ElevenLabsApi.SpeechRequest.VoiceSettings getVoiceSettings() {
|
||||
return this.voiceSettings;
|
||||
}
|
||||
|
||||
public void setVoiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) {
|
||||
this.voiceSettings = voiceSettings;
|
||||
}
|
||||
|
||||
public String getLanguageCode() {
|
||||
return this.languageCode;
|
||||
}
|
||||
|
||||
public void setLanguageCode(String languageCode) {
|
||||
this.languageCode = languageCode;
|
||||
}
|
||||
|
||||
public List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> getPronunciationDictionaryLocators() {
|
||||
return this.pronunciationDictionaryLocators;
|
||||
}
|
||||
|
||||
public void setPronunciationDictionaryLocators(
|
||||
List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> pronunciationDictionaryLocators) {
|
||||
this.pronunciationDictionaryLocators = pronunciationDictionaryLocators;
|
||||
}
|
||||
|
||||
public Integer getSeed() {
|
||||
return this.seed;
|
||||
}
|
||||
|
||||
public void setSeed(Integer seed) {
|
||||
this.seed = seed;
|
||||
}
|
||||
|
||||
public String getPreviousText() {
|
||||
return this.previousText;
|
||||
}
|
||||
|
||||
public void setPreviousText(String previousText) {
|
||||
this.previousText = previousText;
|
||||
}
|
||||
|
||||
public String getNextText() {
|
||||
return this.nextText;
|
||||
}
|
||||
|
||||
public void setNextText(String nextText) {
|
||||
this.nextText = nextText;
|
||||
}
|
||||
|
||||
public List<String> getPreviousRequestIds() {
|
||||
return this.previousRequestIds;
|
||||
}
|
||||
|
||||
public void setPreviousRequestIds(List<String> previousRequestIds) {
|
||||
this.previousRequestIds = previousRequestIds;
|
||||
}
|
||||
|
||||
public List<String> getNextRequestIds() {
|
||||
return this.nextRequestIds;
|
||||
}
|
||||
|
||||
public void setNextRequestIds(List<String> nextRequestIds) {
|
||||
this.nextRequestIds = nextRequestIds;
|
||||
}
|
||||
|
||||
public ElevenLabsApi.SpeechRequest.TextNormalizationMode getApplyTextNormalization() {
|
||||
return this.applyTextNormalization;
|
||||
}
|
||||
|
||||
public void setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) {
|
||||
this.applyTextNormalization = applyTextNormalization;
|
||||
}
|
||||
|
||||
public Boolean getApplyLanguageTextNormalization() {
|
||||
return this.applyLanguageTextNormalization;
|
||||
}
|
||||
|
||||
public void setApplyLanguageTextNormalization(Boolean applyLanguageTextNormalization) {
|
||||
this.applyLanguageTextNormalization = applyLanguageTextNormalization;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!(o instanceof ElevenLabsTextToSpeechOptions that))
|
||||
return false;
|
||||
return Objects.equals(modelId, that.modelId) && Objects.equals(voiceId, that.voiceId)
|
||||
&& Objects.equals(outputFormat, that.outputFormat) && Objects.equals(voiceSettings, that.voiceSettings)
|
||||
&& Objects.equals(languageCode, that.languageCode)
|
||||
&& Objects.equals(pronunciationDictionaryLocators, that.pronunciationDictionaryLocators)
|
||||
&& Objects.equals(seed, that.seed) && Objects.equals(previousText, that.previousText)
|
||||
&& Objects.equals(nextText, that.nextText)
|
||||
&& Objects.equals(previousRequestIds, that.previousRequestIds)
|
||||
&& Objects.equals(applyTextNormalization, that.applyTextNormalization)
|
||||
&& Objects.equals(nextRequestIds, that.nextRequestIds)
|
||||
&& Objects.equals(applyLanguageTextNormalization, that.applyLanguageTextNormalization);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(modelId, voiceId, outputFormat, voiceSettings, languageCode,
|
||||
pronunciationDictionaryLocators, seed, previousText, nextText, previousRequestIds, nextRequestIds,
|
||||
applyTextNormalization, applyLanguageTextNormalization);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "ElevenLabsSpeechOptions{" + "modelId='" + modelId + '\'' + ", voiceId='" + voiceId + '\''
|
||||
+ ", outputFormat='" + outputFormat + '\'' + ", voiceSettings=" + voiceSettings + ", languageCode='"
|
||||
+ languageCode + '\'' + ", pronunciationDictionaryLocators=" + pronunciationDictionaryLocators
|
||||
+ ", seed=" + seed + ", previousText='" + previousText + '\'' + ", nextText='" + nextText + '\''
|
||||
+ ", previousRequestIds=" + previousRequestIds + ", nextRequestIds=" + nextRequestIds
|
||||
+ ", applyTextNormalization=" + applyTextNormalization + ", applyLanguageTextNormalization="
|
||||
+ applyLanguageTextNormalization + '}';
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public ElevenLabsTextToSpeechOptions copy() {
|
||||
return ElevenLabsTextToSpeechOptions.builder()
|
||||
.modelId(this.getModelId())
|
||||
.voice(this.getVoice())
|
||||
.voiceId(this.getVoiceId())
|
||||
.format(this.getFormat())
|
||||
.outputFormat(this.getOutputFormat())
|
||||
.voiceSettings(this.getVoiceSettings())
|
||||
.languageCode(this.getLanguageCode())
|
||||
.pronunciationDictionaryLocators(this.getPronunciationDictionaryLocators())
|
||||
.seed(this.getSeed())
|
||||
.previousText(this.getPreviousText())
|
||||
.nextText(this.getNextText())
|
||||
.previousRequestIds(this.getPreviousRequestIds())
|
||||
.nextRequestIds(this.getNextRequestIds())
|
||||
.applyTextNormalization(this.getApplyTextNormalization())
|
||||
.applyLanguageTextNormalization(this.getApplyLanguageTextNormalization())
|
||||
.build();
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private final ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions();
|
||||
|
||||
/**
|
||||
* Sets the model ID using the generic 'model' property. This is an alias for
|
||||
* {@link #modelId(String)}.
|
||||
* @param model The model ID to use.
|
||||
* @return this builder.
|
||||
*/
|
||||
public Builder model(String model) {
|
||||
options.setModel(model);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the model ID using the ElevenLabs specific 'modelId' property. This is an
|
||||
* alias for {@link #model(String)}.
|
||||
* @param modelId The model ID to use.
|
||||
* @return this builder.
|
||||
*/
|
||||
public Builder modelId(String modelId) {
|
||||
options.setModelId(modelId);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the voice ID using the generic 'voice' property. This is an alias for
|
||||
* {@link #voiceId(String)}.
|
||||
* @param voice The voice ID to use.
|
||||
* @return this builder.
|
||||
*/
|
||||
public Builder voice(String voice) {
|
||||
options.setVoice(voice);
|
||||
return this;
|
||||
}
|
||||
|
||||
/**
|
||||
* Sets the voice ID using the ElevenLabs specific 'voiceId' property. This is an
|
||||
* alias for {@link #voice(String)}.
|
||||
* @param voiceId The voice ID to use.
|
||||
* @return this builder.
|
||||
*/
|
||||
public Builder voiceId(String voiceId) {
|
||||
options.setVoiceId(voiceId);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder format(String format) {
|
||||
options.setFormat(format);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder outputFormat(String outputFormat) {
|
||||
options.setOutputFormat(outputFormat);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder voiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) {
|
||||
options.setVoiceSettings(voiceSettings);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder languageCode(String languageCode) {
|
||||
options.setLanguageCode(languageCode);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder pronunciationDictionaryLocators(
|
||||
List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> pronunciationDictionaryLocators) {
|
||||
options.setPronunciationDictionaryLocators(pronunciationDictionaryLocators);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder seed(Integer seed) {
|
||||
options.setSeed(seed);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder previousText(String previousText) {
|
||||
options.setPreviousText(previousText);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nextText(String nextText) {
|
||||
options.setNextText(nextText);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder previousRequestIds(List<String> previousRequestIds) {
|
||||
options.setPreviousRequestIds(previousRequestIds);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nextRequestIds(List<String> nextRequestIds) {
|
||||
options.setNextRequestIds(nextRequestIds);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder applyTextNormalization(
|
||||
ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) {
|
||||
options.setApplyTextNormalization(applyTextNormalization);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder applyLanguageTextNormalization(Boolean applyLanguageTextNormalization) {
|
||||
options.setApplyLanguageTextNormalization(applyLanguageTextNormalization);
|
||||
return this;
|
||||
}
|
||||
|
||||
public ElevenLabsTextToSpeechOptions build() {
|
||||
return this.options;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs.aot;
|
||||
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.aot.hint.MemberCategory;
|
||||
import org.springframework.aot.hint.RuntimeHints;
|
||||
import org.springframework.aot.hint.RuntimeHintsRegistrar;
|
||||
import org.springframework.lang.NonNull;
|
||||
import org.springframework.lang.Nullable;
|
||||
|
||||
import static org.springframework.ai.aot.AiRuntimeHints.findJsonAnnotatedClassesInPackage;
|
||||
|
||||
/**
|
||||
* The ElevenLabsRuntimeHints class is responsible for registering runtime hints for
|
||||
* ElevenLabs API classes.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class ElevenLabsRuntimeHints implements RuntimeHintsRegistrar {
|
||||
|
||||
@Override
|
||||
public void registerHints(@NonNull RuntimeHints hints, @Nullable ClassLoader classLoader) {
|
||||
var mcs = MemberCategory.values();
|
||||
for (var tr : findJsonAnnotatedClassesInPackage(ElevenLabsApi.class)) {
|
||||
hints.reflection().registerType(tr, mcs);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,391 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs.api;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonValue;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.core.publisher.Mono;
|
||||
|
||||
import org.springframework.ai.model.ApiKey;
|
||||
import org.springframework.ai.model.NoopApiKey;
|
||||
import org.springframework.ai.model.SimpleApiKey;
|
||||
import org.springframework.ai.retry.RetryUtils;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.util.Assert;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
import org.springframework.util.MultiValueMap;
|
||||
import org.springframework.web.client.ResponseErrorHandler;
|
||||
import org.springframework.web.client.RestClient;
|
||||
import org.springframework.web.reactive.function.client.WebClient;
|
||||
import org.springframework.web.util.UriComponentsBuilder;
|
||||
|
||||
/**
|
||||
* Client for the ElevenLabs Text-to-Speech API.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class ElevenLabsApi {
|
||||
|
||||
public static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io";
|
||||
|
||||
private final RestClient restClient;
|
||||
|
||||
private final WebClient webClient;
|
||||
|
||||
/**
|
||||
* Create a new ElevenLabs API client.
|
||||
* @param baseUrl The base URL for the ElevenLabs API.
|
||||
* @param apiKey Your ElevenLabs API key.
|
||||
* @param headers the http headers to use.
|
||||
* @param restClientBuilder A builder for the Spring RestClient.
|
||||
* @param webClientBuilder A builder for the Spring WebClient.
|
||||
* @param responseErrorHandler A custom error handler for API responses.
|
||||
*/
|
||||
private ElevenLabsApi(String baseUrl, ApiKey apiKey, MultiValueMap<String, String> headers,
|
||||
RestClient.Builder restClientBuilder, WebClient.Builder webClientBuilder,
|
||||
ResponseErrorHandler responseErrorHandler) {
|
||||
|
||||
Consumer<HttpHeaders> jsonContentHeaders = h -> {
|
||||
if (!(apiKey instanceof NoopApiKey)) {
|
||||
h.set("xi-api-key", apiKey.getValue());
|
||||
}
|
||||
h.addAll(headers);
|
||||
h.setContentType(MediaType.APPLICATION_JSON);
|
||||
};
|
||||
|
||||
this.restClient = restClientBuilder.baseUrl(baseUrl)
|
||||
.defaultHeaders(jsonContentHeaders)
|
||||
.defaultStatusHandler(responseErrorHandler)
|
||||
.build();
|
||||
|
||||
this.webClient = webClientBuilder.baseUrl(baseUrl).defaultHeaders(jsonContentHeaders).build();
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert text to speech using the specified voice and parameters.
|
||||
* @param requestBody The request body containing text, model, and voice settings.
|
||||
* @param voiceId The ID of the voice to use. Must not be null.
|
||||
* @param queryParameters Additional query parameters for the API call.
|
||||
* @return A ResponseEntity containing the generated audio as a byte array.
|
||||
*/
|
||||
public ResponseEntity<byte[]> textToSpeech(SpeechRequest requestBody, String voiceId,
|
||||
MultiValueMap<String, String> queryParameters) {
|
||||
|
||||
Assert.notNull(voiceId, "voiceId must be provided. It cannot be null.");
|
||||
Assert.notNull(requestBody, "requestBody can not be null.");
|
||||
Assert.hasText(requestBody.text(), "requestBody.text must be provided. It cannot be null or empty.");
|
||||
|
||||
UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}")
|
||||
.queryParams(queryParameters);
|
||||
|
||||
return this.restClient.post()
|
||||
.uri(uriBuilder.buildAndExpand(voiceId).toUriString())
|
||||
.body(requestBody)
|
||||
.retrieve()
|
||||
.toEntity(byte[].class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Convert text to speech using the specified voice and parameters, streaming the
|
||||
* results.
|
||||
* @param requestBody The request body containing text, model, and voice settings.
|
||||
* @param voiceId The ID of the voice to use. Must not be null.
|
||||
* @param queryParameters Additional query parameters for the API call.
|
||||
* @return A Flux of ResponseEntity containing the generated audio chunks as byte
|
||||
* arrays.
|
||||
*/
|
||||
public Flux<ResponseEntity<byte[]>> textToSpeechStream(SpeechRequest requestBody, String voiceId,
|
||||
MultiValueMap<String, String> queryParameters) {
|
||||
Assert.notNull(voiceId, "voiceId must be provided for streaming. It cannot be null.");
|
||||
Assert.notNull(requestBody, "requestBody can not be null.");
|
||||
Assert.hasText(requestBody.text(), "requestBody.text must be provided. It cannot be null or empty.");
|
||||
|
||||
UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}/stream")
|
||||
.queryParams(queryParameters);
|
||||
|
||||
return this.webClient.post()
|
||||
.uri(uriBuilder.buildAndExpand(voiceId).toUriString())
|
||||
.body(Mono.just(requestBody), SpeechRequest.class)
|
||||
.accept(MediaType.APPLICATION_OCTET_STREAM)
|
||||
.exchangeToFlux(clientResponse -> {
|
||||
HttpHeaders headers = clientResponse.headers().asHttpHeaders();
|
||||
return clientResponse.bodyToFlux(byte[].class)
|
||||
.map(bytes -> ResponseEntity.ok().headers(headers).body(bytes));
|
||||
});
|
||||
}
|
||||
|
||||
/**
 * The output format of the generated audio. Each constant carries the string value
 * returned by {@link #getValue()}.
 */
public enum OutputFormat {

	MP3_22050_32("mp3_22050_32"),
	MP3_44100_32("mp3_44100_32"),
	MP3_44100_64("mp3_44100_64"),
	MP3_44100_96("mp3_44100_96"),
	MP3_44100_128("mp3_44100_128"),
	MP3_44100_192("mp3_44100_192"),
	PCM_8000("pcm_8000"),
	PCM_16000("pcm_16000"),
	PCM_22050("pcm_22050"),
	PCM_24000("pcm_24000"),
	PCM_44100("pcm_44100"),
	PCM_48000("pcm_48000"),
	ULAW_8000("ulaw_8000"),
	ALAW_8000("alaw_8000"),
	OPUS_48000_32("opus_48000_32"),
	OPUS_48000_64("opus_48000_64"),
	OPUS_48000_96("opus_48000_96"),
	OPUS_48000_128("opus_48000_128"),
	OPUS_48000_192("opus_48000_192");

	private final String value;

	OutputFormat(String formatValue) {
		this.value = formatValue;
	}

	/**
	 * Return the string form of this format.
	 * @return the format value, e.g. {@code "mp3_44100_128"}
	 */
	public String getValue() {
		return this.value;
	}

}
|
||||
|
||||
/**
|
||||
* Represents a request to the ElevenLabs Text-to-Speech API.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record SpeechRequest(@JsonProperty("text") String text, @JsonProperty("model_id") String modelId,
|
||||
@JsonProperty("language_code") String languageCode,
|
||||
@JsonProperty("voice_settings") VoiceSettings voiceSettings,
|
||||
@JsonProperty("pronunciation_dictionary_locators") List<PronunciationDictionaryLocator> pronunciationDictionaryLocators,
|
||||
@JsonProperty("seed") Integer seed, @JsonProperty("previous_text") String previousText,
|
||||
@JsonProperty("next_text") String nextText,
|
||||
@JsonProperty("previous_request_ids") List<String> previousRequestIds,
|
||||
@JsonProperty("next_request_ids") List<String> nextRequestIds,
|
||||
@JsonProperty("apply_text_normalization") TextNormalizationMode applyTextNormalization,
|
||||
@JsonProperty("apply_language_text_normalization") Boolean applyLanguageTextNormalization) {
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Text normalization mode.
|
||||
*/
|
||||
public enum TextNormalizationMode {
|
||||
|
||||
@JsonProperty("auto")
|
||||
AUTO("auto"), @JsonProperty("on")
|
||||
ON("on"), @JsonProperty("off")
|
||||
OFF("off");
|
||||
|
||||
public final String value;
|
||||
|
||||
TextNormalizationMode(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public String getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Voice settings to override defaults for the given voice.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record VoiceSettings(@JsonProperty("stability") Double stability,
|
||||
@JsonProperty("similarity_boost") Double similarityBoost, @JsonProperty("style") Double style,
|
||||
@JsonProperty("use_speaker_boost") Boolean useSpeakerBoost, @JsonProperty("speed") Double speed) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Locator for a pronunciation dictionary.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record PronunciationDictionaryLocator(
|
||||
@JsonProperty("pronunciation_dictionary_id") String pronunciationDictionaryId,
|
||||
@JsonProperty("version_id") String versionId) {
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private String text;
|
||||
|
||||
private String modelId;
|
||||
|
||||
private String languageCode;
|
||||
|
||||
private VoiceSettings voiceSettings;
|
||||
|
||||
private List<PronunciationDictionaryLocator> pronunciationDictionaryLocators;
|
||||
|
||||
private Integer seed;
|
||||
|
||||
private String previousText;
|
||||
|
||||
private String nextText;
|
||||
|
||||
private List<String> previousRequestIds;
|
||||
|
||||
private List<String> nextRequestIds;
|
||||
|
||||
private TextNormalizationMode applyTextNormalization;
|
||||
|
||||
private Boolean applyLanguageTextNormalization = false;
|
||||
|
||||
public Builder text(String text) {
|
||||
this.text = text;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder modelId(String modelId) {
|
||||
this.modelId = modelId;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder languageCode(String languageCode) {
|
||||
this.languageCode = languageCode;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder voiceSettings(VoiceSettings voiceSettings) {
|
||||
this.voiceSettings = voiceSettings;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder pronunciationDictionaryLocators(
|
||||
List<PronunciationDictionaryLocator> pronunciationDictionaryLocators) {
|
||||
this.pronunciationDictionaryLocators = pronunciationDictionaryLocators;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder seed(Integer seed) {
|
||||
this.seed = seed;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder previousText(String previousText) {
|
||||
this.previousText = previousText;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nextText(String nextText) {
|
||||
this.nextText = nextText;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder previousRequestIds(List<String> previousRequestIds) {
|
||||
this.previousRequestIds = previousRequestIds;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder nextRequestIds(List<String> nextRequestIds) {
|
||||
this.nextRequestIds = nextRequestIds;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder applyTextNormalization(TextNormalizationMode applyTextNormalization) {
|
||||
this.applyTextNormalization = applyTextNormalization;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder applyLanguageTextNormalization(Boolean applyLanguageTextNormalization) {
|
||||
this.applyLanguageTextNormalization = applyLanguageTextNormalization;
|
||||
return this;
|
||||
}
|
||||
|
||||
public SpeechRequest build() {
|
||||
Assert.hasText(text, "text must not be empty");
|
||||
return new SpeechRequest(text, modelId, languageCode, voiceSettings, pronunciationDictionaryLocators,
|
||||
seed, previousText, nextText, previousRequestIds, nextRequestIds, applyTextNormalization,
|
||||
applyLanguageTextNormalization);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder to construct {@link ElevenLabsApi} instance.
|
||||
*/
|
||||
public static class Builder {
|
||||
|
||||
private String baseUrl = DEFAULT_BASE_URL;
|
||||
|
||||
private ApiKey apiKey;
|
||||
|
||||
private MultiValueMap<String, String> headers = new LinkedMultiValueMap<>();
|
||||
|
||||
private RestClient.Builder restClientBuilder = RestClient.builder();
|
||||
|
||||
private WebClient.Builder webClientBuilder = WebClient.builder();
|
||||
|
||||
private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER;
|
||||
|
||||
public Builder baseUrl(String baseUrl) {
|
||||
Assert.hasText(baseUrl, "baseUrl cannot be null or empty");
|
||||
this.baseUrl = baseUrl;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder apiKey(ApiKey apiKey) {
|
||||
Assert.notNull(apiKey, "apiKey cannot be null");
|
||||
this.apiKey = apiKey;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder apiKey(String simpleApiKey) {
|
||||
Assert.notNull(simpleApiKey, "simpleApiKey cannot be null");
|
||||
this.apiKey = new SimpleApiKey(simpleApiKey);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headers(MultiValueMap<String, String> headers) {
|
||||
Assert.notNull(headers, "headers cannot be null");
|
||||
this.headers = headers;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder restClientBuilder(RestClient.Builder restClientBuilder) {
|
||||
Assert.notNull(restClientBuilder, "restClientBuilder cannot be null");
|
||||
this.restClientBuilder = restClientBuilder;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder webClientBuilder(WebClient.Builder webClientBuilder) {
|
||||
Assert.notNull(webClientBuilder, "webClientBuilder cannot be null");
|
||||
this.webClientBuilder = webClientBuilder;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) {
|
||||
Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null");
|
||||
this.responseErrorHandler = responseErrorHandler;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ElevenLabsApi build() {
|
||||
Assert.notNull(this.apiKey, "apiKey must be set");
|
||||
return new ElevenLabsApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder,
|
||||
this.webClientBuilder, this.responseErrorHandler);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,452 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs.api;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.function.Consumer;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
import com.fasterxml.jackson.annotation.JsonProperty;
|
||||
import com.fasterxml.jackson.annotation.JsonValue;
|
||||
|
||||
import org.springframework.ai.model.ApiKey;
|
||||
import org.springframework.ai.model.NoopApiKey;
|
||||
import org.springframework.ai.model.SimpleApiKey;
|
||||
import org.springframework.ai.retry.RetryUtils;
|
||||
import org.springframework.http.HttpHeaders;
|
||||
import org.springframework.http.MediaType;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.util.Assert;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
import org.springframework.util.MultiValueMap;
|
||||
import org.springframework.web.client.ResponseErrorHandler;
|
||||
import org.springframework.web.client.RestClient;
|
||||
|
||||
/**
|
||||
* Client for the ElevenLabs Voices API.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class ElevenLabsVoicesApi {
|
||||
|
||||
private static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io";
|
||||
|
||||
private final RestClient restClient;
|
||||
|
||||
/**
|
||||
* Create a new ElevenLabs Voices API client.
|
||||
* @param baseUrl The base URL for the ElevenLabs API.
|
||||
* @param apiKey Your ElevenLabs API key.
|
||||
* @param headers the http headers to use.
|
||||
* @param restClientBuilder A builder for the Spring RestClient.
|
||||
* @param responseErrorHandler A custom error handler for API responses.
|
||||
*/
|
||||
public ElevenLabsVoicesApi(String baseUrl, ApiKey apiKey, MultiValueMap<String, String> headers,
|
||||
RestClient.Builder restClientBuilder, ResponseErrorHandler responseErrorHandler) {
|
||||
Consumer<HttpHeaders> jsonContentHeaders = h -> {
|
||||
if (!(apiKey instanceof NoopApiKey)) {
|
||||
h.set("xi-api-key", apiKey.getValue());
|
||||
}
|
||||
h.addAll(headers);
|
||||
h.setContentType(MediaType.APPLICATION_JSON);
|
||||
};
|
||||
|
||||
this.restClient = restClientBuilder.baseUrl(baseUrl)
|
||||
.defaultHeaders(jsonContentHeaders)
|
||||
.defaultStatusHandler(responseErrorHandler)
|
||||
.build();
|
||||
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Retrieves a list of all available voices from the ElevenLabs API.
|
||||
* @return A ResponseEntity containing a Voices object, which contains the list of
|
||||
* voices.
|
||||
*/
|
||||
public ResponseEntity<Voices> getVoices() {
|
||||
return this.restClient.get().uri("/v1/voices").retrieve().toEntity(Voices.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Gets the default settings for voices. "similarity_boost" corresponds to ”Clarity +
|
||||
* Similarity Enhancement” in the web app and "stability" corresponds to "Stability"
|
||||
* slider in the web app.
|
||||
* @return {@link ResponseEntity} containing the {@link VoiceSettings} record.
|
||||
*/
|
||||
public ResponseEntity<VoiceSettings> getDefaultVoiceSettings() {
|
||||
return this.restClient.get().uri("/v1/voices/settings/default").retrieve().toEntity(VoiceSettings.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the settings for a specific voice. "similarity_boost" corresponds to
|
||||
* "Clarity + Similarity Enhancement" in the web app and "stability" corresponds to
|
||||
* the "Stability" slider in the web app.
|
||||
* @param voiceId The ID of the voice to get settings for. Required.
|
||||
* @return {@link ResponseEntity} containing the {@link VoiceSettings} record.
|
||||
*/
|
||||
public ResponseEntity<VoiceSettings> getVoiceSettings(String voiceId) {
|
||||
Assert.hasText(voiceId, "voiceId cannot be null or empty");
|
||||
return this.restClient.get()
|
||||
.uri("/v1/voices/{voiceId}/settings", voiceId)
|
||||
.retrieve()
|
||||
.toEntity(VoiceSettings.class);
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns metadata about a specific voice.
|
||||
* @param voiceId ID of the voice to be used. You can use the Get voices endpoint list
|
||||
* all the available voices. Required.
|
||||
* @return {@link ResponseEntity} containing the {@link Voice} record.
|
||||
*/
|
||||
public ResponseEntity<Voice> getVoice(String voiceId) {
|
||||
Assert.hasText(voiceId, "voiceId cannot be null or empty");
|
||||
return this.restClient.get().uri("/v1/voices/{voiceId}", voiceId).retrieve().toEntity(Voice.class);
|
||||
}
|
||||
|
||||
public enum CategoryEnum {
|
||||
|
||||
@JsonProperty("generated")
|
||||
GENERATED("generated"), @JsonProperty("cloned")
|
||||
CLONED("cloned"), @JsonProperty("premade")
|
||||
PREMADE("premade"), @JsonProperty("professional")
|
||||
PROFESSIONAL("professional"), @JsonProperty("famous")
|
||||
FAMOUS("famous"), @JsonProperty("high_quality")
|
||||
HIGH_QUALITY("high_quality");
|
||||
|
||||
public final String value;
|
||||
|
||||
CategoryEnum(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public String getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public enum SafetyControlEnum {
|
||||
|
||||
@JsonProperty("NONE")
|
||||
NONE("NONE"), @JsonProperty("BAN")
|
||||
BAN("BAN"), @JsonProperty("CAPTCHA")
|
||||
CAPTCHA("CAPTCHA"), @JsonProperty("CAPTCHA_AND_MODERATION")
|
||||
CAPTCHA_AND_MODERATION("CAPTCHA_AND_MODERATION"), @JsonProperty("ENTERPRISE_BAN")
|
||||
ENTERPRISE_BAN("ENTERPRISE_BAN"), @JsonProperty("ENTERPRISE_CAPTCHA")
|
||||
ENTERPRISE_CAPTCHA("ENTERPRISE_CAPTCHA");
|
||||
|
||||
public final String value;
|
||||
|
||||
SafetyControlEnum(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public String getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents the response from the /v1/voices endpoint.
|
||||
*
|
||||
* @param voices A list of Voice objects representing the available voices.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record Voices(@JsonProperty("voices") List<Voice> voices) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a single voice from the ElevenLabs API.
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record Voice(@JsonProperty("voice_id") String voiceId, @JsonProperty("name") String name,
|
||||
@JsonProperty("samples") List<Sample> samples, @JsonProperty("category") CategoryEnum category,
|
||||
@JsonProperty("fine_tuning") FineTuning fineTuning, @JsonProperty("labels") Map<String, String> labels,
|
||||
@JsonProperty("description") String description, @JsonProperty("preview_url") String previewUrl,
|
||||
@JsonProperty("available_for_tiers") List<String> availableForTiers,
|
||||
@JsonProperty("settings") VoiceSettings settings, @JsonProperty("sharing") VoiceSharing sharing,
|
||||
@JsonProperty("high_quality_base_model_ids") List<String> highQualityBaseModelIds,
|
||||
@JsonProperty("verified_languages") List<VerifiedVoiceLanguage> verifiedLanguages,
|
||||
@JsonProperty("safety_control") SafetyControlEnum safetyControl,
|
||||
@JsonProperty("voice_verification") VoiceVerification voiceVerification,
|
||||
@JsonProperty("permission_on_resource") String permissionOnResource,
|
||||
@JsonProperty("is_owner") Boolean isOwner, @JsonProperty("is_legacy") Boolean isLegacy,
|
||||
@JsonProperty("is_mixed") Boolean isMixed, @JsonProperty("created_at_unix") Integer createdAtUnix) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record Sample(@JsonProperty("sample_id") String sampleId, @JsonProperty("file_name") String fileName,
|
||||
@JsonProperty("mime_type") String mimeType, @JsonProperty("size_bytes") Integer sizeBytes,
|
||||
@JsonProperty("hash") String hash) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record FineTuning(@JsonProperty("is_allowed_to_fine_tune") Boolean isAllowedToFineTune,
|
||||
@JsonProperty("state") Map<String, String> state,
|
||||
@JsonProperty("verification_failures") List<String> verificationFailures,
|
||||
@JsonProperty("verification_attempts_count") Integer verificationAttemptsCount,
|
||||
@JsonProperty("manual_verification_requested") Boolean manualVerificationRequested,
|
||||
@JsonProperty("language") String language, @JsonProperty("progress") Map<String, Double> progress,
|
||||
@JsonProperty("message") Map<String, String> message,
|
||||
@JsonProperty("dataset_duration_seconds") Double datasetDurationSeconds,
|
||||
@JsonProperty("verification_attempts") List<VerificationAttempt> verificationAttempts,
|
||||
@JsonProperty("slice_ids") List<String> sliceIds,
|
||||
@JsonProperty("manual_verification") ManualVerification manualVerification,
|
||||
@JsonProperty("max_verification_attempts") Integer maxVerificationAttempts,
|
||||
@JsonProperty("next_max_verification_attempts_reset_unix_ms") Long nextMaxVerificationAttemptsResetUnixMs) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record VoiceVerification(@JsonProperty("requires_verification") Boolean requiresVerification,
|
||||
@JsonProperty("is_verified") Boolean isVerified,
|
||||
@JsonProperty("verification_failures") List<String> verificationFailures,
|
||||
@JsonProperty("verification_attempts_count") Integer verificationAttemptsCount,
|
||||
@JsonProperty("language") String language,
|
||||
@JsonProperty("verification_attempts") List<VerificationAttempt> verificationAttempts) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record VerificationAttempt(@JsonProperty("text") String text, @JsonProperty("date_unix") Integer dateUnix,
|
||||
@JsonProperty("accepted") Boolean accepted, @JsonProperty("similarity") Double similarity,
|
||||
@JsonProperty("levenshtein_distance") Double levenshteinDistance,
|
||||
@JsonProperty("recording") Recording recording) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record Recording(@JsonProperty("recording_id") String recordingId,
|
||||
@JsonProperty("mime_type") String mimeType, @JsonProperty("size_bytes") Integer sizeBytes,
|
||||
@JsonProperty("upload_date_unix") Integer uploadDateUnix,
|
||||
@JsonProperty("transcription") String transcription) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record ManualVerification(@JsonProperty("extra_text") String extraText,
|
||||
@JsonProperty("request_time_unix") Integer requestTimeUnix,
|
||||
@JsonProperty("files") List<ManualVerificationFile> files) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record ManualVerificationFile(@JsonProperty("file_id") String fileId,
|
||||
@JsonProperty("file_name") String fileName, @JsonProperty("mime_type") String mimeType,
|
||||
@JsonProperty("size_bytes") Integer sizeBytes, @JsonProperty("upload_date_unix") Integer uploadDateUnix) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record VoiceSettings(@JsonProperty("stability") Double stability,
|
||||
@JsonProperty("similarity_boost") Double similarityBoost, @JsonProperty("style") Double style,
|
||||
@JsonProperty("use_speaker_boost") Boolean useSpeakerBoost, @JsonProperty("speed") Double speed) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record VoiceSharing(@JsonProperty("status") StatusEnum status,
|
||||
@JsonProperty("history_item_sample_id") String historyItemSampleId,
|
||||
@JsonProperty("date_unix") Integer dateUnix,
|
||||
@JsonProperty("whitelisted_emails") List<String> whitelistedEmails,
|
||||
@JsonProperty("public_owner_id") String publicOwnerId,
|
||||
@JsonProperty("original_voice_id") String originalVoiceId,
|
||||
@JsonProperty("financial_rewards_enabled") Boolean financialRewardsEnabled,
|
||||
@JsonProperty("free_users_allowed") Boolean freeUsersAllowed,
|
||||
@JsonProperty("live_moderation_enabled") Boolean liveModerationEnabled, @JsonProperty("rate") Double rate,
|
||||
@JsonProperty("notice_period") Integer noticePeriod, @JsonProperty("disable_at_unix") Integer disableAtUnix,
|
||||
@JsonProperty("voice_mixing_allowed") Boolean voiceMixingAllowed,
|
||||
@JsonProperty("featured") Boolean featured, @JsonProperty("category") CategoryEnum category,
|
||||
@JsonProperty("reader_app_enabled") Boolean readerAppEnabled, @JsonProperty("image_url") String imageUrl,
|
||||
@JsonProperty("ban_reason") String banReason, @JsonProperty("liked_by_count") Integer likedByCount,
|
||||
@JsonProperty("cloned_by_count") Integer clonedByCount, @JsonProperty("name") String name,
|
||||
@JsonProperty("description") String description, @JsonProperty("labels") Map<String, String> labels,
|
||||
@JsonProperty("review_status") ReviewStatusEnum reviewStatus,
|
||||
@JsonProperty("review_message") String reviewMessage,
|
||||
@JsonProperty("enabled_in_library") Boolean enabledInLibrary,
|
||||
@JsonProperty("instagram_username") String instagramUsername,
|
||||
@JsonProperty("twitter_username") String twitterUsername,
|
||||
@JsonProperty("youtube_username") String youtubeUsername,
|
||||
@JsonProperty("tiktok_username") String tiktokUsername,
|
||||
@JsonProperty("moderation_check") VoiceSharingModerationCheck moderationCheck,
|
||||
@JsonProperty("reader_restricted_on") List<ReaderResource> readerRestrictedOn) {
|
||||
public enum StatusEnum {
|
||||
|
||||
@JsonProperty("enabled")
|
||||
ENABLED("enabled"), @JsonProperty("disabled")
|
||||
DISABLED("disabled"), @JsonProperty("copied")
|
||||
COPIED("copied"), @JsonProperty("copied_disabled")
|
||||
COPIED_DISABLED("copied_disabled");
|
||||
|
||||
public final String value;
|
||||
|
||||
StatusEnum(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public String getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public enum CategoryEnum {
|
||||
|
||||
@JsonProperty("generated")
|
||||
GENERATED("generated"), @JsonProperty("professional")
|
||||
PROFESSIONAL("professional"), @JsonProperty("high_quality")
|
||||
HIGH_QUALITY("high_quality"), @JsonProperty("famous")
|
||||
FAMOUS("famous");
|
||||
|
||||
public final String value;
|
||||
|
||||
CategoryEnum(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public String getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
public enum ReviewStatusEnum {
|
||||
|
||||
@JsonProperty("not_requested")
|
||||
NOT_REQUESTED("not_requested"), @JsonProperty("pending")
|
||||
PENDING("pending"), @JsonProperty("declined")
|
||||
DECLINED("declined"), @JsonProperty("allowed")
|
||||
ALLOWED("allowed"), @JsonProperty("allowed_with_changes")
|
||||
ALLOWED_WITH_CHANGES("allowed_with_changes");
|
||||
|
||||
public final String value;
|
||||
|
||||
ReviewStatusEnum(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public String getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record VoiceSharingModerationCheck(@JsonProperty("date_checked_unix") Integer dateCheckedUnix,
|
||||
@JsonProperty("name_value") String nameValue, @JsonProperty("name_check") Boolean nameCheck,
|
||||
@JsonProperty("description_value") String descriptionValue,
|
||||
@JsonProperty("description_check") Boolean descriptionCheck,
|
||||
@JsonProperty("sample_ids") List<String> sampleIds,
|
||||
@JsonProperty("sample_checks") List<Double> sampleChecks,
|
||||
@JsonProperty("captcha_ids") List<String> captchaIds,
|
||||
@JsonProperty("captcha_checks") List<Double> captchaChecks) {
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record ReaderResource(@JsonProperty("resource_type") ResourceTypeEnum resourceType,
|
||||
@JsonProperty("resource_id") String resourceId) {
|
||||
|
||||
public enum ResourceTypeEnum {
|
||||
|
||||
@JsonProperty("read")
|
||||
READ("read"), @JsonProperty("collection")
|
||||
COLLECTION("collection");
|
||||
|
||||
public final String value;
|
||||
|
||||
ResourceTypeEnum(String value) {
|
||||
this.value = value;
|
||||
}
|
||||
|
||||
@JsonValue
|
||||
public String getValue() {
|
||||
return this.value;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public record VerifiedVoiceLanguage(@JsonProperty("language") String language,
|
||||
@JsonProperty("model_id") String modelId, @JsonProperty("accent") String accent) {
|
||||
}
|
||||
|
||||
/**
|
||||
* Builder to construct {@link ElevenLabsVoicesApi} instance.
|
||||
*/
|
||||
public static class Builder {
|
||||
|
||||
private String baseUrl = DEFAULT_BASE_URL;
|
||||
|
||||
private ApiKey apiKey;
|
||||
|
||||
private MultiValueMap<String, String> headers = new LinkedMultiValueMap<>();
|
||||
|
||||
private RestClient.Builder restClientBuilder = RestClient.builder();
|
||||
|
||||
private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER;
|
||||
|
||||
public Builder baseUrl(String baseUrl) {
|
||||
Assert.hasText(baseUrl, "baseUrl cannot be null or empty");
|
||||
this.baseUrl = baseUrl;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder apiKey(ApiKey apiKey) {
|
||||
Assert.notNull(apiKey, "apiKey cannot be null");
|
||||
this.apiKey = apiKey;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder apiKey(String simpleApiKey) {
|
||||
Assert.notNull(simpleApiKey, "simpleApiKey cannot be null");
|
||||
this.apiKey = new SimpleApiKey(simpleApiKey);
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder headers(MultiValueMap<String, String> headers) {
|
||||
Assert.notNull(headers, "headers cannot be null");
|
||||
this.headers = headers;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder restClientBuilder(RestClient.Builder restClientBuilder) {
|
||||
Assert.notNull(restClientBuilder, "restClientBuilder cannot be null");
|
||||
this.restClientBuilder = restClientBuilder;
|
||||
return this;
|
||||
}
|
||||
|
||||
public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) {
|
||||
Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null");
|
||||
this.responseErrorHandler = responseErrorHandler;
|
||||
return this;
|
||||
}
|
||||
|
||||
public ElevenLabsVoicesApi build() {
|
||||
Assert.notNull(this.apiKey, "apiKey must be set");
|
||||
return new ElevenLabsVoicesApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder,
|
||||
this.responseErrorHandler);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,2 @@
|
||||
org.springframework.aot.hint.RuntimeHintsRegistrar=\
|
||||
org.springframework.ai.elevenlabs.aot.ElevenLabsRuntimeHints
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs;
|
||||
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsVoicesApi;
|
||||
import org.springframework.ai.model.SimpleApiKey;
|
||||
import org.springframework.boot.SpringBootConfiguration;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
/**
|
||||
* Configuration class for the ElevenLabs API.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@SpringBootConfiguration
|
||||
public class ElevenLabsTestConfiguration {
|
||||
|
||||
@Bean
|
||||
public ElevenLabsApi elevenLabsApi() {
|
||||
return ElevenLabsApi.builder().apiKey(getApiKey()).build();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public ElevenLabsVoicesApi elevenLabsVoicesApi() {
|
||||
return ElevenLabsVoicesApi.builder().apiKey(getApiKey()).build();
|
||||
}
|
||||
|
||||
private SimpleApiKey getApiKey() {
|
||||
String apiKey = System.getenv("ELEVEN_LABS_API_KEY");
|
||||
if (!StringUtils.hasText(apiKey)) {
|
||||
throw new IllegalArgumentException(
|
||||
"You must provide an API key. Put it in an environment variable under the name ELEVEN_LABS_API_KEY");
|
||||
}
|
||||
return new SimpleApiKey(apiKey);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public ElevenLabsTextToSpeechModel elevenLabsSpeechModel() {
|
||||
return ElevenLabsTextToSpeechModel.builder().elevenLabsApi(elevenLabsApi()).build();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,110 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
import org.springframework.ai.audio.tts.Speech;
|
||||
import org.springframework.ai.audio.tts.TextToSpeechPrompt;
|
||||
import org.springframework.ai.audio.tts.TextToSpeechResponse;
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.ai.retry.NonTransientAiException;
|
||||
import org.springframework.web.client.HttpClientErrorException;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.assertThatThrownBy;
|
||||
|
||||
/**
|
||||
* Integration tests for the {@link ElevenLabsTextToSpeechModel}.
|
||||
*
|
||||
* <p>
|
||||
* These tests require a valid ElevenLabs API key to be set as an environment variable
|
||||
* named {@code ELEVEN_LABS_API_KEY}.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
|
||||
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
|
||||
public class ElevenLabsTextToSpeechModelIT {
|
||||
|
||||
private static final String VOICE_ID = "9BWtsMINqrJLrRacOk9x";
|
||||
|
||||
@Autowired
|
||||
private ElevenLabsTextToSpeechModel textToSpeechModel;
|
||||
|
||||
@Test
|
||||
void textToSpeechWithVoiceTest() {
|
||||
ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build();
|
||||
TextToSpeechPrompt prompt = new TextToSpeechPrompt("Hello, world!", options);
|
||||
TextToSpeechResponse response = textToSpeechModel.call(prompt);
|
||||
|
||||
assertThat(response).isNotNull();
|
||||
List<Speech> results = response.getResults();
|
||||
assertThat(results).hasSize(1);
|
||||
Speech speech = results.get(0);
|
||||
assertThat(speech.getOutput()).isNotEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
void textToSpeechStreamWithVoiceTest() {
|
||||
ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build();
|
||||
TextToSpeechPrompt prompt = new TextToSpeechPrompt(
|
||||
"Hello, world! This is a test of streaming speech synthesis.", options);
|
||||
Flux<TextToSpeechResponse> responseFlux = textToSpeechModel.stream(prompt);
|
||||
|
||||
List<TextToSpeechResponse> responses = responseFlux.collectList().block();
|
||||
assertThat(responses).isNotNull().isNotEmpty();
|
||||
|
||||
responses.forEach(response -> {
|
||||
assertThat(response).isNotNull();
|
||||
assertThat(response.getResults()).hasSize(1);
|
||||
assertThat(response.getResults().get(0).getOutput()).isNotEmpty();
|
||||
});
|
||||
}
|
||||
|
||||
@Test
|
||||
void invalidVoiceId() {
|
||||
ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder()
|
||||
.model("eleven_turbo_v2_5")
|
||||
.voiceId("invalid-voice-id")
|
||||
.outputFormat(ElevenLabsApi.OutputFormat.MP3_44100_128.getValue())
|
||||
.build();
|
||||
|
||||
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", options);
|
||||
|
||||
assertThatThrownBy(() -> {
|
||||
textToSpeechModel.call(speechPrompt);
|
||||
}).isInstanceOf(NonTransientAiException.class)
|
||||
.hasMessageContaining("An invalid ID has been received: 'invalid-voice-id'");
|
||||
}
|
||||
|
||||
@Test
|
||||
void emptyInputText() {
|
||||
TextToSpeechPrompt prompt = new TextToSpeechPrompt("");
|
||||
assertThatThrownBy(() -> {
|
||||
textToSpeechModel.call(prompt);
|
||||
}).isInstanceOf(IllegalArgumentException.class)
|
||||
.hasMessageContaining("A voiceId must be specified in the ElevenLabsSpeechOptions.");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,232 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Tests for the {@link ElevenLabsTextToSpeechOptions}.
|
||||
*
|
||||
* <p>
|
||||
* These are plain unit tests of the options builder, setters and copy behaviour; they
* make no remote calls and do not need an ElevenLabs API key.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class ElevenLabsTextToSpeechOptionsTests {
|
||||
|
||||
@Test
|
||||
public void testBuilderWithAllFields() {
|
||||
ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder()
|
||||
.modelId("test-model")
|
||||
.voice("test-voice")
|
||||
.voiceId("test-voice-id") // Test both voice and voiceId
|
||||
.format("mp3_44100_128")
|
||||
.outputFormat("mp3_44100_128")
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, 0.9, true, 1.2))
|
||||
.languageCode("en")
|
||||
.pronunciationDictionaryLocators(
|
||||
List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1")))
|
||||
.seed(12345)
|
||||
.previousText("previous")
|
||||
.nextText("next")
|
||||
.previousRequestIds(List.of("req1", "req2"))
|
||||
.nextRequestIds(List.of("req3", "req4"))
|
||||
.applyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON)
|
||||
.applyLanguageTextNormalization(true)
|
||||
.build();
|
||||
|
||||
assertThat(options.getModelId()).isEqualTo("test-model");
|
||||
assertThat(options.getVoice()).isEqualTo("test-voice-id");
|
||||
assertThat(options.getVoiceId()).isEqualTo("test-voice-id");
|
||||
assertThat(options.getFormat()).isEqualTo("mp3_44100_128");
|
||||
assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128");
|
||||
assertThat(options.getVoiceSettings()).isNotNull();
|
||||
assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5);
|
||||
assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8);
|
||||
assertThat(options.getVoiceSettings().style()).isEqualTo(0.9);
|
||||
assertThat(options.getVoiceSettings().useSpeakerBoost()).isTrue();
|
||||
assertThat(options.getSpeed()).isEqualTo(1.2); // Check via getter
|
||||
assertThat(options.getLanguageCode()).isEqualTo("en");
|
||||
assertThat(options.getPronunciationDictionaryLocators()).hasSize(1);
|
||||
assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1");
|
||||
assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1");
|
||||
assertThat(options.getSeed()).isEqualTo(12345);
|
||||
assertThat(options.getPreviousText()).isEqualTo("previous");
|
||||
assertThat(options.getNextText()).isEqualTo("next");
|
||||
assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2");
|
||||
assertThat(options.getNextRequestIds()).containsExactly("req3", "req4");
|
||||
assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
|
||||
assertThat(options.getApplyLanguageTextNormalization()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testCopy() {
|
||||
ElevenLabsTextToSpeechOptions original = ElevenLabsTextToSpeechOptions.builder()
|
||||
.modelId("test-model")
|
||||
.voice("test-voice")
|
||||
.format("mp3_44100_128")
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null))
|
||||
.build();
|
||||
|
||||
ElevenLabsTextToSpeechOptions copied = original.copy();
|
||||
|
||||
assertThat(copied).isNotSameAs(original).isEqualTo(original);
|
||||
|
||||
copied = ElevenLabsTextToSpeechOptions.builder().modelId("new-model").build();
|
||||
assertThat(original.getModelId()).isEqualTo("test-model");
|
||||
assertThat(copied.getModelId()).isEqualTo("new-model");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetters() {
|
||||
ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions();
|
||||
options.setModelId("test-model");
|
||||
options.setVoice("test-voice");
|
||||
options.setVoiceId("test-voice-id");
|
||||
options.setOutputFormat("mp3_44100_128");
|
||||
options.setFormat("mp3_44100_128");
|
||||
options.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null));
|
||||
options.setLanguageCode("en");
|
||||
options.setPronunciationDictionaryLocators(
|
||||
List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1")));
|
||||
options.setSeed(12345);
|
||||
options.setPreviousText("previous");
|
||||
options.setNextText("next");
|
||||
options.setPreviousRequestIds(List.of("req1", "req2"));
|
||||
options.setNextRequestIds(List.of("req3", "req4"));
|
||||
options.setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
|
||||
options.setApplyLanguageTextNormalization(true);
|
||||
|
||||
assertThat(options.getModelId()).isEqualTo("test-model");
|
||||
assertThat(options.getVoice()).isEqualTo("test-voice-id");
|
||||
assertThat(options.getVoiceId()).isEqualTo("test-voice-id");
|
||||
assertThat(options.getFormat()).isEqualTo("mp3_44100_128");
|
||||
assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128");
|
||||
assertThat(options.getVoiceSettings()).isNotNull();
|
||||
assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5);
|
||||
assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8);
|
||||
assertThat(options.getLanguageCode()).isEqualTo("en");
|
||||
assertThat(options.getPronunciationDictionaryLocators()).hasSize(1);
|
||||
assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1");
|
||||
assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1");
|
||||
assertThat(options.getSeed()).isEqualTo(12345);
|
||||
assertThat(options.getPreviousText()).isEqualTo("previous");
|
||||
assertThat(options.getNextText()).isEqualTo("next");
|
||||
assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2");
|
||||
assertThat(options.getNextRequestIds()).containsExactly("req3", "req4");
|
||||
assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
|
||||
assertThat(options.getApplyLanguageTextNormalization()).isTrue();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testDefaultValues() {
|
||||
ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions();
|
||||
assertThat(options.getModelId()).isNull();
|
||||
assertThat(options.getVoice()).isNull();
|
||||
assertThat(options.getVoiceId()).isNull();
|
||||
assertThat(options.getFormat()).isNull();
|
||||
assertThat(options.getOutputFormat()).isNull();
|
||||
assertThat(options.getSpeed()).isNull();
|
||||
assertThat(options.getVoiceSettings()).isNull();
|
||||
assertThat(options.getLanguageCode()).isNull();
|
||||
assertThat(options.getPronunciationDictionaryLocators()).isNull();
|
||||
assertThat(options.getSeed()).isNull();
|
||||
assertThat(options.getPreviousText()).isNull();
|
||||
assertThat(options.getNextText()).isNull();
|
||||
assertThat(options.getPreviousRequestIds()).isNull();
|
||||
assertThat(options.getNextRequestIds()).isNull();
|
||||
assertThat(options.getApplyTextNormalization()).isNull();
|
||||
assertThat(options.getApplyLanguageTextNormalization()).isNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testSetSpeed() {
|
||||
// 1. Setting speed via voiceSettings, no existing voiceSettings
|
||||
ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder()
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, 1.5))
|
||||
.build();
|
||||
assertThat(options.getSpeed()).isEqualTo(1.5);
|
||||
assertThat(options.getVoiceSettings()).isNotNull();
|
||||
assertThat(options.getVoiceSettings().speed()).isEqualTo(1.5);
|
||||
|
||||
// 2. Setting speed via voiceSettings, existing voiceSettings
|
||||
ElevenLabsTextToSpeechOptions options2 = ElevenLabsTextToSpeechOptions.builder()
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null))
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0)) // Overwrite
|
||||
.build();
|
||||
assertThat(options2.getSpeed()).isEqualTo(2.0f);
|
||||
assertThat(options2.getVoiceSettings().speed()).isEqualTo(2.0f);
|
||||
assertThat(options2.getVoiceSettings().stability()).isEqualTo(0.1);
|
||||
|
||||
// 3. Setting voiceSettings with null speed, existing voiceSettings
|
||||
ElevenLabsTextToSpeechOptions options3 = ElevenLabsTextToSpeechOptions.builder()
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0))
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null)) // Overwrite
|
||||
.build();
|
||||
assertThat(options3.getSpeed()).isNull();
|
||||
assertThat(options3.getVoiceSettings().speed()).isNull();
|
||||
assertThat(options3.getVoiceSettings().stability()).isEqualTo(0.1);
|
||||
|
||||
// 4. Setting voiceSettings to null, no existing voiceSettings (shouldn't create
|
||||
// voiceSettings)
|
||||
ElevenLabsTextToSpeechOptions options4 = ElevenLabsTextToSpeechOptions.builder().build();
|
||||
assertThat(options4.getSpeed()).isNull();
|
||||
assertThat(options4.getVoiceSettings()).isNull();
|
||||
|
||||
// 5. Setting voiceSettings directly, with speed.
|
||||
ElevenLabsTextToSpeechOptions options5 = ElevenLabsTextToSpeechOptions.builder()
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.5))
|
||||
.build();
|
||||
assertThat(options5.getSpeed()).isEqualTo(2.5f);
|
||||
assertThat(options5.getVoiceSettings().speed()).isEqualTo(2.5f);
|
||||
|
||||
// 6. Setting voiceSettings directly, without speed (speed should be null).
|
||||
ElevenLabsTextToSpeechOptions options6 = ElevenLabsTextToSpeechOptions.builder()
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null))
|
||||
.build();
|
||||
assertThat(options6.getSpeed()).isNull();
|
||||
assertThat(options6.getVoiceSettings().speed()).isNull();
|
||||
|
||||
// 7. Setting voiceSettings to null, after previously setting it.
|
||||
ElevenLabsTextToSpeechOptions options7 = ElevenLabsTextToSpeechOptions.builder()
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 1.5))
|
||||
.voiceSettings(null)
|
||||
.build();
|
||||
assertThat(options7.getSpeed()).isNull();
|
||||
assertThat(options7.getVoiceSettings()).isNull();
|
||||
|
||||
// 8. Setting speed via setSpeed method
|
||||
ElevenLabsTextToSpeechOptions options8 = ElevenLabsTextToSpeechOptions.builder().build();
|
||||
options8.setSpeed(3.0);
|
||||
assertThat(options8.getSpeed()).isEqualTo(3.0);
|
||||
assertThat(options8.getVoiceSettings()).isNotNull();
|
||||
assertThat(options8.getVoiceSettings().speed()).isEqualTo(3.0);
|
||||
|
||||
// 9. Setting speed to null via setSpeed method
|
||||
options8.setSpeed(null);
|
||||
assertThat(options8.getSpeed()).isNull();
|
||||
assertThat(options8.getVoiceSettings().speed()).isNull();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,224 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs.api;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.concurrent.atomic.AtomicInteger;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
|
||||
import reactor.core.publisher.Flux;
|
||||
import reactor.test.StepVerifier;
|
||||
|
||||
import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
import org.springframework.util.LinkedMultiValueMap;
|
||||
import org.springframework.util.MultiValueMap;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.junit.jupiter.api.Assertions.assertThrows;
|
||||
|
||||
/**
|
||||
* Integration tests for the {@link ElevenLabsApi}.
|
||||
*
|
||||
* <p>
|
||||
* These tests require a valid ElevenLabs API key to be set as an environment variable
|
||||
* named {@code ELEVEN_LABS_API_KEY}.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
|
||||
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
|
||||
public class ElevenLabsApiIT {
|
||||
|
||||
@Autowired
|
||||
private ElevenLabsApi elevenLabsApi;
|
||||
|
||||
@Test
|
||||
public void testTextToSpeech() throws IOException {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("Hello, world!")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.build();
|
||||
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
ResponseEntity<byte[]> response = elevenLabsApi.textToSpeech(request, validVoiceId, null);
|
||||
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull().isNotEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechWithVoiceSettings() {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("Hello, with Voice settings!")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, 0.0, true, 1.0))
|
||||
.build();
|
||||
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
ResponseEntity<byte[]> response = elevenLabsApi.textToSpeech(request, validVoiceId, null);
|
||||
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull().isNotEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechWithQueryParams() {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("Hello, testing query params!")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.build();
|
||||
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
MultiValueMap<String, String> queryParams = new LinkedMultiValueMap<>();
|
||||
queryParams.add("optimize_streaming_latency", "2");
|
||||
queryParams.add("enable_logging", "true");
|
||||
queryParams.add("output_format", ElevenLabsApi.OutputFormat.MP3_22050_32.getValue());
|
||||
|
||||
ResponseEntity<byte[]> response = elevenLabsApi.textToSpeech(request, validVoiceId, queryParams);
|
||||
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull().isNotEmpty();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechVoiceIdNull() {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("This should fail.")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.build();
|
||||
|
||||
Exception exception = assertThrows(IllegalArgumentException.class,
|
||||
() -> elevenLabsApi.textToSpeech(request, null, null));
|
||||
assertThat(exception.getMessage()).isEqualTo("voiceId must be provided. It cannot be null.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechTextEmpty() {
|
||||
Exception exception = assertThrows(IllegalArgumentException.class,
|
||||
() -> ElevenLabsApi.SpeechRequest.builder().text("").modelId("eleven_turbo_v2_5").build());
|
||||
assertThat(exception.getMessage()).isEqualTo("text must not be empty");
|
||||
}
|
||||
|
||||
// Streaming API tests
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechStream() {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("This is a longer text to ensure multiple chunks are received through the streaming API.")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.build();
|
||||
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
Flux<ResponseEntity<byte[]>> responseFlux = elevenLabsApi.textToSpeechStream(request, validVoiceId, null);
|
||||
|
||||
// Track the number of chunks received
|
||||
AtomicInteger chunkCount = new AtomicInteger(0);
|
||||
|
||||
StepVerifier.create(responseFlux).thenConsumeWhile(response -> {
|
||||
// Verify each chunk's response properties
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull().isNotEmpty();
|
||||
// Count this chunk
|
||||
chunkCount.incrementAndGet();
|
||||
return true;
|
||||
}).verifyComplete();
|
||||
|
||||
// Verify we received at least one chunk
|
||||
assertThat(chunkCount.get()).isPositive();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechStreamWithVoiceSettings() {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("Hello, with Voice settings in streaming mode!")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7, null, null, null))
|
||||
.build();
|
||||
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
Flux<ResponseEntity<byte[]>> responseFlux = elevenLabsApi.textToSpeechStream(request, validVoiceId, null);
|
||||
|
||||
StepVerifier.create(responseFlux).thenConsumeWhile(response -> {
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull().isNotEmpty();
|
||||
return true;
|
||||
}).verifyComplete();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechStreamWithQueryParams() {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("Hello, testing streaming with query params!")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.build();
|
||||
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
MultiValueMap<String, String> queryParams = new LinkedMultiValueMap<>();
|
||||
queryParams.add("optimize_streaming_latency", "2");
|
||||
queryParams.add("enable_logging", "true");
|
||||
queryParams.add("output_format", "mp3_44100_128");
|
||||
|
||||
Flux<ResponseEntity<byte[]>> responseFlux = elevenLabsApi.textToSpeechStream(request, validVoiceId,
|
||||
queryParams);
|
||||
|
||||
StepVerifier.create(responseFlux).thenConsumeWhile(response -> {
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull().isNotEmpty();
|
||||
return true;
|
||||
}).verifyComplete();
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechStreamVoiceIdNull() {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("This should fail.")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.build();
|
||||
|
||||
Exception exception = assertThrows(IllegalArgumentException.class,
|
||||
() -> elevenLabsApi.textToSpeechStream(request, null, null));
|
||||
assertThat(exception.getMessage()).isEqualTo("voiceId must be provided for streaming. It cannot be null.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechStreamRequestBodyNull() {
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
|
||||
Exception exception = assertThrows(IllegalArgumentException.class,
|
||||
() -> elevenLabsApi.textToSpeechStream(null, validVoiceId, null));
|
||||
assertThat(exception.getMessage()).isEqualTo("requestBody can not be null.");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testTextToSpeechStreamTextEmpty() {
|
||||
Exception exception = assertThrows(IllegalArgumentException.class, () -> {
|
||||
ElevenLabsApi.SpeechRequest request = ElevenLabsApi.SpeechRequest.builder()
|
||||
.text("")
|
||||
.modelId("eleven_turbo_v2_5")
|
||||
.build();
|
||||
|
||||
String validVoiceId = "9BWtsMINqrJLrRacOk9x";
|
||||
elevenLabsApi.textToSpeechStream(request, validVoiceId, null);
|
||||
});
|
||||
assertThat(exception.getMessage()).isEqualTo("text must not be empty");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,113 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.elevenlabs.api;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
|
||||
|
||||
import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration;
|
||||
import org.springframework.beans.factory.annotation.Autowired;
|
||||
import org.springframework.boot.test.context.SpringBootTest;
|
||||
import org.springframework.http.ResponseEntity;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
/**
|
||||
* Integration tests for the {@link ElevenLabsVoicesApi}.
|
||||
*
|
||||
* <p>
|
||||
* These tests require a valid ElevenLabs API key to be set as an environment variable
|
||||
* named {@code ELEVEN_LABS_API_KEY}.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
|
||||
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
|
||||
public class ElevenLabsVoicesApiIT {
|
||||
|
||||
@Autowired
|
||||
private ElevenLabsVoicesApi voicesApi;
|
||||
|
||||
@Test
|
||||
void getVoices() {
|
||||
ResponseEntity<ElevenLabsVoicesApi.Voices> response = voicesApi.getVoices();
|
||||
System.out.println("Response: " + response);
|
||||
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull();
|
||||
ElevenLabsVoicesApi.Voices voicesResponse = response.getBody();
|
||||
|
||||
List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.voices();
|
||||
assertThat(voices).isNotNull().isNotEmpty();
|
||||
|
||||
for (ElevenLabsVoicesApi.Voice voice : voices) {
|
||||
assertThat(voice.voiceId()).isNotBlank();
|
||||
}
|
||||
}
|
||||
|
||||
@Test
|
||||
void getDefaultVoiceSettings() {
|
||||
ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> response = voicesApi.getDefaultVoiceSettings();
|
||||
assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(response.getBody()).isNotNull();
|
||||
|
||||
ElevenLabsVoicesApi.VoiceSettings settings = response.getBody();
|
||||
assertThat(settings.stability()).isNotNull();
|
||||
assertThat(settings.similarityBoost()).isNotNull();
|
||||
assertThat(settings.style()).isNotNull();
|
||||
assertThat(settings.useSpeakerBoost()).isNotNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getVoiceSettings() {
|
||||
ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices();
|
||||
assertThat(voicesResponse.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices();
|
||||
assertThat(voices).isNotEmpty();
|
||||
String voiceId = voices.get(0).voiceId();
|
||||
|
||||
ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> settingsResponse = voicesApi.getVoiceSettings(voiceId);
|
||||
assertThat(settingsResponse.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(settingsResponse.getBody()).isNotNull();
|
||||
|
||||
ElevenLabsVoicesApi.VoiceSettings settings = settingsResponse.getBody();
|
||||
assertThat(settings.stability()).isNotNull();
|
||||
assertThat(settings.similarityBoost()).isNotNull();
|
||||
assertThat(settings.style()).isNotNull();
|
||||
assertThat(settings.useSpeakerBoost()).isNotNull();
|
||||
}
|
||||
|
||||
@Test
|
||||
void getVoice() {
|
||||
ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices();
|
||||
assertThat(voicesResponse.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices();
|
||||
assertThat(voices).isNotEmpty();
|
||||
String voiceId = voices.get(0).voiceId();
|
||||
|
||||
ResponseEntity<ElevenLabsVoicesApi.Voice> voiceResponse = voicesApi.getVoice(voiceId);
|
||||
assertThat(voiceResponse.getStatusCode().is2xxSuccessful()).isTrue();
|
||||
assertThat(voiceResponse.getBody()).isNotNull();
|
||||
|
||||
ElevenLabsVoicesApi.Voice voice = voiceResponse.getBody();
|
||||
assertThat(voice.voiceId()).isEqualTo(voiceId);
|
||||
assertThat(voice.name()).isNotBlank();
|
||||
}
|
||||
|
||||
}
|
||||
1482
models/spring-ai-elevenlabs/src/test/resources/voices.json
Normal file
1482
models/spring-ai-elevenlabs/src/test/resources/voices.json
Normal file
File diff suppressed because it is too large
Load Diff
@@ -29,7 +29,10 @@ import org.springframework.lang.Nullable;
|
||||
*
|
||||
* @author Ahmed Yousri
|
||||
* @since 1.0.0-M1
|
||||
* @deprecated Use {@link org.springframework.ai.audio.tts.Speech} from the core package
|
||||
* instead. This class will be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
public class Speech implements ModelResult<byte[]> {
|
||||
|
||||
private final byte[] audio;
|
||||
|
||||
@@ -24,7 +24,10 @@ import java.util.Objects;
|
||||
*
|
||||
* @author Ahmed Yousri
|
||||
* @since 1.0.0-M1
|
||||
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechMessage} from the
|
||||
* core package instead. This class will be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
public class SpeechMessage {
|
||||
|
||||
private String text;
|
||||
|
||||
@@ -25,7 +25,10 @@ import org.springframework.ai.model.Model;
|
||||
*
|
||||
* @author Ahmed Yousri
|
||||
* @since 1.0.0-M1
|
||||
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechModel} from the
|
||||
* core package instead. This interface will be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
@FunctionalInterface
|
||||
public interface SpeechModel extends Model<SpeechPrompt, SpeechResponse> {
|
||||
|
||||
|
||||
@@ -29,7 +29,10 @@ import org.springframework.ai.openai.OpenAiAudioSpeechOptions;
|
||||
*
|
||||
* @author Ahmed Yousri
|
||||
* @since 1.0.0-M1
|
||||
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechPrompt} from the
|
||||
* core package instead. This class will be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
public class SpeechPrompt implements ModelRequest<SpeechMessage> {
|
||||
|
||||
private final SpeechMessage message;
|
||||
|
||||
@@ -28,7 +28,10 @@ import org.springframework.ai.openai.metadata.audio.OpenAiAudioSpeechResponseMet
|
||||
*
|
||||
* @author Ahmed Yousri
|
||||
* @since 1.0.0-M1
|
||||
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechResponse} from the
|
||||
* core package instead. This class will be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
public class SpeechResponse implements ModelResponse<Speech> {
|
||||
|
||||
private final Speech speech;
|
||||
|
||||
@@ -27,7 +27,10 @@ import org.springframework.ai.model.StreamingModel;
|
||||
*
|
||||
* @author Ahmed Yousri
|
||||
* @since 1.0.0-M1
|
||||
* @deprecated Use {@link org.springframework.ai.audio.tts.StreamingTextToSpeechModel}
|
||||
* from the core package instead. This interface will be removed in a future release.
|
||||
*/
|
||||
@Deprecated
|
||||
@FunctionalInterface
|
||||
public interface StreamingSpeechModel extends StreamingModel<SpeechPrompt, SpeechResponse> {
|
||||
|
||||
|
||||
@@ -16,9 +16,9 @@
|
||||
|
||||
package org.springframework.ai.openai.metadata.audio;
|
||||
|
||||
import org.springframework.ai.audio.tts.TextToSpeechResponseMetadata;
|
||||
import org.springframework.ai.chat.metadata.EmptyRateLimit;
|
||||
import org.springframework.ai.chat.metadata.RateLimit;
|
||||
import org.springframework.ai.model.MutableResponseMetadata;
|
||||
import org.springframework.ai.openai.api.OpenAiAudioApi;
|
||||
import org.springframework.lang.Nullable;
|
||||
import org.springframework.util.Assert;
|
||||
@@ -29,7 +29,7 @@ import org.springframework.util.Assert;
|
||||
* @author Ahmed Yousri
|
||||
* @see RateLimit
|
||||
*/
|
||||
public class OpenAiAudioSpeechResponseMetadata extends MutableResponseMetadata {
|
||||
public class OpenAiAudioSpeechResponseMetadata extends TextToSpeechResponseMetadata {
|
||||
|
||||
public static final OpenAiAudioSpeechResponseMetadata NULL = new OpenAiAudioSpeechResponseMetadata() {
|
||||
|
||||
|
||||
7
pom.xml
7
pom.xml
@@ -99,6 +99,7 @@
|
||||
<module>auto-configurations/models/spring-ai-autoconfigure-model-anthropic</module>
|
||||
<module>auto-configurations/models/spring-ai-autoconfigure-model-azure-openai</module>
|
||||
<module>auto-configurations/models/spring-ai-autoconfigure-model-bedrock-ai</module>
|
||||
<module>auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs</module>
|
||||
<module>auto-configurations/models/spring-ai-autoconfigure-model-huggingface</module>
|
||||
<module>auto-configurations/models/spring-ai-autoconfigure-model-openai</module>
|
||||
<module>auto-configurations/models/spring-ai-autoconfigure-model-minimax</module>
|
||||
@@ -162,6 +163,7 @@
|
||||
<module>models/spring-ai-azure-openai</module>
|
||||
<module>models/spring-ai-bedrock</module>
|
||||
<module>models/spring-ai-bedrock-converse</module>
|
||||
<module>models/spring-ai-elevenlabs</module>
|
||||
<module>models/spring-ai-huggingface</module>
|
||||
<module>models/spring-ai-minimax</module>
|
||||
<module>models/spring-ai-mistral-ai</module>
|
||||
@@ -180,6 +182,7 @@
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-azure-openai</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock-converse</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-huggingface</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-minimax</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-mistral-ai</module>
|
||||
@@ -712,7 +715,8 @@
|
||||
<exclude>org.springframework.ai.anthropic/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.azure.openai/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.bedrock/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.bedrock.converse/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.bedrock.converse/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.elevenlabs/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.huggingface/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.minimax/**/*IT.java</exclude>
|
||||
<exclude>org.springframework.ai.mistralai/**/*IT.java</exclude>
|
||||
@@ -760,6 +764,7 @@
|
||||
<exclude>org.springframework.ai.autoconfigure.huggingface/**/**IT.java</exclude>
|
||||
|
||||
<exclude>org.springframework.ai.autoconfigure.chat/**/**IT.java</exclude>
|
||||
<exclude>org.springframework.ai.autoconfigure.elevenlabs/**/**IT.java</exclude>
|
||||
<exclude>org.springframework.ai.autoconfigure.embedding/**/**IT.java</exclude>
|
||||
<exclude>org.springframework.ai.autoconfigure.image/**/**IT.java</exclude>
|
||||
|
||||
|
||||
@@ -243,6 +243,13 @@
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-elevenlabs</artifactId>
|
||||
<version>${project.version}</version>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-huggingface</artifactId>
|
||||
@@ -310,7 +317,6 @@
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-zhipuai</artifactId>
|
||||
@@ -565,6 +571,11 @@
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-autoconfigure-model-elevenlabs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-autoconfigure-model-huggingface</artifactId>
|
||||
@@ -914,6 +925,11 @@
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-starter-model-elevenlabs</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-starter-model-minimax</artifactId>
|
||||
|
||||
@@ -1,5 +1,9 @@
|
||||
[[Speech]]
|
||||
= Text-To-Speech (TTS) API
|
||||
|
||||
Spring AI provides support for OpenAI's Speech API.
|
||||
When additional providers for Speech are implemented, a common `SpeechModel` and `StreamingSpeechModel` interface will be extracted.
|
||||
Spring AI provides support for the following Text-To-Speech (TTS) providers:
|
||||
|
||||
- xref:api/audio/speech/openai-speech.adoc[OpenAI's Speech API]
|
||||
- xref:api/audio/speech/elevenlabs-speech.adoc[ElevenLabs Text-to-Speech API]
|
||||
|
||||
Future enhancements may introduce additional providers, at which point common `TextToSpeechModel` and `StreamingTextToSpeechModel` interfaces will be extracted.
|
||||
@@ -0,0 +1,268 @@
|
||||
= ElevenLabs Text-to-Speech (TTS)
|
||||
|
||||
== Introduction
|
||||
|
||||
ElevenLabs provides natural-sounding speech synthesis software using deep learning. Its AI audio models generate realistic, versatile, and contextually-aware speech, voices, and sound effects across 32 languages. The ElevenLabs Text-to-Speech API enables users to bring any book, article, PDF, newsletter, or text to life with ultra-realistic AI narration.
|
||||
|
||||
== Prerequisites
|
||||
|
||||
. Create an ElevenLabs account and obtain an API key. You can sign up at the https://elevenlabs.io/sign-up[ElevenLabs signup page]. Your API key can be found on your profile page after logging in.
|
||||
. Add the `spring-ai-elevenlabs` dependency to your project's build file. For more information, refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section.
|
||||
|
||||
== Auto-configuration
|
||||
|
||||
Spring AI provides Spring Boot auto-configuration for the ElevenLabs Text-to-Speech Client.
|
||||
To enable it, add the following dependency to your project's Maven `pom.xml` file:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-starter-model-elevenlabs</artifactId>
|
||||
</dependency>
|
||||
----
|
||||
|
||||
or to your Gradle `build.gradle` build file:
|
||||
|
||||
[source,groovy]
|
||||
----
|
||||
dependencies {
|
||||
implementation 'org.springframework.ai:spring-ai-starter-model-elevenlabs'
|
||||
}
|
||||
----
|
||||
|
||||
TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
|
||||
|
||||
== Speech Properties
|
||||
|
||||
=== Connection Properties
|
||||
|
||||
The prefix `spring.ai.elevenlabs` is used as the property prefix for *all* ElevenLabs related configurations (both connection and TTS specific settings). This is defined in `ElevenLabsConnectionProperties`.
|
||||
|
||||
[cols="3,5,1"]
|
||||
|====
|
||||
| Property | Description | Default
|
||||
| spring.ai.elevenlabs.base-url | The base URL for the ElevenLabs API. | https://api.elevenlabs.io
|
||||
| spring.ai.elevenlabs.api-key | Your ElevenLabs API key. | -
|
||||
|====
|
||||
|
||||
=== Configuration Properties
|
||||
|
||||
The prefix `spring.ai.elevenlabs.tts` is used as the property prefix to configure the ElevenLabs Text-to-Speech client, specifically. This is defined in `ElevenLabsSpeechProperties`.
|
||||
|
||||
[cols="3,5,2"]
|
||||
|====
|
||||
| Property | Description | Default
|
||||
|
||||
| spring.ai.elevenlabs.tts.options.model-id | The ID of the model to use. | eleven_turbo_v2_5
|
||||
| spring.ai.elevenlabs.tts.options.voice-id | The ID of the voice to use. This is the *voice ID*, not the voice name. | 9BWtsMINqrJLrRacOk9x
|
||||
| spring.ai.elevenlabs.tts.options.output-format | The output format for the generated audio. See xref:#output-formats[Output Formats] below. | mp3_22050_32
|
||||
| spring.ai.elevenlabs.tts.enabled | Enable or disable the ElevenLabs Text-to-Speech client. | true
|
||||
|====
|
||||
|
||||
NOTE: The base URL and API key can also be configured *specifically* for TTS using `spring.ai.elevenlabs.tts.base-url` and `spring.ai.elevenlabs.tts.api-key`. However, it is generally recommended to use the global `spring.ai.elevenlabs` prefix for simplicity, unless you have a specific reason to use different credentials for different ElevenLabs services. The more specific `tts` properties will override the global ones.
|
||||
|
||||
TIP: All properties prefixed with `spring.ai.elevenlabs.tts.options` can be overridden at runtime.
|
||||
|
||||
[[output-formats]]
|
||||
.Available Output Formats
|
||||
[cols="1,1"]
|
||||
|====
|
||||
| Enum Value | Description
|
||||
| MP3_22050_32 | MP3, 22.05 kHz, 32 kbps
|
||||
| MP3_44100_32 | MP3, 44.1 kHz, 32 kbps
|
||||
| MP3_44100_64 | MP3, 44.1 kHz, 64 kbps
|
||||
| MP3_44100_96 | MP3, 44.1 kHz, 96 kbps
|
||||
| MP3_44100_128 | MP3, 44.1 kHz, 128 kbps
|
||||
| MP3_44100_192 | MP3, 44.1 kHz, 192 kbps
|
||||
| PCM_8000 | PCM, 8 kHz
|
||||
| PCM_16000 | PCM, 16 kHz
|
||||
| PCM_22050 | PCM, 22.05 kHz
|
||||
| PCM_24000 | PCM, 24 kHz
|
||||
| PCM_44100 | PCM, 44.1 kHz
|
||||
| PCM_48000 | PCM, 48 kHz
|
||||
| ULAW_8000 | µ-law, 8 kHz
|
||||
| ALAW_8000 | A-law, 8 kHz
|
||||
| OPUS_48000_32 | Opus, 48 kHz, 32 kbps
|
||||
| OPUS_48000_64 | Opus, 48 kHz, 64 kbps
|
||||
| OPUS_48000_96 | Opus, 48 kHz, 96 kbps
|
||||
| OPUS_48000_128 | Opus, 48 kHz, 128 kbps
|
||||
| OPUS_48000_192 | Opus, 48 kHz, 192 kbps
|
||||
|====
|
||||
|
||||
|
||||
== Runtime Options [[speech-options]]
|
||||
|
||||
The `ElevenLabsTextToSpeechOptions` class provides options to use when making a text-to-speech request. On start-up, the options specified by the `spring.ai.elevenlabs.tts.options` properties are used as defaults, but you can override these at runtime. The following options are available:
|
||||
|
||||
* `modelId`: The ID of the model to use.
|
||||
* `voiceId`: The ID of the voice to use.
|
||||
* `outputFormat`: The output format of the generated audio.
|
||||
* `voiceSettings`: An object containing voice settings such as `stability`, `similarityBoost`, `style`, `useSpeakerBoost`, and `speed`.
|
||||
* `enableLogging`: A boolean to enable or disable logging.
|
||||
* `languageCode`: The language code of the input text (e.g., "en" for English).
|
||||
* `pronunciationDictionaryLocators`: A list of pronunciation dictionary locators.
|
||||
* `seed`: A seed for random number generation, for reproducibility.
|
||||
* `previousText`: Text before the main text, for context in multi-turn conversations.
|
||||
* `nextText`: Text after the main text, for context in multi-turn conversations.
|
||||
* `previousRequestIds`: Request IDs from previous turns in a conversation.
|
||||
* `nextRequestIds`: Request IDs for subsequent turns in a conversation.
|
||||
* `applyTextNormalization`: Apply text normalization ("auto", "on", or "off").
|
||||
* `applyLanguageTextNormalization`: Apply language text normalization.
|
||||
|
||||
For example:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
ElevenLabsTextToSpeechOptions speechOptions = ElevenLabsTextToSpeechOptions.builder()
|
||||
.model("eleven_multilingual_v2")
|
||||
.voiceId("your_voice_id")
|
||||
.outputFormat(ElevenLabsApi.OutputFormat.MP3_44100_128.getValue())
|
||||
.build();
|
||||
|
||||
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", speechOptions);
|
||||
TextToSpeechResponse response = elevenLabsTextToSpeechModel.call(speechPrompt);
|
||||
----
|
||||
|
||||
=== Using Voice Settings
|
||||
|
||||
You can customize the voice output by providing `VoiceSettings` in the options. This allows you to control properties like stability and similarity.
|
||||
|
||||
[source,java]
|
||||
----
|
||||
var voiceSettings = new ElevenLabsApi.SpeechRequest.VoiceSettings(0.75f, 0.75f, 0.0f, true);
|
||||
|
||||
ElevenLabsTextToSpeechOptions speechOptions = ElevenLabsTextToSpeechOptions.builder()
|
||||
.model("eleven_multilingual_v2")
|
||||
.voiceId("your_voice_id")
|
||||
.voiceSettings(voiceSettings)
|
||||
.build();
|
||||
|
||||
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("This is a test with custom voice settings!", speechOptions);
|
||||
TextToSpeechResponse response = elevenLabsTextToSpeechModel.call(speechPrompt);
|
||||
----
|
||||
|
||||
== Manual Configuration
|
||||
|
||||
Add the `spring-ai-elevenlabs` dependency to your project's Maven `pom.xml` file:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-elevenlabs</artifactId>
|
||||
</dependency>
|
||||
----
|
||||
|
||||
or to your Gradle `build.gradle` build file:
|
||||
|
||||
[source,groovy]
|
||||
----
|
||||
dependencies {
|
||||
implementation 'org.springframework.ai:spring-ai-elevenlabs'
|
||||
}
|
||||
----
|
||||
|
||||
TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
|
||||
|
||||
Next, create an `ElevenLabsTextToSpeechModel`:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder()
|
||||
.apiKey(System.getenv("ELEVEN_LABS_API_KEY"))
|
||||
.build();
|
||||
|
||||
ElevenLabsTextToSpeechModel elevenLabsTextToSpeechModel = ElevenLabsTextToSpeechModel.builder()
|
||||
.elevenLabsApi(elevenLabsApi)
|
||||
.defaultOptions(ElevenLabsTextToSpeechOptions.builder()
|
||||
.model("eleven_turbo_v2_5")
|
||||
.voiceId("your_voice_id") // e.g. "9BWtsMINqrJLrRacOk9x"
|
||||
.outputFormat("mp3_44100_128")
|
||||
.build())
|
||||
.build();
|
||||
|
||||
// The call will use the default options configured above.
|
||||
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.");
|
||||
TextToSpeechResponse response = elevenLabsTextToSpeechModel.call(speechPrompt);
|
||||
|
||||
byte[] responseAsBytes = response.getResult().getOutput();
|
||||
----
|
||||
|
||||
== Streaming Real-time Audio
|
||||
|
||||
The ElevenLabs Speech API supports real-time audio streaming using chunked transfer encoding. This allows audio playback to begin before the entire audio file is generated.
|
||||
|
||||
[source,java]
|
||||
----
|
||||
ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder()
|
||||
.apiKey(System.getenv("ELEVEN_LABS_API_KEY"))
|
||||
.build();
|
||||
|
||||
ElevenLabsTextToSpeechModel elevenLabsTextToSpeechModel = ElevenLabsTextToSpeechModel.builder()
|
||||
.elevenLabsApi(elevenLabsApi)
|
||||
.build();
|
||||
|
||||
ElevenLabsTextToSpeechOptions streamingOptions = ElevenLabsTextToSpeechOptions.builder()
|
||||
.model("eleven_turbo_v2_5")
|
||||
.voiceId("your_voice_id")
|
||||
.outputFormat("mp3_44100_128")
|
||||
.build();
|
||||
|
||||
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Today is a wonderful day to build something people love!", streamingOptions);
|
||||
|
||||
Flux<TextToSpeechResponse> responseStream = elevenLabsTextToSpeechModel.stream(speechPrompt);
|
||||
|
||||
// Process the stream, e.g., play the audio chunks
|
||||
responseStream.subscribe(speechResponse -> {
|
||||
byte[] audioChunk = speechResponse.getResult().getOutput();
|
||||
// Play the audioChunk
|
||||
});
|
||||
|
||||
----
|
||||
|
||||
== Voices API
|
||||
|
||||
The ElevenLabs Voices API allows you to retrieve information about available voices, their settings, and default voice settings. You can use this API to discover the `voiceId`s to use in your speech requests.
|
||||
|
||||
To use the Voices API, you'll need to create an instance of `ElevenLabsVoicesApi`:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
ElevenLabsVoicesApi voicesApi = ElevenLabsVoicesApi.builder()
|
||||
.apiKey(System.getenv("ELEVEN_LABS_API_KEY"))
|
||||
.build();
|
||||
----
|
||||
|
||||
You can then use the following methods:
|
||||
|
||||
* `getVoices()`: Retrieves a list of all available voices.
|
||||
* `getDefaultVoiceSettings()`: Gets the default settings for voices.
|
||||
* `getVoiceSettings(String voiceId)`: Returns the settings for a specific voice.
|
||||
* `getVoice(String voiceId)`: Returns metadata about a specific voice.
|
||||
|
||||
Example:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
// Get all voices
|
||||
ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices();
|
||||
List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices();
|
||||
|
||||
// Get default voice settings
|
||||
ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> defaultSettingsResponse = voicesApi.getDefaultVoiceSettings();
|
||||
ElevenLabsVoicesApi.VoiceSettings defaultSettings = defaultSettingsResponse.getBody();
|
||||
|
||||
// Get settings for a specific voice
|
||||
ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> voiceSettingsResponse = voicesApi.getVoiceSettings(voiceId);
|
||||
ElevenLabsVoicesApi.VoiceSettings voiceSettings = voiceSettingsResponse.getBody();
|
||||
|
||||
// Get details for a specific voice
|
||||
ResponseEntity<ElevenLabsVoicesApi.Voice> voiceDetailsResponse = voicesApi.getVoice(voiceId);
|
||||
ElevenLabsVoicesApi.Voice voiceDetails = voiceDetailsResponse.getBody();
|
||||
----
|
||||
|
||||
== Example Code
|
||||
|
||||
* The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java[ElevenLabsTextToSpeechModelIT.java] test provides some general examples of how to use the library.
|
||||
* The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java[ElevenLabsApiIT.java] test provides examples of using the low-level `ElevenLabsApi`.
|
||||
@@ -0,0 +1,147 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
import com.fasterxml.jackson.annotation.JsonInclude;
|
||||
|
||||
/**
|
||||
* Default implementation of the {@link TextToSpeechOptions} interface.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
@JsonInclude(JsonInclude.Include.NON_NULL)
|
||||
public class DefaultTextToSpeechOptions implements TextToSpeechOptions {
|
||||
|
||||
private final String model;
|
||||
|
||||
private final String voice;
|
||||
|
||||
private final String format;
|
||||
|
||||
private final Double speed;
|
||||
|
||||
private DefaultTextToSpeechOptions(String model, String voice, String format, Double speed) {
|
||||
this.model = model;
|
||||
this.voice = voice;
|
||||
this.format = format;
|
||||
this.speed = speed;
|
||||
}
|
||||
|
||||
public static Builder builder() {
|
||||
return new Builder();
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getModel() {
|
||||
return this.model;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getVoice() {
|
||||
return this.voice;
|
||||
}
|
||||
|
||||
@Override
|
||||
public String getFormat() {
|
||||
return this.format;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Double getSpeed() {
|
||||
return this.speed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!(o instanceof DefaultTextToSpeechOptions that))
|
||||
return false;
|
||||
return Objects.equals(model, that.model) && Objects.equals(voice, that.voice)
|
||||
&& Objects.equals(format, that.format) && Objects.equals(speed, that.speed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(model, voice, format, speed);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "DefaultTextToSpeechOptions{" + "model='" + model + '\'' + ", voice='" + voice + '\'' + ", format='"
|
||||
+ format + '\'' + ", speed=" + speed + '}';
|
||||
}
|
||||
|
||||
@Override
|
||||
@SuppressWarnings("unchecked")
|
||||
public DefaultTextToSpeechOptions copy() {
|
||||
return new Builder(this).build();
|
||||
}
|
||||
|
||||
public static class Builder implements TextToSpeechOptions.Builder {
|
||||
|
||||
private String model;
|
||||
|
||||
private String voice;
|
||||
|
||||
private String format;
|
||||
|
||||
private Double speed;
|
||||
|
||||
public Builder() {
|
||||
}
|
||||
|
||||
private Builder(DefaultTextToSpeechOptions options) {
|
||||
this.model = options.model;
|
||||
this.voice = options.voice;
|
||||
this.format = options.format;
|
||||
this.speed = options.speed;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Builder model(String model) {
|
||||
this.model = model;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Builder voice(String voice) {
|
||||
this.voice = voice;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Builder format(String format) {
|
||||
this.format = format;
|
||||
return this;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Builder speed(Double speed) {
|
||||
this.speed = speed;
|
||||
return this;
|
||||
}
|
||||
|
||||
public DefaultTextToSpeechOptions build() {
|
||||
return new DefaultTextToSpeechOptions(this.model, this.voice, this.format, this.speed);
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import java.util.Arrays;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.springframework.ai.model.ModelResult;
|
||||
import org.springframework.ai.model.ResultMetadata;
|
||||
|
||||
/**
|
||||
* Implementation of the {@link ModelResult} interface for the speech model.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class Speech implements ModelResult<byte[]> {
|
||||
|
||||
private final byte[] speech;
|
||||
|
||||
public Speech(byte[] speech) {
|
||||
this.speech = speech;
|
||||
}
|
||||
|
||||
@Override
|
||||
public byte[] getOutput() {
|
||||
return this.speech;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!(o instanceof Speech speech1))
|
||||
return false;
|
||||
return Arrays.equals(speech, speech1.speech);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(Arrays.hashCode(speech));
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "Speech{" + "speech=" + Arrays.toString(speech) + '}';
|
||||
}
|
||||
|
||||
@Override
|
||||
public ResultMetadata getMetadata() {
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,45 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import reactor.core.publisher.Flux;
|
||||
|
||||
import org.springframework.ai.model.StreamingModel;
|
||||
|
||||
/**
|
||||
* Interface for the streaming text to speech model.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public interface StreamingTextToSpeechModel extends StreamingModel<TextToSpeechPrompt, TextToSpeechResponse> {
|
||||
|
||||
default Flux<byte[]> stream(String text) {
|
||||
TextToSpeechPrompt prompt = new TextToSpeechPrompt(text);
|
||||
return stream(prompt).map(response -> (response.getResult() == null || response.getResult().getOutput() == null)
|
||||
? new byte[0] : response.getResult().getOutput());
|
||||
}
|
||||
|
||||
default Flux<byte[]> stream(String text, TextToSpeechOptions options) {
|
||||
TextToSpeechPrompt prompt = new TextToSpeechPrompt(text, options);
|
||||
return stream(prompt).map(response -> (response.getResult() == null || response.getResult().getOutput() == null)
|
||||
? new byte[0] : response.getResult().getOutput());
|
||||
}
|
||||
|
||||
@Override
|
||||
Flux<TextToSpeechResponse> stream(TextToSpeechPrompt prompt);
|
||||
|
||||
}
|
||||
@@ -0,0 +1,58 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
/**
|
||||
* Implementation of the {@link TextToSpeechMessage} interface for the text to speech
|
||||
* message.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class TextToSpeechMessage {
|
||||
|
||||
private final String text;
|
||||
|
||||
public TextToSpeechMessage(String text) {
|
||||
this.text = text;
|
||||
}
|
||||
|
||||
public String getText() {
|
||||
return text;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!(o instanceof TextToSpeechMessage that))
|
||||
return false;
|
||||
return Objects.equals(text, that.text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(text);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TextToSpeechMessage{" + "text='" + text + '\'' + '}';
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,42 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import org.springframework.ai.model.Model;
|
||||
import org.springframework.ai.model.ModelResult;
|
||||
|
||||
/**
|
||||
* Interface for the text to speech model.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public interface TextToSpeechModel extends Model<TextToSpeechPrompt, TextToSpeechResponse> {
|
||||
|
||||
default byte[] call(String text) {
|
||||
TextToSpeechPrompt prompt = new TextToSpeechPrompt(text);
|
||||
ModelResult<byte[]> result = call(prompt).getResult();
|
||||
return (result != null) ? result.getOutput() : new byte[0];
|
||||
}
|
||||
|
||||
@Override
|
||||
TextToSpeechResponse call(TextToSpeechPrompt prompt);
|
||||
|
||||
default TextToSpeechOptions getDefaultOptions() {
|
||||
return TextToSpeechOptions.builder().build();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,114 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import org.springframework.ai.model.ModelOptions;
|
||||
import org.springframework.lang.Nullable;
|
||||
|
||||
/**
|
||||
* Interface for text-to-speech model options. Defines the common, portable options that
|
||||
* should be supported by all implementations.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public interface TextToSpeechOptions extends ModelOptions {
|
||||
|
||||
/**
|
||||
* Creates a new {@link TextToSpeechOptions.Builder} to create the default
|
||||
* {@link TextToSpeechOptions}.
|
||||
* @return Returns a new {@link TextToSpeechOptions.Builder}.
|
||||
*/
|
||||
static TextToSpeechOptions.Builder builder() {
|
||||
return new DefaultTextToSpeechOptions.Builder();
|
||||
}
|
||||
|
||||
/**
|
||||
* Returns the model to use for text-to-speech.
|
||||
* @return The model name.
|
||||
*/
|
||||
@Nullable
|
||||
String getModel();
|
||||
|
||||
/**
|
||||
* Returns the voice to use for text-to-speech.
|
||||
* @return The voice identifier.
|
||||
*/
|
||||
@Nullable
|
||||
String getVoice();
|
||||
|
||||
/**
|
||||
* Returns the output format for the generated audio.
|
||||
* @return The output format (e.g., "mp3", "wav").
|
||||
*/
|
||||
@Nullable
|
||||
String getFormat();
|
||||
|
||||
/**
|
||||
* Returns the speed of the generated speech.
|
||||
* @return The speech speed.
|
||||
*/
|
||||
@Nullable
|
||||
Double getSpeed();
|
||||
|
||||
/**
|
||||
* Returns a copy of this {@link TextToSpeechOptions}.
|
||||
* @return a copy of this {@link TextToSpeechOptions}
|
||||
*/
|
||||
<T extends TextToSpeechOptions> T copy();
|
||||
|
||||
/**
|
||||
* Builder for {@link TextToSpeechOptions}.
|
||||
*/
|
||||
interface Builder {
|
||||
|
||||
/**
|
||||
* Sets the model to use for text-to-speech.
|
||||
* @param model The model name.
|
||||
* @return This builder.
|
||||
*/
|
||||
Builder model(String model);
|
||||
|
||||
/**
|
||||
* Sets the voice to use for text-to-speech.
|
||||
* @param voice The voice identifier.
|
||||
* @return This builder.
|
||||
*/
|
||||
Builder voice(String voice);
|
||||
|
||||
/**
|
||||
* Sets the output format for the generated audio.
|
||||
* @param format The output format (e.g., "mp3", "wav").
|
||||
* @return This builder.
|
||||
*/
|
||||
Builder format(String format);
|
||||
|
||||
/**
|
||||
* Sets the speed of the generated speech.
|
||||
* @param speed The speech speed.
|
||||
* @return This builder.
|
||||
*/
|
||||
Builder speed(Double speed);
|
||||
|
||||
/**
|
||||
* Builds the {@link TextToSpeechOptions}.
|
||||
* @return The {@link TextToSpeechOptions}.
|
||||
*/
|
||||
TextToSpeechOptions build();
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,84 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import java.util.Objects;
|
||||
|
||||
import org.springframework.ai.model.ModelRequest;
|
||||
|
||||
/**
|
||||
* Implementation of the {@link ModelRequest} interface for the text to speech prompt.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class TextToSpeechPrompt implements ModelRequest<TextToSpeechMessage> {
|
||||
|
||||
private final TextToSpeechMessage message;
|
||||
|
||||
private TextToSpeechOptions options;
|
||||
|
||||
public TextToSpeechPrompt(String text) {
|
||||
this(new TextToSpeechMessage(text), TextToSpeechOptions.builder().build());
|
||||
}
|
||||
|
||||
public TextToSpeechPrompt(String text, TextToSpeechOptions options) {
|
||||
this(new TextToSpeechMessage(text), options);
|
||||
}
|
||||
|
||||
public TextToSpeechPrompt(TextToSpeechMessage message) {
|
||||
this(message, TextToSpeechOptions.builder().build());
|
||||
}
|
||||
|
||||
public TextToSpeechPrompt(TextToSpeechMessage message, TextToSpeechOptions options) {
|
||||
this.message = message;
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TextToSpeechMessage getInstructions() {
|
||||
return this.message;
|
||||
}
|
||||
|
||||
@Override
|
||||
public TextToSpeechOptions getOptions() {
|
||||
return this.options;
|
||||
}
|
||||
|
||||
public void setOptions(TextToSpeechOptions options) {
|
||||
this.options = options;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!(o instanceof TextToSpeechPrompt that))
|
||||
return false;
|
||||
return Objects.equals(message, that.message) && Objects.equals(options, that.options);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(message, options);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TextToSpeechPrompt{" + "message=" + message + ", options=" + options + '}';
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,78 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Objects;
|
||||
|
||||
import org.springframework.ai.model.ModelResponse;
|
||||
import org.springframework.ai.model.ResponseMetadata;
|
||||
|
||||
/**
|
||||
* Implementation of the {@link ModelResponse} interface for the text to speech response.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class TextToSpeechResponse implements ModelResponse<Speech> {
|
||||
|
||||
private final List<Speech> results;
|
||||
|
||||
private final TextToSpeechResponseMetadata textToSpeechResponseMetadata;
|
||||
|
||||
public TextToSpeechResponse(List<Speech> results) {
|
||||
this(results, null);
|
||||
}
|
||||
|
||||
public TextToSpeechResponse(List<Speech> results, TextToSpeechResponseMetadata textToSpeechResponseMetadata) {
|
||||
this.results = results;
|
||||
this.textToSpeechResponseMetadata = textToSpeechResponseMetadata;
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Speech> getResults() {
|
||||
return this.results;
|
||||
}
|
||||
|
||||
public Speech getResult() {
|
||||
return this.results.get(0);
|
||||
}
|
||||
|
||||
@Override
|
||||
public TextToSpeechResponseMetadata getMetadata() {
|
||||
return this.textToSpeechResponseMetadata;
|
||||
}
|
||||
|
||||
@Override
|
||||
public boolean equals(Object o) {
|
||||
if (this == o)
|
||||
return true;
|
||||
if (!(o instanceof TextToSpeechResponse that))
|
||||
return false;
|
||||
return Objects.equals(results, that.results);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int hashCode() {
|
||||
return Objects.hash(results);
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
return "TextToSpeechResponse{" + "results=" + results + '}';
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,12 @@
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import org.springframework.ai.model.MutableResponseMetadata;
|
||||
|
||||
/**
|
||||
* Metadata associated with an audio transcription response.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
public class TextToSpeechResponseMetadata extends MutableResponseMetadata {
|
||||
|
||||
}
|
||||
@@ -0,0 +1,67 @@
|
||||
/*
|
||||
* Copyright 2025-2025 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.audio.tts;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.api.Assertions.within;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
/**
|
||||
* Unit tests for {@link DefaultTextToSpeechOptions}.
|
||||
*
|
||||
* @author Alexandros Pappas
|
||||
*/
|
||||
class DefaultTextToSpeechOptionsTests {
|
||||
|
||||
@Test
|
||||
void testBuilderWithAllFields() {
|
||||
TextToSpeechOptions options = DefaultTextToSpeechOptions.builder()
|
||||
.model("test-model")
|
||||
.voice("test-voice")
|
||||
.format("test-format")
|
||||
.speed(0.8)
|
||||
.build();
|
||||
|
||||
assertThat(options.getModel()).isEqualTo("test-model");
|
||||
assertThat(options.getVoice()).isEqualTo("test-voice");
|
||||
assertThat(options.getFormat()).isEqualTo("test-format");
|
||||
assertThat(options.getSpeed()).isCloseTo(0.8, within(0.0001));
|
||||
}
|
||||
|
||||
@Test
|
||||
void testCopy() {
|
||||
TextToSpeechOptions original = DefaultTextToSpeechOptions.builder()
|
||||
.model("test-model")
|
||||
.voice("test-voice")
|
||||
.format("test-format")
|
||||
.speed(0.8)
|
||||
.build();
|
||||
|
||||
DefaultTextToSpeechOptions copied = original.copy();
|
||||
assertThat(copied).isNotSameAs(original).isEqualTo(original);
|
||||
}
|
||||
|
||||
@Test
|
||||
void testDefaultValues() {
|
||||
DefaultTextToSpeechOptions options = DefaultTextToSpeechOptions.builder().build();
|
||||
assertThat(options.getModel()).isNull();
|
||||
assertThat(options.getVoice()).isNull();
|
||||
assertThat(options.getFormat()).isNull();
|
||||
assertThat(options.getSpeed()).isNull();
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,44 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
		 xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
		 xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
	<modelVersion>4.0.0</modelVersion>

	<!-- Inherit build settings from the Spring AI parent POM. -->
	<parent>
		<groupId>org.springframework.ai</groupId>
		<artifactId>spring-ai-parent</artifactId>
		<version>1.1.0-SNAPSHOT</version>
		<relativePath>../../pom.xml</relativePath>
	</parent>

	<artifactId>spring-ai-starter-model-elevenlabs</artifactId>
	<packaging>jar</packaging>
	<name>Spring AI Starter - ElevenLabs</name>
	<description>Spring AI ElevenLabs Auto Configuration</description>
	<url>https://github.com/spring-projects/spring-ai</url>

	<scm>
		<url>https://github.com/spring-projects/spring-ai</url>
		<connection>git://github.com/spring-projects/spring-ai.git</connection>
		<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
	</scm>

	<dependencies>

		<!-- Spring Boot starter. -->
		<dependency>
			<groupId>org.springframework.boot</groupId>
			<artifactId>spring-boot-starter</artifactId>
		</dependency>

		<!-- ElevenLabs auto-configuration module, pinned to the parent version. -->
		<dependency>
			<groupId>org.springframework.ai</groupId>
			<artifactId>spring-ai-autoconfigure-model-elevenlabs</artifactId>
			<version>${project.parent.version}</version>
		</dependency>

		<!-- ElevenLabs model module, pinned to the parent version. -->
		<dependency>
			<groupId>org.springframework.ai</groupId>
			<artifactId>spring-ai-elevenlabs</artifactId>
			<version>${project.parent.version}</version>
		</dependency>
	</dependencies>

</project>
|
||||
Reference in New Issue
Block a user