feat: Add ElevenLabs Text-to-Speech support

This commit introduces a new `spring-ai-elevenlabs` module to integrate ElevenLabs' TTS service into Spring AI.

Key Features:
- **New Module:** `spring-ai-elevenlabs` with a Spring Boot starter for auto-configuration.
- **Core Classes:**
  - `ElevenLabsTextToSpeechModel`: Implements `TextToSpeechModel` and `StreamingTextToSpeechModel`.
  - `ElevenLabsTextToSpeechOptions`: Configurable TTS options (voice, format, speed, etc.).
  - `ElevenLabsApi` and `ElevenLabsVoicesApi`: Low-level REST clients for ElevenLabs APIs.
  - DTOs: `Speech`, `TextToSpeechMessage`, `TextToSpeechPrompt`, `TextToSpeechResponse`.
- **Auto-configuration:**
  - `ElevenLabsAutoConfiguration`, `ElevenLabsConnectionProperties`, and `ElevenLabsSpeechProperties`.
- **Functionality:**
  - Text-to-speech conversion with ElevenLabs voices.
  - Real-time streaming playback support.
  - Flexible runtime configuration via properties and model options.
- **Documentation:** Updated Spring AI reference guide with usage examples.
- **Tests:** Includes unit and integration tests for both success and failure scenarios.

Note:
- Some `tts` package classes will be relocated to the `core` module to support shared TTS abstractions, including upcoming OpenAI Speech API support.

- Added metadata support to `TextToSpeechResponse`.
- Added tests and updated documentation.

Signed-off-by: Alexandros Pappas <apappascs@gmail.com>
This commit is contained in:
Alexandros Pappas
2025-03-02 21:11:15 +01:00
committed by Mark Pollack
parent 2be1e42505
commit 9398850c2b
43 changed files with 5472 additions and 6 deletions

View File

@@ -0,0 +1,90 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xmlns="http://maven.apache.org/POM/4.0.0"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<!-- Build descriptor for the ElevenLabs auto-configuration module. -->
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from the root spring-ai-parent POM located three directories up. -->
<parent>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-parent</artifactId>
<version>1.1.0-SNAPSHOT</version>
<relativePath>../../../pom.xml</relativePath>
</parent>
<artifactId>spring-ai-autoconfigure-model-elevenlabs</artifactId>
<packaging>jar</packaging>
<name>Spring AI ElevenLabs Auto Configuration</name>
<description>Spring AI ElevenLabs Auto Configuration</description>
<url>https://github.com/spring-projects/spring-ai</url>
<scm>
<url>https://github.com/spring-projects/spring-ai</url>
<connection>git://github.com/spring-projects/spring-ai.git</connection>
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
</scm>
<dependencies>
<!-- Spring AI dependencies -->
<!-- The model module is optional, so it is not passed on transitively to consumers of this artifact. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elevenlabs</artifactId>
<version>${project.parent.version}</version>
<optional>true</optional>
</dependency>
<!-- Spring AI auto configurations -->
<!-- NOTE(review): ElevenLabsAutoConfiguration does not visibly reference any tool auto-configuration type; confirm this dependency is required. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-autoconfigure-model-tool</artifactId>
<version>${project.parent.version}</version>
</dependency>
<!-- Provides SpringAiRetryAutoConfiguration, which ElevenLabsAutoConfiguration imports. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-autoconfigure-retry</artifactId>
<version>${project.parent.version}</version>
</dependency>
<!-- Boot dependencies -->
<!-- No explicit versions below: they are expected to come from inherited dependency management (presumably the parent POM; verify). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-configuration-processor</artifactId>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-autoconfigure-processor</artifactId>
<optional>true</optional>
</dependency>
<!-- Test dependencies -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-test</artifactId>
<version>${project.parent.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter-test</artifactId>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.mockito</groupId>
<artifactId>mockito-core</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,79 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.model.elevenlabs.autoconfigure;
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.ai.retry.autoconfigure.SpringAiRetryAutoConfiguration;
import org.springframework.beans.factory.ObjectProvider;
import org.springframework.boot.autoconfigure.AutoConfiguration;
import org.springframework.boot.autoconfigure.ImportAutoConfiguration;
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
import org.springframework.boot.autoconfigure.condition.ConditionalOnProperty;
import org.springframework.boot.autoconfigure.web.client.RestClientAutoConfiguration;
import org.springframework.boot.autoconfigure.web.reactive.function.client.WebClientAutoConfiguration;
import org.springframework.boot.context.properties.EnableConfigurationProperties;
import org.springframework.context.annotation.Bean;
import org.springframework.retry.support.RetryTemplate;
import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.web.client.RestClient;
import org.springframework.web.reactive.function.client.WebClient;
/**
 * {@link AutoConfiguration Auto-configuration} that registers the ElevenLabs API client
 * and the text-to-speech model built on top of it.
 * <p>
 * Active by default; it backs off when the {@code enabled} property under
 * {@link ElevenLabsSpeechProperties#CONFIG_PREFIX} is set to anything other than
 * {@code true}, or when {@link ElevenLabsApi} is not on the classpath.
 *
 * @author Alexandros Pappas
 */
@AutoConfiguration(after = { RestClientAutoConfiguration.class, SpringAiRetryAutoConfiguration.class,
		WebClientAutoConfiguration.class })
@ConditionalOnClass(ElevenLabsApi.class)
@EnableConfigurationProperties({ ElevenLabsSpeechProperties.class, ElevenLabsConnectionProperties.class })
@ConditionalOnProperty(prefix = ElevenLabsSpeechProperties.CONFIG_PREFIX, name = "enabled", havingValue = "true",
		matchIfMissing = true)
@ImportAutoConfiguration(classes = { SpringAiRetryAutoConfiguration.class, RestClientAutoConfiguration.class,
		WebClientAutoConfiguration.class })
public class ElevenLabsAutoConfiguration {

	/**
	 * Creates the low-level ElevenLabs API client unless the application already defines
	 * one.
	 * @param connectionProperties source of the base URL and API key
	 * @param restClientBuilderProvider optional {@link RestClient.Builder}; a fresh
	 * builder is used when none is available
	 * @param webClientBuilderProvider optional {@link WebClient.Builder}; a fresh builder
	 * is used when none is available
	 * @param responseErrorHandler error handler handed to the API client
	 * @return the configured API client
	 */
	@Bean
	@ConditionalOnMissingBean
	public ElevenLabsApi elevenLabsApi(ElevenLabsConnectionProperties connectionProperties,
			ObjectProvider<RestClient.Builder> restClientBuilderProvider,
			ObjectProvider<WebClient.Builder> webClientBuilderProvider, ResponseErrorHandler responseErrorHandler) {
		String baseUrl = connectionProperties.getBaseUrl();
		String apiKey = connectionProperties.getApiKey();
		RestClient.Builder restClientBuilder = restClientBuilderProvider.getIfAvailable(RestClient::builder);
		WebClient.Builder webClientBuilder = webClientBuilderProvider.getIfAvailable(WebClient::builder);

		return ElevenLabsApi.builder()
			.baseUrl(baseUrl)
			.apiKey(apiKey)
			.restClientBuilder(restClientBuilder)
			.webClientBuilder(webClientBuilder)
			.responseErrorHandler(responseErrorHandler)
			.build();
	}

	/**
	 * Creates the text-to-speech model unless the application already defines one.
	 * @param elevenLabsApi API client the model delegates to
	 * @param speechProperties source of the default speech options
	 * @param retryTemplate retry template applied by the model to API calls
	 * @return the configured text-to-speech model
	 */
	@Bean
	@ConditionalOnMissingBean
	public ElevenLabsTextToSpeechModel elevenLabsSpeechModel(ElevenLabsApi elevenLabsApi,
			ElevenLabsSpeechProperties speechProperties, RetryTemplate retryTemplate) {
		var modelBuilder = ElevenLabsTextToSpeechModel.builder();
		modelBuilder.elevenLabsApi(elevenLabsApi);
		modelBuilder.defaultOptions(speechProperties.getOptions());
		modelBuilder.retryTemplate(retryTemplate);
		return modelBuilder.build();
	}

}

View File

@@ -0,0 +1,58 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.model.elevenlabs.autoconfigure;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.boot.context.properties.ConfigurationProperties;
/**
 * Connection settings for the ElevenLabs API, bound from properties under
 * {@link #CONFIG_PREFIX}.
 *
 * @author Alexandros Pappas
 */
@ConfigurationProperties(ElevenLabsConnectionProperties.CONFIG_PREFIX)
public class ElevenLabsConnectionProperties {

	public static final String CONFIG_PREFIX = "spring.ai.elevenlabs";

	/**
	 * ElevenLabs API base URL.
	 */
	private String baseUrl = ElevenLabsApi.DEFAULT_BASE_URL;

	/**
	 * ElevenLabs API access key.
	 */
	private String apiKey;

	/**
	 * @return the configured base URL; {@link ElevenLabsApi#DEFAULT_BASE_URL} unless
	 * overridden
	 */
	public String getBaseUrl() {
		return this.baseUrl;
	}

	/**
	 * @param baseUrl base URL to use for API requests
	 */
	public void setBaseUrl(String baseUrl) {
		this.baseUrl = baseUrl;
	}

	/**
	 * @return the configured API key, or {@code null} when none was set
	 */
	public String getApiKey() {
		return this.apiKey;
	}

	/**
	 * @param apiKey API key to use for API requests
	 */
	public void setApiKey(String apiKey) {
		this.apiKey = apiKey;
	}

}

View File

@@ -0,0 +1,68 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.model.elevenlabs.autoconfigure;
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechOptions;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.boot.context.properties.ConfigurationProperties;
import org.springframework.boot.context.properties.NestedConfigurationProperty;
/**
 * Text-to-speech settings for ElevenLabs, bound from properties under
 * {@link #CONFIG_PREFIX}.
 *
 * @author Alexandros Pappas
 */
@ConfigurationProperties(ElevenLabsSpeechProperties.CONFIG_PREFIX)
public class ElevenLabsSpeechProperties {

	public static final String CONFIG_PREFIX = "spring.ai.elevenlabs.tts";

	public static final String DEFAULT_MODEL_ID = "eleven_turbo_v2_5";

	private static final String DEFAULT_VOICE_ID = "9BWtsMINqrJLrRacOk9x";

	private static final ElevenLabsApi.OutputFormat DEFAULT_OUTPUT_FORMAT = ElevenLabsApi.OutputFormat.MP3_22050_32;

	/**
	 * Enable ElevenLabs speech model.
	 */
	private boolean enabled = true;

	// Starts out with the default model, voice and output format; replaced wholesale by
	// setOptions.
	@NestedConfigurationProperty
	private ElevenLabsTextToSpeechOptions options = createDefaultOptions();

	// Builds a fresh options object carrying the module defaults.
	private static ElevenLabsTextToSpeechOptions createDefaultOptions() {
		var builder = ElevenLabsTextToSpeechOptions.builder();
		builder.modelId(DEFAULT_MODEL_ID);
		builder.voiceId(DEFAULT_VOICE_ID);
		builder.outputFormat(DEFAULT_OUTPUT_FORMAT.getValue());
		return builder.build();
	}

	/**
	 * @return whether the ElevenLabs speech model is enabled ({@code true} by default)
	 */
	public boolean isEnabled() {
		return this.enabled;
	}

	/**
	 * @param enabled whether the ElevenLabs speech model is enabled
	 */
	public void setEnabled(boolean enabled) {
		this.enabled = enabled;
	}

	/**
	 * @return the default text-to-speech options
	 */
	public ElevenLabsTextToSpeechOptions getOptions() {
		return this.options;
	}

	/**
	 * @param options the default text-to-speech options
	 */
	public void setOptions(ElevenLabsTextToSpeechOptions options) {
		this.options = options;
	}

}

View File

@@ -0,0 +1,16 @@
#
# Copyright 2025-2025 the original author or authors.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# https://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.
#
# Auto-configuration class registered by this module (must match the class name exactly).
org.springframework.ai.model.elevenlabs.autoconfigure.ElevenLabsAutoConfiguration

View File

@@ -0,0 +1,84 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.model.elevenlabs.autoconfigure;
import java.util.Arrays;
import static org.assertj.core.api.Assertions.assertThat;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel;
import org.springframework.boot.autoconfigure.AutoConfigurations;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
/**
 * Integration tests for the {@link ElevenLabsAutoConfiguration}.
 * <p>
 * Enabled only when the {@code ELEVEN_LABS_API_KEY} environment variable holds a
 * non-empty value, since that value is passed to the context as
 * {@code spring.ai.elevenlabs.api-key}.
 *
 * @author Alexandros Pappas
 */
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
public class ElevenLabsAutoConfigurationIT {

	private static final org.apache.commons.logging.Log logger = org.apache.commons.logging.LogFactory
		.getLog(ElevenLabsAutoConfigurationIT.class);

	private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
		.withPropertyValues("spring.ai.elevenlabs.api-key=" + System.getenv("ELEVEN_LABS_API_KEY"))
		.withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class));

	@Test
	void speech() {
		this.contextRunner.run(context -> {
			ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class);
			byte[] response = speechModel.call("H");
			assertMp3Audio(response);
		});
	}

	@Test
	void speechStream() {
		// NOTE(review): despite its name, this test goes through the blocking
		// call(String) API exactly like speech(); the streaming path is not exercised
		// here. Consider switching it to the model's stream API.
		this.contextRunner.run(context -> {
			ElevenLabsTextToSpeechModel speechModel = context.getBean(ElevenLabsTextToSpeechModel.class);
			byte[] response = speechModel.call("Hello");
			assertMp3Audio(response);
		});
	}

	/**
	 * Asserts that the response is non-empty and starts like MP3 data, then logs it at
	 * debug level.
	 * @param response audio bytes returned by the model
	 */
	private void assertMp3Audio(byte[] response) {
		assertThat(response).isNotNull();
		assertThat(response).isNotEmpty();
		assertThat(verifyMp3FrameHeader(response))
			.withFailMessage("Expected MP3 frame header to be present in the response, but it was not found.")
			.isTrue();
		// Only stringify the (potentially large) audio payload when it will be logged.
		if (logger.isDebugEnabled()) {
			logger.debug("Response: " + Arrays.toString(response));
		}
	}

	/**
	 * Checks whether the given bytes start like MP3 data.
	 * @param audioResponse bytes to inspect, may be {@code null}
	 * @return {@code true} if the data begins with an {@code ID3} tag or with a frame
	 * sync pattern (first byte {@code 0xFF}, top three bits of the second byte set);
	 * {@code false} for {@code null} or fewer than three bytes
	 */
	public boolean verifyMp3FrameHeader(byte[] audioResponse) {
		if (audioResponse == null || audioResponse.length < 3) {
			return false;
		}
		// Accept ID3 tag (MP3 metadata) or MP3 frame header
		boolean hasId3 = audioResponse[0] == 'I' && audioResponse[1] == 'D' && audioResponse[2] == '3';
		boolean hasFrame = (audioResponse[0] & 0xFF) == 0xFF && (audioResponse[1] & 0xE0) == 0xE0;
		return hasId3 || hasFrame;
	}

}

View File

@@ -0,0 +1,141 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.model.elevenlabs.autoconfigure;
import static org.assertj.core.api.Assertions.assertThat;
import org.junit.jupiter.api.Test;
import org.springframework.ai.elevenlabs.ElevenLabsTextToSpeechModel;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.boot.autoconfigure.AutoConfigurations;
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
/**
 * Verifies property binding for {@link ElevenLabsSpeechProperties} and
 * {@link ElevenLabsConnectionProperties}, and the {@code enabled} switch of the
 * auto-configuration.
 *
 * @author Alexandros Pappas
 */
public class ElevenLabsPropertiesTests {

	// Builds a context runner with the given properties and the ElevenLabs
	// auto-configuration applied.
	private static ApplicationContextRunner runnerWith(String... properties) {
		return new ApplicationContextRunner().withPropertyValues(properties)
			.withConfiguration(AutoConfigurations.of(ElevenLabsAutoConfiguration.class));
	}

	@Test
	public void connectionProperties() {
		runnerWith(
		// @formatter:off
				"spring.ai.elevenlabs.api-key=YOUR_API_KEY",
				"spring.ai.elevenlabs.base-url=https://custom.api.elevenlabs.io",
				"spring.ai.elevenlabs.tts.options.model-id=custom-model",
				"spring.ai.elevenlabs.tts.options.voice=custom-voice",
				"spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6",
				"spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8",
				"spring.ai.elevenlabs.tts.options.voice-settings.style=0.2",
				"spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false",
				"spring.ai.elevenlabs.tts.options.voice-settings.speed=1.5"
		// @formatter:on
		).run(context -> {
			var speechProperties = context.getBean(ElevenLabsSpeechProperties.class);
			var connectionProperties = context.getBean(ElevenLabsConnectionProperties.class);

			assertThat(connectionProperties.getApiKey()).isEqualTo("YOUR_API_KEY");
			assertThat(connectionProperties.getBaseUrl()).isEqualTo("https://custom.api.elevenlabs.io");

			var options = speechProperties.getOptions();
			assertThat(options.getModelId()).isEqualTo("custom-model");
			assertThat(options.getVoice()).isEqualTo("custom-voice");

			var voiceSettings = options.getVoiceSettings();
			assertThat(voiceSettings.stability()).isEqualTo(0.6);
			assertThat(voiceSettings.similarityBoost()).isEqualTo(0.8);
			assertThat(voiceSettings.style()).isEqualTo(0.2);
			assertThat(voiceSettings.useSpeakerBoost()).isFalse();
			assertThat(options.getSpeed()).isEqualTo(1.5f);

			// enabled is true by default
			assertThat(speechProperties.isEnabled()).isTrue();
		});
	}

	@Test
	public void speechOptionsTest() {
		runnerWith(
		// @formatter:off
				"spring.ai.elevenlabs.api-key=YOUR_API_KEY",
				"spring.ai.elevenlabs.tts.options.model-id=custom-model",
				"spring.ai.elevenlabs.tts.options.voice=custom-voice",
				"spring.ai.elevenlabs.tts.options.format=pcm_44100",
				"spring.ai.elevenlabs.tts.options.voice-settings.stability=0.6",
				"spring.ai.elevenlabs.tts.options.voice-settings.similarity-boost=0.8",
				"spring.ai.elevenlabs.tts.options.voice-settings.style=0.2",
				"spring.ai.elevenlabs.tts.options.voice-settings.use-speaker-boost=false",
				"spring.ai.elevenlabs.tts.options.voice-settings.speed=1.2",
				"spring.ai.elevenlabs.tts.options.language-code=en",
				"spring.ai.elevenlabs.tts.options.seed=12345",
				"spring.ai.elevenlabs.tts.options.previous-text=previous",
				"spring.ai.elevenlabs.tts.options.next-text=next",
				"spring.ai.elevenlabs.tts.options.apply-text-normalization=ON",
				"spring.ai.elevenlabs.tts.options.apply-language-text-normalization=true"
		// @formatter:on
		).run(context -> {
			var options = context.getBean(ElevenLabsSpeechProperties.class).getOptions();

			assertThat(options.getModelId()).isEqualTo("custom-model");
			assertThat(options.getVoice()).isEqualTo("custom-voice");
			assertThat(options.getFormat()).isEqualTo("pcm_44100");

			var voiceSettings = options.getVoiceSettings();
			assertThat(voiceSettings.stability()).isEqualTo(0.6);
			assertThat(voiceSettings.similarityBoost()).isEqualTo(0.8);
			assertThat(voiceSettings.style()).isEqualTo(0.2);
			assertThat(voiceSettings.useSpeakerBoost()).isFalse();
			assertThat(voiceSettings.speed()).isEqualTo(1.2);

			assertThat(options.getSpeed()).isEqualTo(1.2);
			assertThat(options.getLanguageCode()).isEqualTo("en");
			assertThat(options.getSeed()).isEqualTo(12345);
			assertThat(options.getPreviousText()).isEqualTo("previous");
			assertThat(options.getNextText()).isEqualTo("next");
			assertThat(options.getApplyTextNormalization())
				.isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
			assertThat(options.getApplyLanguageTextNormalization()).isTrue();
		});
	}

	@Test
	public void speechActivation() {
		// It is enabled by default
		runnerWith("spring.ai.elevenlabs.api-key=YOUR_API_KEY").run(context -> {
			assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty();
			assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty();
		});

		// Explicitly enable the text-to-speech autoconfiguration.
		runnerWith("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=true")
			.run(context -> {
				assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isNotEmpty();
				assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isNotEmpty();
			});

		// Explicitly disable the text-to-speech autoconfiguration.
		runnerWith("spring.ai.elevenlabs.api-key=YOUR_API_KEY", "spring.ai.elevenlabs.tts.enabled=false")
			.run(context -> {
				assertThat(context.getBeansOfType(ElevenLabsSpeechProperties.class)).isEmpty();
				assertThat(context.getBeansOfType(ElevenLabsTextToSpeechModel.class)).isEmpty();
			});
	}

}

View File

@@ -0,0 +1,3 @@
# Spring AI - ElevenLabs Text-to-Speech
[ElevenLabs Text-to-Speech Documentation](https://docs.spring.io/spring-ai/reference/api/audio/speech/elevenlabs-speech.html)

View File

@@ -0,0 +1,92 @@
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<!-- Build descriptor for the ElevenLabs text-to-speech model module. -->
<modelVersion>4.0.0</modelVersion>
<!-- Inherits from the root spring-ai-parent POM located two directories up. -->
<parent>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-parent</artifactId>
<version>1.1.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>spring-ai-elevenlabs</artifactId>
<packaging>jar</packaging>
<name>Spring AI Model - ElevenLabs</name>
<description>ElevenLabs Text-to-Speech model support</description>
<url>https://github.com/spring-projects/spring-ai</url>
<scm>
<url>https://github.com/spring-projects/spring-ai</url>
<connection>git://github.com/spring-projects/spring-ai.git</connection>
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
</scm>
<properties>
<!-- ElevenLabs-specific properties here, if needed -->
</properties>
<dependencies>
<!-- production dependencies -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-model</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-retry</artifactId>
<version>${project.parent.version}</version>
</dependency>
<!-- NOTE(review): declared without a scope, so it ships as a compile dependency; confirm production code (not only tests) needs json-path. -->
<dependency>
<groupId>io.rest-assured</groupId>
<artifactId>json-path</artifactId>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-context-support</artifactId>
</dependency>
<dependency>
<groupId>org.springframework</groupId>
<artifactId>spring-webflux</artifactId>
</dependency>
<dependency>
<groupId>org.slf4j</groupId>
<artifactId>slf4j-api</artifactId>
</dependency>
<!-- test dependencies -->
<!-- NOTE(review): uses ${project.version} while the production Spring AI dependencies above use ${project.parent.version}; consider aligning. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-test</artifactId>
<version>${project.version}</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.micrometer</groupId>
<artifactId>micrometer-observation-test</artifactId>
<scope>test</scope>
</dependency>
<!-- NOTE(review): the only third-party dependency here with an explicitly pinned version; confirm 2.11.1 is intended rather than a managed version. -->
<dependency>
<groupId>com.fasterxml.jackson.dataformat</groupId>
<artifactId>jackson-dataformat-xml</artifactId>
<version>2.11.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>io.projectreactor</groupId>
<artifactId>reactor-test</artifactId>
<scope>test</scope>
</dependency>
</dependencies>
</project>

View File

@@ -0,0 +1,219 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs;
import java.util.List;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;
import reactor.core.publisher.Flux;
import org.springframework.ai.audio.tts.Speech;
import org.springframework.ai.audio.tts.StreamingTextToSpeechModel;
import org.springframework.ai.audio.tts.TextToSpeechModel;
import org.springframework.ai.audio.tts.TextToSpeechPrompt;
import org.springframework.ai.audio.tts.TextToSpeechResponse;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.ai.retry.RetryUtils;
import org.springframework.retry.support.RetryTemplate;
import org.springframework.util.Assert;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
/**
* Implementation of the {@link TextToSpeechModel} and {@link StreamingTextToSpeechModel}
* interfaces
*
* @author Alexandros Pappas
*/
public class ElevenLabsTextToSpeechModel implements TextToSpeechModel, StreamingTextToSpeechModel {
private final Logger logger = LoggerFactory.getLogger(getClass());
private final ElevenLabsApi elevenLabsApi;
private final RetryTemplate retryTemplate;
private final ElevenLabsTextToSpeechOptions defaultOptions;
public ElevenLabsTextToSpeechModel(ElevenLabsApi elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions) {
this(elevenLabsApi, defaultOptions, RetryUtils.DEFAULT_RETRY_TEMPLATE);
}
public ElevenLabsTextToSpeechModel(ElevenLabsApi elevenLabsApi, ElevenLabsTextToSpeechOptions defaultOptions,
RetryTemplate retryTemplate) {
Assert.notNull(elevenLabsApi, "ElevenLabsApi must not be null");
Assert.notNull(defaultOptions, "ElevenLabsSpeechOptions must not be null");
Assert.notNull(retryTemplate, "RetryTemplate must not be null");
this.elevenLabsApi = elevenLabsApi;
this.defaultOptions = defaultOptions;
this.retryTemplate = retryTemplate;
}
public static Builder builder() {
return new Builder();
}
@Override
public TextToSpeechResponse call(TextToSpeechPrompt prompt) {
RequestContext requestContext = prepareRequest(prompt);
byte[] audioData = retryTemplate.execute(context -> {
var response = elevenLabsApi.textToSpeech(requestContext.request, requestContext.voiceId,
requestContext.queryParameters);
if (response.getBody() == null) {
logger.warn("No speech response returned for request: {}", requestContext.request);
return new byte[0];
}
return response.getBody();
});
return new TextToSpeechResponse(List.of(new Speech(audioData)));
}
@Override
public Flux<TextToSpeechResponse> stream(TextToSpeechPrompt prompt) {
RequestContext requestContext = prepareRequest(prompt);
return retryTemplate.execute(context -> elevenLabsApi
.textToSpeechStream(requestContext.request, requestContext.voiceId, requestContext.queryParameters)
.map(entity -> new TextToSpeechResponse(List.of(new Speech(entity.getBody())))));
}
private RequestContext prepareRequest(TextToSpeechPrompt prompt) {
ElevenLabsApi.SpeechRequest request = createRequest(prompt);
ElevenLabsTextToSpeechOptions options = getOptions(prompt);
String voiceId = options.getVoice();
MultiValueMap<String, String> queryParameters = buildQueryParameters(options);
return new RequestContext(request, voiceId, queryParameters);
}
private record RequestContext(ElevenLabsApi.SpeechRequest request, String voiceId,
MultiValueMap<String, String> queryParameters) {
}
private MultiValueMap<String, String> buildQueryParameters(ElevenLabsTextToSpeechOptions options) {
MultiValueMap<String, String> queryParameters = new LinkedMultiValueMap<>();
if (options.getEnableLogging() != null) {
queryParameters.add("enable_logging", options.getEnableLogging().toString());
}
if (options.getFormat() != null) {
queryParameters.add("output_format", options.getFormat());
}
return queryParameters;
}
private ElevenLabsApi.SpeechRequest createRequest(TextToSpeechPrompt prompt) {
ElevenLabsTextToSpeechOptions options = getOptions(prompt);
String voiceId = options.getVoice();
Assert.notNull(voiceId, "A voiceId must be specified in the ElevenLabsSpeechOptions.");
String text = prompt.getInstructions().getText();
Assert.hasText(text, "Prompt must contain text to convert to speech.");
return ElevenLabsApi.SpeechRequest.builder()
.text(text)
.modelId(options.getModelId())
.voiceSettings(options.getVoiceSettings())
.languageCode(options.getLanguageCode())
.pronunciationDictionaryLocators(options.getPronunciationDictionaryLocators())
.seed(options.getSeed())
.previousText(options.getPreviousText())
.nextText(options.getNextText())
.previousRequestIds(options.getPreviousRequestIds())
.nextRequestIds(options.getNextRequestIds())
.applyTextNormalization(options.getApplyTextNormalization())
.applyLanguageTextNormalization(options.getApplyLanguageTextNormalization())
.build();
}
private ElevenLabsTextToSpeechOptions getOptions(TextToSpeechPrompt prompt) {
ElevenLabsTextToSpeechOptions runtimeOptions = (prompt
.getOptions() instanceof ElevenLabsTextToSpeechOptions elevenLabsSpeechOptions) ? elevenLabsSpeechOptions
: null;
return (runtimeOptions != null) ? merge(runtimeOptions, this.defaultOptions) : this.defaultOptions;
}
private ElevenLabsTextToSpeechOptions merge(ElevenLabsTextToSpeechOptions runtimeOptions,
ElevenLabsTextToSpeechOptions defaultOptions) {
return ElevenLabsTextToSpeechOptions.builder()
.modelId(getOrDefault(runtimeOptions.getModelId(), defaultOptions.getModelId()))
.voice(getOrDefault(runtimeOptions.getVoice(), defaultOptions.getVoice()))
.voiceId(getOrDefault(runtimeOptions.getVoiceId(), defaultOptions.getVoiceId()))
.format(getOrDefault(runtimeOptions.getFormat(), defaultOptions.getFormat()))
.outputFormat(getOrDefault(runtimeOptions.getOutputFormat(), defaultOptions.getOutputFormat()))
.voiceSettings(getOrDefault(runtimeOptions.getVoiceSettings(), defaultOptions.getVoiceSettings()))
.languageCode(getOrDefault(runtimeOptions.getLanguageCode(), defaultOptions.getLanguageCode()))
.pronunciationDictionaryLocators(getOrDefault(runtimeOptions.getPronunciationDictionaryLocators(),
defaultOptions.getPronunciationDictionaryLocators()))
.seed(getOrDefault(runtimeOptions.getSeed(), defaultOptions.getSeed()))
.previousText(getOrDefault(runtimeOptions.getPreviousText(), defaultOptions.getPreviousText()))
.nextText(getOrDefault(runtimeOptions.getNextText(), defaultOptions.getNextText()))
.previousRequestIds(
getOrDefault(runtimeOptions.getPreviousRequestIds(), defaultOptions.getPreviousRequestIds()))
.nextRequestIds(getOrDefault(runtimeOptions.getNextRequestIds(), defaultOptions.getNextRequestIds()))
.applyTextNormalization(getOrDefault(runtimeOptions.getApplyTextNormalization(),
defaultOptions.getApplyTextNormalization()))
.applyLanguageTextNormalization(getOrDefault(runtimeOptions.getApplyLanguageTextNormalization(),
defaultOptions.getApplyLanguageTextNormalization()))
.build();
}
/**
 * Picks the runtime value when one was supplied, falling back to the default.
 * @param runtimeValue the per-request value, possibly null
 * @param defaultValue the fallback value, possibly null
 * @param <T> the value type
 * @return {@code runtimeValue} unless it is null, in which case {@code defaultValue}
 */
private <T> T getOrDefault(T runtimeValue, T defaultValue) {
	if (runtimeValue == null) {
		return defaultValue;
	}
	return runtimeValue;
}
/**
 * Returns the options this model falls back to when a prompt does not supply its own.
 * The internal instance is returned as-is, without copying.
 * @return the default options held by this model
 */
@Override
public ElevenLabsTextToSpeechOptions getDefaultOptions() {
return this.defaultOptions;
}
/**
 * Fluent builder for {@link ElevenLabsTextToSpeechModel}.
 * <p>
 * The API client is mandatory. The retry template and the default options start out
 * with {@link RetryUtils#DEFAULT_RETRY_TEMPLATE} and an empty options instance
 * respectively and may be replaced.
 */
public static class Builder {

	private ElevenLabsApi elevenLabsApi;

	private RetryTemplate retryTemplate = RetryUtils.DEFAULT_RETRY_TEMPLATE;

	private ElevenLabsTextToSpeechOptions defaultOptions = ElevenLabsTextToSpeechOptions.builder().build();

	/**
	 * Sets the low-level API client used to call ElevenLabs.
	 * @param elevenLabsApi the API client
	 * @return this builder
	 */
	public Builder elevenLabsApi(ElevenLabsApi elevenLabsApi) {
		this.elevenLabsApi = elevenLabsApi;
		return this;
	}

	/**
	 * Replaces the retry template.
	 * @param retryTemplate the retry template
	 * @return this builder
	 */
	public Builder retryTemplate(RetryTemplate retryTemplate) {
		this.retryTemplate = retryTemplate;
		return this;
	}

	/**
	 * Replaces the default options applied when a prompt carries none.
	 * @param defaultOptions the default options
	 * @return this builder
	 */
	public Builder defaultOptions(ElevenLabsTextToSpeechOptions defaultOptions) {
		this.defaultOptions = defaultOptions;
		return this;
	}

	/**
	 * Creates the model after validating that every collaborator is present.
	 * @return a new {@link ElevenLabsTextToSpeechModel}
	 * @throws IllegalArgumentException if the API client, the default options or the
	 * retry template is null
	 */
	public ElevenLabsTextToSpeechModel build() {
		Assert.notNull(this.elevenLabsApi, "ElevenLabsApi must not be null");
		Assert.notNull(this.defaultOptions, "ElevenLabsSpeechOptions must not be null");
		// The setters accept null, so an explicit retryTemplate(null) must be rejected
		// here just like the other two collaborators.
		Assert.notNull(this.retryTemplate, "RetryTemplate must not be null");
		return new ElevenLabsTextToSpeechModel(this.elevenLabsApi, this.defaultOptions, this.retryTemplate);
	}

}
}

View File

@@ -0,0 +1,439 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs;
import java.util.List;
import java.util.Objects;
import com.fasterxml.jackson.annotation.JsonIgnore;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import org.springframework.ai.audio.tts.TextToSpeechOptions;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
/**
* Options for ElevenLabs text-to-speech.
*
* @author Alexandros Pappas
*/
@JsonInclude(JsonInclude.Include.NON_NULL)
public class ElevenLabsTextToSpeechOptions implements TextToSpeechOptions {
@JsonProperty("model_id")
private String modelId;
// Path Params
@JsonProperty("voice_id")
private String voiceId;
// End Path Params
// Query Params
@JsonProperty("enable_logging")
private Boolean enableLogging;
@JsonProperty("output_format")
private String outputFormat;
// End Query Params
@JsonProperty("voice_settings")
private ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings;
@JsonProperty("language_code")
private String languageCode;
@JsonProperty("pronunciation_dictionary_locators")
private List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> pronunciationDictionaryLocators;
@JsonProperty("seed")
private Integer seed;
@JsonProperty("previous_text")
private String previousText;
@JsonProperty("next_text")
private String nextText;
@JsonProperty("previous_request_ids")
private List<String> previousRequestIds;
@JsonProperty("next_request_ids")
private List<String> nextRequestIds;
@JsonProperty("apply_text_normalization")
private ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization;
@JsonProperty("apply_language_text_normalization")
private Boolean applyLanguageTextNormalization;
public static Builder builder() {
return new ElevenLabsTextToSpeechOptions.Builder();
}
@Override
@JsonIgnore
public String getModel() {
return getModelId();
}
@JsonIgnore
public void setModel(String model) {
setModelId(model);
}
public String getModelId() {
return this.modelId;
}
public void setModelId(String modelId) {
this.modelId = modelId;
}
@Override
@JsonIgnore
public String getVoice() {
return getVoiceId();
}
@JsonIgnore
public void setVoice(String voice) {
setVoiceId(voice);
}
public String getVoiceId() {
return this.voiceId;
}
public void setVoiceId(String voiceId) {
this.voiceId = voiceId;
}
public Boolean getEnableLogging() {
return this.enableLogging;
}
public void setEnableLogging(Boolean enableLogging) {
this.enableLogging = enableLogging;
}
@Override
@JsonIgnore
public String getFormat() {
return getOutputFormat();
}
@JsonIgnore
public void setFormat(String format) {
setOutputFormat(format);
}
public String getOutputFormat() {
return this.outputFormat;
}
public void setOutputFormat(String outputFormat) {
this.outputFormat = outputFormat;
}
@Override
@JsonIgnore
public Double getSpeed() {
if (this.getVoiceSettings() != null) {
return this.getVoiceSettings().speed();
}
return null;
}
@JsonIgnore
public void setSpeed(Double speed) {
if (speed != null) {
if (this.getVoiceSettings() == null) {
this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, speed));
}
else {
this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(),
this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(),
this.getVoiceSettings().useSpeakerBoost(), speed));
}
}
else {
if (this.getVoiceSettings() != null) {
this.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(this.getVoiceSettings().stability(),
this.getVoiceSettings().similarityBoost(), this.getVoiceSettings().style(),
this.getVoiceSettings().useSpeakerBoost(), null));
}
}
}
public ElevenLabsApi.SpeechRequest.VoiceSettings getVoiceSettings() {
return this.voiceSettings;
}
public void setVoiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) {
this.voiceSettings = voiceSettings;
}
public String getLanguageCode() {
return this.languageCode;
}
public void setLanguageCode(String languageCode) {
this.languageCode = languageCode;
}
public List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> getPronunciationDictionaryLocators() {
return this.pronunciationDictionaryLocators;
}
public void setPronunciationDictionaryLocators(
List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> pronunciationDictionaryLocators) {
this.pronunciationDictionaryLocators = pronunciationDictionaryLocators;
}
public Integer getSeed() {
return this.seed;
}
public void setSeed(Integer seed) {
this.seed = seed;
}
public String getPreviousText() {
return this.previousText;
}
public void setPreviousText(String previousText) {
this.previousText = previousText;
}
public String getNextText() {
return this.nextText;
}
public void setNextText(String nextText) {
this.nextText = nextText;
}
public List<String> getPreviousRequestIds() {
return this.previousRequestIds;
}
public void setPreviousRequestIds(List<String> previousRequestIds) {
this.previousRequestIds = previousRequestIds;
}
public List<String> getNextRequestIds() {
return this.nextRequestIds;
}
public void setNextRequestIds(List<String> nextRequestIds) {
this.nextRequestIds = nextRequestIds;
}
public ElevenLabsApi.SpeechRequest.TextNormalizationMode getApplyTextNormalization() {
return this.applyTextNormalization;
}
public void setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) {
this.applyTextNormalization = applyTextNormalization;
}
public Boolean getApplyLanguageTextNormalization() {
return this.applyLanguageTextNormalization;
}
public void setApplyLanguageTextNormalization(Boolean applyLanguageTextNormalization) {
this.applyLanguageTextNormalization = applyLanguageTextNormalization;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (!(o instanceof ElevenLabsTextToSpeechOptions that))
return false;
return Objects.equals(modelId, that.modelId) && Objects.equals(voiceId, that.voiceId)
&& Objects.equals(outputFormat, that.outputFormat) && Objects.equals(voiceSettings, that.voiceSettings)
&& Objects.equals(languageCode, that.languageCode)
&& Objects.equals(pronunciationDictionaryLocators, that.pronunciationDictionaryLocators)
&& Objects.equals(seed, that.seed) && Objects.equals(previousText, that.previousText)
&& Objects.equals(nextText, that.nextText)
&& Objects.equals(previousRequestIds, that.previousRequestIds)
&& Objects.equals(applyTextNormalization, that.applyTextNormalization)
&& Objects.equals(nextRequestIds, that.nextRequestIds)
&& Objects.equals(applyLanguageTextNormalization, that.applyLanguageTextNormalization);
}
@Override
public int hashCode() {
return Objects.hash(modelId, voiceId, outputFormat, voiceSettings, languageCode,
pronunciationDictionaryLocators, seed, previousText, nextText, previousRequestIds, nextRequestIds,
applyTextNormalization, applyLanguageTextNormalization);
}
@Override
public String toString() {
return "ElevenLabsSpeechOptions{" + "modelId='" + modelId + '\'' + ", voiceId='" + voiceId + '\''
+ ", outputFormat='" + outputFormat + '\'' + ", voiceSettings=" + voiceSettings + ", languageCode='"
+ languageCode + '\'' + ", pronunciationDictionaryLocators=" + pronunciationDictionaryLocators
+ ", seed=" + seed + ", previousText='" + previousText + '\'' + ", nextText='" + nextText + '\''
+ ", previousRequestIds=" + previousRequestIds + ", nextRequestIds=" + nextRequestIds
+ ", applyTextNormalization=" + applyTextNormalization + ", applyLanguageTextNormalization="
+ applyLanguageTextNormalization + '}';
}
@Override
@SuppressWarnings("unchecked")
public ElevenLabsTextToSpeechOptions copy() {
return ElevenLabsTextToSpeechOptions.builder()
.modelId(this.getModelId())
.voice(this.getVoice())
.voiceId(this.getVoiceId())
.format(this.getFormat())
.outputFormat(this.getOutputFormat())
.voiceSettings(this.getVoiceSettings())
.languageCode(this.getLanguageCode())
.pronunciationDictionaryLocators(this.getPronunciationDictionaryLocators())
.seed(this.getSeed())
.previousText(this.getPreviousText())
.nextText(this.getNextText())
.previousRequestIds(this.getPreviousRequestIds())
.nextRequestIds(this.getNextRequestIds())
.applyTextNormalization(this.getApplyTextNormalization())
.applyLanguageTextNormalization(this.getApplyLanguageTextNormalization())
.build();
}
public static class Builder {
private final ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions();
/**
* Sets the model ID using the generic 'model' property. This is an alias for
* {@link #modelId(String)}.
* @param model The model ID to use.
* @return this builder.
*/
public Builder model(String model) {
options.setModel(model);
return this;
}
/**
* Sets the model ID using the ElevenLabs specific 'modelId' property. This is an
* alias for {@link #model(String)}.
* @param modelId The model ID to use.
* @return this builder.
*/
public Builder modelId(String modelId) {
options.setModelId(modelId);
return this;
}
/**
* Sets the voice ID using the generic 'voice' property. This is an alias for
* {@link #voiceId(String)}.
* @param voice The voice ID to use.
* @return this builder.
*/
public Builder voice(String voice) {
options.setVoice(voice);
return this;
}
/**
* Sets the voice ID using the ElevenLabs specific 'voiceId' property. This is an
* alias for {@link #voice(String)}.
* @param voiceId The voice ID to use.
* @return this builder.
*/
public Builder voiceId(String voiceId) {
options.setVoiceId(voiceId);
return this;
}
public Builder format(String format) {
options.setFormat(format);
return this;
}
public Builder outputFormat(String outputFormat) {
options.setOutputFormat(outputFormat);
return this;
}
public Builder voiceSettings(ElevenLabsApi.SpeechRequest.VoiceSettings voiceSettings) {
options.setVoiceSettings(voiceSettings);
return this;
}
public Builder languageCode(String languageCode) {
options.setLanguageCode(languageCode);
return this;
}
public Builder pronunciationDictionaryLocators(
List<ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator> pronunciationDictionaryLocators) {
options.setPronunciationDictionaryLocators(pronunciationDictionaryLocators);
return this;
}
public Builder seed(Integer seed) {
options.setSeed(seed);
return this;
}
public Builder previousText(String previousText) {
options.setPreviousText(previousText);
return this;
}
public Builder nextText(String nextText) {
options.setNextText(nextText);
return this;
}
public Builder previousRequestIds(List<String> previousRequestIds) {
options.setPreviousRequestIds(previousRequestIds);
return this;
}
public Builder nextRequestIds(List<String> nextRequestIds) {
options.setNextRequestIds(nextRequestIds);
return this;
}
public Builder applyTextNormalization(
ElevenLabsApi.SpeechRequest.TextNormalizationMode applyTextNormalization) {
options.setApplyTextNormalization(applyTextNormalization);
return this;
}
public Builder applyLanguageTextNormalization(Boolean applyLanguageTextNormalization) {
options.setApplyLanguageTextNormalization(applyLanguageTextNormalization);
return this;
}
public ElevenLabsTextToSpeechOptions build() {
return this.options;
}
}
}

View File

@@ -0,0 +1,44 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs.aot;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.aot.hint.MemberCategory;
import org.springframework.aot.hint.RuntimeHints;
import org.springframework.aot.hint.RuntimeHintsRegistrar;
import org.springframework.lang.NonNull;
import org.springframework.lang.Nullable;
import static org.springframework.ai.aot.AiRuntimeHints.findJsonAnnotatedClassesInPackage;
/**
 * Registers reflection hints for the JSON-annotated classes found in the package of
 * {@link ElevenLabsApi}, with every {@link MemberCategory} enabled.
 *
 * @author Alexandros Pappas
 */
public class ElevenLabsRuntimeHints implements RuntimeHintsRegistrar {

	/**
	 * Registers each discovered JSON-annotated type for reflection.
	 * @param hints the hints registry to contribute to
	 * @param classLoader unused by this registrar
	 */
	@Override
	public void registerHints(@NonNull RuntimeHints hints, @Nullable ClassLoader classLoader) {
		MemberCategory[] everyCategory = MemberCategory.values();
		var reflectionHints = hints.reflection();
		for (var jsonType : findJsonAnnotatedClassesInPackage(ElevenLabsApi.class)) {
			reflectionHints.registerType(jsonType, everyCategory);
		}
	}

}

View File

@@ -0,0 +1,391 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs.api;
import java.util.List;
import java.util.function.Consumer;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import reactor.core.publisher.Flux;
import reactor.core.publisher.Mono;
import org.springframework.ai.model.ApiKey;
import org.springframework.ai.model.NoopApiKey;
import org.springframework.ai.model.SimpleApiKey;
import org.springframework.ai.retry.RetryUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.util.Assert;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.web.client.RestClient;
import org.springframework.web.reactive.function.client.WebClient;
import org.springframework.web.util.UriComponentsBuilder;
/**
 * Client for the ElevenLabs Text-to-Speech API.
 * <p>
 * Blocking calls go through a {@link RestClient}, streaming calls through a
 * {@link WebClient}. Both are configured with the same base URL and default headers,
 * including the {@code xi-api-key} header unless a {@link NoopApiKey} is supplied.
 *
 * @author Alexandros Pappas
 */
public class ElevenLabsApi {

	public static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io";

	private final RestClient restClient;

	private final WebClient webClient;

	/**
	 * Create a new ElevenLabs API client.
	 * @param baseUrl The base URL for the ElevenLabs API.
	 * @param apiKey Your ElevenLabs API key.
	 * @param headers the http headers to use.
	 * @param restClientBuilder A builder for the Spring RestClient.
	 * @param webClientBuilder A builder for the Spring WebClient.
	 * @param responseErrorHandler A custom error handler for API responses.
	 */
	private ElevenLabsApi(String baseUrl, ApiKey apiKey, MultiValueMap<String, String> headers,
			RestClient.Builder restClientBuilder, WebClient.Builder webClientBuilder,
			ResponseErrorHandler responseErrorHandler) {

		Consumer<HttpHeaders> jsonContentHeaders = h -> {
			if (!(apiKey instanceof NoopApiKey)) {
				h.set("xi-api-key", apiKey.getValue());
			}
			h.addAll(headers);
			h.setContentType(MediaType.APPLICATION_JSON);
		};

		this.restClient = restClientBuilder.baseUrl(baseUrl)
			.defaultHeaders(jsonContentHeaders)
			.defaultStatusHandler(responseErrorHandler)
			.build();

		this.webClient = webClientBuilder.baseUrl(baseUrl).defaultHeaders(jsonContentHeaders).build();
	}

	/**
	 * Creates a builder for {@link ElevenLabsApi}.
	 * @return a new builder
	 */
	public static Builder builder() {
		return new Builder();
	}

	/**
	 * Convert text to speech using the specified voice and parameters.
	 * @param requestBody The request body containing text, model, and voice settings.
	 * @param voiceId The ID of the voice to use. Must not be null.
	 * @param queryParameters Additional query parameters for the API call.
	 * @return A ResponseEntity containing the generated audio as a byte array.
	 * @throws IllegalArgumentException if {@code voiceId} or {@code requestBody} is null,
	 * or the request text is empty
	 */
	public ResponseEntity<byte[]> textToSpeech(SpeechRequest requestBody, String voiceId,
			MultiValueMap<String, String> queryParameters) {

		Assert.notNull(voiceId, "voiceId must be provided. It cannot be null.");
		Assert.notNull(requestBody, "requestBody can not be null.");
		Assert.hasText(requestBody.text(), "requestBody.text must be provided. It cannot be null or empty.");

		UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}")
			.queryParams(queryParameters);

		return this.restClient.post()
			.uri(uriBuilder.buildAndExpand(voiceId).toUriString())
			.body(requestBody)
			.retrieve()
			.toEntity(byte[].class);
	}

	/**
	 * Convert text to speech using the specified voice and parameters, streaming the
	 * results.
	 * <p>
	 * A response with an error status (4xx/5xx) terminates the returned Flux with a
	 * {@code WebClientResponseException} instead of emitting the error payload as audio
	 * chunks.
	 * @param requestBody The request body containing text, model, and voice settings.
	 * @param voiceId The ID of the voice to use. Must not be null.
	 * @param queryParameters Additional query parameters for the API call.
	 * @return A Flux of ResponseEntity containing the generated audio chunks as byte
	 * arrays.
	 * @throws IllegalArgumentException if {@code voiceId} or {@code requestBody} is null,
	 * or the request text is empty
	 */
	public Flux<ResponseEntity<byte[]>> textToSpeechStream(SpeechRequest requestBody, String voiceId,
			MultiValueMap<String, String> queryParameters) {

		Assert.notNull(voiceId, "voiceId must be provided for streaming. It cannot be null.");
		Assert.notNull(requestBody, "requestBody can not be null.");
		Assert.hasText(requestBody.text(), "requestBody.text must be provided. It cannot be null or empty.");

		UriComponentsBuilder uriBuilder = UriComponentsBuilder.fromPath("/v1/text-to-speech/{voice_id}/stream")
			.queryParams(queryParameters);

		return this.webClient.post()
			.uri(uriBuilder.buildAndExpand(voiceId).toUriString())
			.body(Mono.just(requestBody), SpeechRequest.class)
			.accept(MediaType.APPLICATION_OCTET_STREAM)
			.exchangeToFlux(clientResponse -> {
				// exchangeToFlux performs no status handling of its own. Without this
				// check an error body would be wrapped in ResponseEntity.ok() below and
				// reach callers as if it were audio data.
				if (clientResponse.statusCode().isError()) {
					return clientResponse.createException()
						.flatMapMany(ex -> Flux.<ResponseEntity<byte[]>>error(ex));
				}
				HttpHeaders headers = clientResponse.headers().asHttpHeaders();
				return clientResponse.bodyToFlux(byte[].class)
					.map(bytes -> ResponseEntity.ok().headers(headers).body(bytes));
			});
	}

	/**
	 * The output format of the generated audio.
	 */
	public enum OutputFormat {

		MP3_22050_32("mp3_22050_32"), MP3_44100_32("mp3_44100_32"), MP3_44100_64("mp3_44100_64"),
		MP3_44100_96("mp3_44100_96"), MP3_44100_128("mp3_44100_128"), MP3_44100_192("mp3_44100_192"),
		PCM_8000("pcm_8000"), PCM_16000("pcm_16000"), PCM_22050("pcm_22050"), PCM_24000("pcm_24000"),
		PCM_44100("pcm_44100"), PCM_48000("pcm_48000"), ULAW_8000("ulaw_8000"), ALAW_8000("alaw_8000"),
		OPUS_48000_32("opus_48000_32"), OPUS_48000_64("opus_48000_64"), OPUS_48000_96("opus_48000_96"),
		OPUS_48000_128("opus_48000_128"), OPUS_48000_192("opus_48000_192");

		private final String value;

		OutputFormat(String value) {
			this.value = value;
		}

		/**
		 * Returns the string form of this format.
		 * @return the format identifier, e.g. {@code mp3_44100_128}
		 */
		public String getValue() {
			return this.value;
		}

	}

	/**
	 * Represents a request to the ElevenLabs Text-to-Speech API. Null components are
	 * omitted from the serialized JSON.
	 */
	@JsonInclude(JsonInclude.Include.NON_NULL)
	public record SpeechRequest(@JsonProperty("text") String text, @JsonProperty("model_id") String modelId,
			@JsonProperty("language_code") String languageCode,
			@JsonProperty("voice_settings") VoiceSettings voiceSettings,
			@JsonProperty("pronunciation_dictionary_locators") List<PronunciationDictionaryLocator> pronunciationDictionaryLocators,
			@JsonProperty("seed") Integer seed, @JsonProperty("previous_text") String previousText,
			@JsonProperty("next_text") String nextText,
			@JsonProperty("previous_request_ids") List<String> previousRequestIds,
			@JsonProperty("next_request_ids") List<String> nextRequestIds,
			@JsonProperty("apply_text_normalization") TextNormalizationMode applyTextNormalization,
			@JsonProperty("apply_language_text_normalization") Boolean applyLanguageTextNormalization) {

		/**
		 * Creates a builder for {@link SpeechRequest}.
		 * @return a new builder
		 */
		public static Builder builder() {
			return new Builder();
		}

		/**
		 * Text normalization mode.
		 */
		public enum TextNormalizationMode {

			@JsonProperty("auto")
			AUTO("auto"),

			@JsonProperty("on")
			ON("on"),

			@JsonProperty("off")
			OFF("off");

			public final String value;

			TextNormalizationMode(String value) {
				this.value = value;
			}

			@JsonValue
			public String getValue() {
				return this.value;
			}

		}

		/**
		 * Voice settings to override defaults for the given voice.
		 */
		@JsonInclude(JsonInclude.Include.NON_NULL)
		public record VoiceSettings(@JsonProperty("stability") Double stability,
				@JsonProperty("similarity_boost") Double similarityBoost, @JsonProperty("style") Double style,
				@JsonProperty("use_speaker_boost") Boolean useSpeakerBoost, @JsonProperty("speed") Double speed) {
		}

		/**
		 * Locator for a pronunciation dictionary.
		 */
		@JsonInclude(JsonInclude.Include.NON_NULL)
		public record PronunciationDictionaryLocator(
				@JsonProperty("pronunciation_dictionary_id") String pronunciationDictionaryId,
				@JsonProperty("version_id") String versionId) {
		}

		/**
		 * Fluent builder for {@link SpeechRequest}. Only the text is mandatory.
		 */
		public static class Builder {

			private String text;

			private String modelId;

			private String languageCode;

			private VoiceSettings voiceSettings;

			private List<PronunciationDictionaryLocator> pronunciationDictionaryLocators;

			private Integer seed;

			private String previousText;

			private String nextText;

			private List<String> previousRequestIds;

			private List<String> nextRequestIds;

			private TextNormalizationMode applyTextNormalization;

			// Starts as false rather than null, so the property is serialized unless a
			// caller explicitly sets it to null.
			private Boolean applyLanguageTextNormalization = false;

			public Builder text(String text) {
				this.text = text;
				return this;
			}

			public Builder modelId(String modelId) {
				this.modelId = modelId;
				return this;
			}

			public Builder languageCode(String languageCode) {
				this.languageCode = languageCode;
				return this;
			}

			public Builder voiceSettings(VoiceSettings voiceSettings) {
				this.voiceSettings = voiceSettings;
				return this;
			}

			public Builder pronunciationDictionaryLocators(
					List<PronunciationDictionaryLocator> pronunciationDictionaryLocators) {
				this.pronunciationDictionaryLocators = pronunciationDictionaryLocators;
				return this;
			}

			public Builder seed(Integer seed) {
				this.seed = seed;
				return this;
			}

			public Builder previousText(String previousText) {
				this.previousText = previousText;
				return this;
			}

			public Builder nextText(String nextText) {
				this.nextText = nextText;
				return this;
			}

			public Builder previousRequestIds(List<String> previousRequestIds) {
				this.previousRequestIds = previousRequestIds;
				return this;
			}

			public Builder nextRequestIds(List<String> nextRequestIds) {
				this.nextRequestIds = nextRequestIds;
				return this;
			}

			public Builder applyTextNormalization(TextNormalizationMode applyTextNormalization) {
				this.applyTextNormalization = applyTextNormalization;
				return this;
			}

			public Builder applyLanguageTextNormalization(Boolean applyLanguageTextNormalization) {
				this.applyLanguageTextNormalization = applyLanguageTextNormalization;
				return this;
			}

			/**
			 * Builds the request.
			 * @return a new {@link SpeechRequest}
			 * @throws IllegalArgumentException if the text is null or blank
			 */
			public SpeechRequest build() {
				Assert.hasText(this.text, "text must not be empty");
				return new SpeechRequest(this.text, this.modelId, this.languageCode, this.voiceSettings,
						this.pronunciationDictionaryLocators, this.seed, this.previousText, this.nextText,
						this.previousRequestIds, this.nextRequestIds, this.applyTextNormalization,
						this.applyLanguageTextNormalization);
			}

		}

	}

	/**
	 * Builder to construct {@link ElevenLabsApi} instance.
	 * <p>
	 * Only the API key is mandatory; every other setting has a default. All setters
	 * reject null arguments.
	 */
	public static class Builder {

		private String baseUrl = DEFAULT_BASE_URL;

		private ApiKey apiKey;

		private MultiValueMap<String, String> headers = new LinkedMultiValueMap<>();

		private RestClient.Builder restClientBuilder = RestClient.builder();

		private WebClient.Builder webClientBuilder = WebClient.builder();

		private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER;

		public Builder baseUrl(String baseUrl) {
			Assert.hasText(baseUrl, "baseUrl cannot be null or empty");
			this.baseUrl = baseUrl;
			return this;
		}

		public Builder apiKey(ApiKey apiKey) {
			Assert.notNull(apiKey, "apiKey cannot be null");
			this.apiKey = apiKey;
			return this;
		}

		/**
		 * Convenience overload that wraps a plain string in a {@link SimpleApiKey}.
		 * @param simpleApiKey the API key value
		 * @return this builder
		 */
		public Builder apiKey(String simpleApiKey) {
			Assert.notNull(simpleApiKey, "simpleApiKey cannot be null");
			this.apiKey = new SimpleApiKey(simpleApiKey);
			return this;
		}

		public Builder headers(MultiValueMap<String, String> headers) {
			Assert.notNull(headers, "headers cannot be null");
			this.headers = headers;
			return this;
		}

		public Builder restClientBuilder(RestClient.Builder restClientBuilder) {
			Assert.notNull(restClientBuilder, "restClientBuilder cannot be null");
			this.restClientBuilder = restClientBuilder;
			return this;
		}

		public Builder webClientBuilder(WebClient.Builder webClientBuilder) {
			Assert.notNull(webClientBuilder, "webClientBuilder cannot be null");
			this.webClientBuilder = webClientBuilder;
			return this;
		}

		public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) {
			Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null");
			this.responseErrorHandler = responseErrorHandler;
			return this;
		}

		/**
		 * Builds the API client.
		 * @return a new {@link ElevenLabsApi}
		 * @throws IllegalArgumentException if no API key was set
		 */
		public ElevenLabsApi build() {
			Assert.notNull(this.apiKey, "apiKey must be set");
			return new ElevenLabsApi(this.baseUrl, this.apiKey, this.headers, this.restClientBuilder,
					this.webClientBuilder, this.responseErrorHandler);
		}

	}

}

View File

@@ -0,0 +1,452 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs.api;
import java.util.List;
import java.util.Map;
import java.util.function.Consumer;
import com.fasterxml.jackson.annotation.JsonInclude;
import com.fasterxml.jackson.annotation.JsonProperty;
import com.fasterxml.jackson.annotation.JsonValue;
import org.springframework.ai.model.ApiKey;
import org.springframework.ai.model.NoopApiKey;
import org.springframework.ai.model.SimpleApiKey;
import org.springframework.ai.retry.RetryUtils;
import org.springframework.http.HttpHeaders;
import org.springframework.http.MediaType;
import org.springframework.http.ResponseEntity;
import org.springframework.util.Assert;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import org.springframework.web.client.ResponseErrorHandler;
import org.springframework.web.client.RestClient;
/**
* Client for the ElevenLabs Voices API.
*
* @author Alexandros Pappas
*/
public class ElevenLabsVoicesApi {
private static final String DEFAULT_BASE_URL = "https://api.elevenlabs.io";
private final RestClient restClient;
/**
* Create a new ElevenLabs Voices API client.
* @param baseUrl The base URL for the ElevenLabs API.
* @param apiKey Your ElevenLabs API key.
* @param headers the http headers to use.
* @param restClientBuilder A builder for the Spring RestClient.
* @param responseErrorHandler A custom error handler for API responses.
*/
public ElevenLabsVoicesApi(String baseUrl, ApiKey apiKey, MultiValueMap<String, String> headers,
RestClient.Builder restClientBuilder, ResponseErrorHandler responseErrorHandler) {
Consumer<HttpHeaders> jsonContentHeaders = h -> {
if (!(apiKey instanceof NoopApiKey)) {
h.set("xi-api-key", apiKey.getValue());
}
h.addAll(headers);
h.setContentType(MediaType.APPLICATION_JSON);
};
this.restClient = restClientBuilder.baseUrl(baseUrl)
.defaultHeaders(jsonContentHeaders)
.defaultStatusHandler(responseErrorHandler)
.build();
}
/**
 * Creates a new builder for this API client.
 * @return a fresh {@code Builder} instance
 */
public static Builder builder() {
return new Builder();
}
/**
 * Retrieves the list of all available voices by issuing a GET request to
 * {@code /v1/voices}.
 * @return A ResponseEntity containing a {@link Voices} object, which holds the list of
 * voices.
 */
public ResponseEntity<Voices> getVoices() {
	RestClient.ResponseSpec response = this.restClient.get().uri("/v1/voices").retrieve();
	return response.toEntity(Voices.class);
}
/**
 * Gets the default settings for voices. "similarity_boost" corresponds to "Clarity +
 * Similarity Enhancement" in the web app and "stability" corresponds to the
 * "Stability" slider in the web app.
 * @return {@link ResponseEntity} containing the {@link VoiceSettings} record.
 */
public ResponseEntity<VoiceSettings> getDefaultVoiceSettings() {
	RestClient.ResponseSpec response = this.restClient.get().uri("/v1/voices/settings/default").retrieve();
	return response.toEntity(VoiceSettings.class);
}
/**
 * Returns the settings for a specific voice. "similarity_boost" corresponds to
 * "Clarity + Similarity Enhancement" in the web app and "stability" corresponds to
 * the "Stability" slider in the web app.
 * @param voiceId The ID of the voice to get settings for. Required.
 * @return {@link ResponseEntity} containing the {@link VoiceSettings} record.
 * @throws IllegalArgumentException if {@code voiceId} is null or blank
 */
public ResponseEntity<VoiceSettings> getVoiceSettings(String voiceId) {
	Assert.hasText(voiceId, "voiceId cannot be null or empty");
	RestClient.ResponseSpec response = this.restClient.get()
		.uri("/v1/voices/{voiceId}/settings", voiceId)
		.retrieve();
	return response.toEntity(VoiceSettings.class);
}
/**
 * Returns metadata about a specific voice.
 * @param voiceId ID of the voice to look up. Use {@link #getVoices()} to list all the
 * available voices. Required.
 * @return {@link ResponseEntity} containing the {@link Voice} record.
 * @throws IllegalArgumentException if {@code voiceId} is null or blank
 */
public ResponseEntity<Voice> getVoice(String voiceId) {
	Assert.hasText(voiceId, "voiceId cannot be null or empty");
	RestClient.ResponseSpec response = this.restClient.get().uri("/v1/voices/{voiceId}", voiceId).retrieve();
	return response.toEntity(Voice.class);
}
/**
 * Category of a voice. Each constant carries the lower-case string used for it in
 * JSON.
 */
public enum CategoryEnum {

	@JsonProperty("generated")
	GENERATED("generated"),

	@JsonProperty("cloned")
	CLONED("cloned"),

	@JsonProperty("premade")
	PREMADE("premade"),

	@JsonProperty("professional")
	PROFESSIONAL("professional"),

	@JsonProperty("famous")
	FAMOUS("famous"),

	@JsonProperty("high_quality")
	HIGH_QUALITY("high_quality");

	public final String value;

	CategoryEnum(String jsonValue) {
		this.value = jsonValue;
	}

	/**
	 * Returns the JSON string form of this category.
	 * @return the string value of this constant
	 */
	@JsonValue
	public String getValue() {
		return this.value;
	}

}
/**
 * Safety control applied to a voice. Each constant carries the upper-case string used
 * for it in JSON.
 */
public enum SafetyControlEnum {

	@JsonProperty("NONE")
	NONE("NONE"),

	@JsonProperty("BAN")
	BAN("BAN"),

	@JsonProperty("CAPTCHA")
	CAPTCHA("CAPTCHA"),

	@JsonProperty("CAPTCHA_AND_MODERATION")
	CAPTCHA_AND_MODERATION("CAPTCHA_AND_MODERATION"),

	@JsonProperty("ENTERPRISE_BAN")
	ENTERPRISE_BAN("ENTERPRISE_BAN"),

	@JsonProperty("ENTERPRISE_CAPTCHA")
	ENTERPRISE_CAPTCHA("ENTERPRISE_CAPTCHA");

	public final String value;

	SafetyControlEnum(String jsonValue) {
		this.value = jsonValue;
	}

	/**
	 * Returns the JSON string form of this safety control.
	 * @return the string value of this constant
	 */
	@JsonValue
	public String getValue() {
		return this.value;
	}

}
/**
 * Represents the response from the /v1/voices endpoint. A null list is omitted when
 * the record is serialized.
 *
 * @param voices A list of Voice objects representing the available voices.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record Voices(@JsonProperty("voices") List<Voice> voices) {
}
/**
 * A single voice as described by the ElevenLabs API. Each component is bound to the
 * snake_case JSON property named in its {@link JsonProperty} annotation; components
 * that are {@code null} are omitted when the record is written as JSON.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record Voice(
        @JsonProperty("voice_id") String voiceId,
        @JsonProperty("name") String name,
        @JsonProperty("samples") List<Sample> samples,
        @JsonProperty("category") CategoryEnum category,
        @JsonProperty("fine_tuning") FineTuning fineTuning,
        @JsonProperty("labels") Map<String, String> labels,
        @JsonProperty("description") String description,
        @JsonProperty("preview_url") String previewUrl,
        @JsonProperty("available_for_tiers") List<String> availableForTiers,
        @JsonProperty("settings") VoiceSettings settings,
        @JsonProperty("sharing") VoiceSharing sharing,
        @JsonProperty("high_quality_base_model_ids") List<String> highQualityBaseModelIds,
        @JsonProperty("verified_languages") List<VerifiedVoiceLanguage> verifiedLanguages,
        @JsonProperty("safety_control") SafetyControlEnum safetyControl,
        @JsonProperty("voice_verification") VoiceVerification voiceVerification,
        @JsonProperty("permission_on_resource") String permissionOnResource,
        @JsonProperty("is_owner") Boolean isOwner,
        @JsonProperty("is_legacy") Boolean isLegacy,
        @JsonProperty("is_mixed") Boolean isMixed,
        @JsonProperty("created_at_unix") Integer createdAtUnix) {
}
/**
 * An entry of the {@code samples} list of a {@link Voice}. Components map one-to-one to
 * the JSON properties named in their annotations; {@code null} components are omitted
 * on serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record Sample(
        @JsonProperty("sample_id") String sampleId,
        @JsonProperty("file_name") String fileName,
        @JsonProperty("mime_type") String mimeType,
        @JsonProperty("size_bytes") Integer sizeBytes,
        @JsonProperty("hash") String hash) {
}
/**
 * The {@code fine_tuning} object of a {@link Voice}. Components map one-to-one to the
 * JSON properties named in their annotations; {@code null} components are omitted on
 * serialization. The {@code state}, {@code progress} and {@code message} components
 * are string-keyed maps.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record FineTuning(
        @JsonProperty("is_allowed_to_fine_tune") Boolean isAllowedToFineTune,
        @JsonProperty("state") Map<String, String> state,
        @JsonProperty("verification_failures") List<String> verificationFailures,
        @JsonProperty("verification_attempts_count") Integer verificationAttemptsCount,
        @JsonProperty("manual_verification_requested") Boolean manualVerificationRequested,
        @JsonProperty("language") String language,
        @JsonProperty("progress") Map<String, Double> progress,
        @JsonProperty("message") Map<String, String> message,
        @JsonProperty("dataset_duration_seconds") Double datasetDurationSeconds,
        @JsonProperty("verification_attempts") List<VerificationAttempt> verificationAttempts,
        @JsonProperty("slice_ids") List<String> sliceIds,
        @JsonProperty("manual_verification") ManualVerification manualVerification,
        @JsonProperty("max_verification_attempts") Integer maxVerificationAttempts,
        @JsonProperty("next_max_verification_attempts_reset_unix_ms") Long nextMaxVerificationAttemptsResetUnixMs) {
}
/**
 * The {@code voice_verification} object of a {@link Voice}. Components map one-to-one
 * to the JSON properties named in their annotations; {@code null} components are
 * omitted on serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record VoiceVerification(
        @JsonProperty("requires_verification") Boolean requiresVerification,
        @JsonProperty("is_verified") Boolean isVerified,
        @JsonProperty("verification_failures") List<String> verificationFailures,
        @JsonProperty("verification_attempts_count") Integer verificationAttemptsCount,
        @JsonProperty("language") String language,
        @JsonProperty("verification_attempts") List<VerificationAttempt> verificationAttempts) {
}
/**
 * An entry of a {@code verification_attempts} list, as referenced by
 * {@link FineTuning} and {@link VoiceVerification}. Components map one-to-one to the
 * JSON properties named in their annotations; {@code null} components are omitted on
 * serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record VerificationAttempt(
        @JsonProperty("text") String text,
        @JsonProperty("date_unix") Integer dateUnix,
        @JsonProperty("accepted") Boolean accepted,
        @JsonProperty("similarity") Double similarity,
        @JsonProperty("levenshtein_distance") Double levenshteinDistance,
        @JsonProperty("recording") Recording recording) {
}
/**
 * The {@code recording} object of a {@link VerificationAttempt}. Components map
 * one-to-one to the JSON properties named in their annotations; {@code null}
 * components are omitted on serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record Recording(
        @JsonProperty("recording_id") String recordingId,
        @JsonProperty("mime_type") String mimeType,
        @JsonProperty("size_bytes") Integer sizeBytes,
        @JsonProperty("upload_date_unix") Integer uploadDateUnix,
        @JsonProperty("transcription") String transcription) {
}
/**
 * The {@code manual_verification} object of {@link FineTuning}. Components map
 * one-to-one to the JSON properties named in their annotations; {@code null}
 * components are omitted on serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record ManualVerification(
        @JsonProperty("extra_text") String extraText,
        @JsonProperty("request_time_unix") Integer requestTimeUnix,
        @JsonProperty("files") List<ManualVerificationFile> files) {
}
/**
 * An entry of the {@code files} list of {@link ManualVerification}. Components map
 * one-to-one to the JSON properties named in their annotations; {@code null}
 * components are omitted on serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record ManualVerificationFile(
        @JsonProperty("file_id") String fileId,
        @JsonProperty("file_name") String fileName,
        @JsonProperty("mime_type") String mimeType,
        @JsonProperty("size_bytes") Integer sizeBytes,
        @JsonProperty("upload_date_unix") Integer uploadDateUnix) {
}
/**
 * Settings of a voice, used as the {@code settings} component of {@link Voice} and as
 * the body type of the voice-settings endpoint. Components map one-to-one to the JSON
 * properties named in their annotations; {@code null} components are omitted on
 * serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record VoiceSettings(
        @JsonProperty("stability") Double stability,
        @JsonProperty("similarity_boost") Double similarityBoost,
        @JsonProperty("style") Double style,
        @JsonProperty("use_speaker_boost") Boolean useSpeakerBoost,
        @JsonProperty("speed") Double speed) {
}
/**
 * The {@code sharing} object of a {@link Voice}. Components map one-to-one to the JSON
 * properties named in their annotations; {@code null} components are omitted on
 * serialization.
 *
 * <p>
 * Note that the {@code category} component uses the {@link CategoryEnum} nested in this
 * record, which declares fewer constants than the enum of the same simple name in the
 * enclosing class.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record VoiceSharing(
        @JsonProperty("status") StatusEnum status,
        @JsonProperty("history_item_sample_id") String historyItemSampleId,
        @JsonProperty("date_unix") Integer dateUnix,
        @JsonProperty("whitelisted_emails") List<String> whitelistedEmails,
        @JsonProperty("public_owner_id") String publicOwnerId,
        @JsonProperty("original_voice_id") String originalVoiceId,
        @JsonProperty("financial_rewards_enabled") Boolean financialRewardsEnabled,
        @JsonProperty("free_users_allowed") Boolean freeUsersAllowed,
        @JsonProperty("live_moderation_enabled") Boolean liveModerationEnabled,
        @JsonProperty("rate") Double rate,
        @JsonProperty("notice_period") Integer noticePeriod,
        @JsonProperty("disable_at_unix") Integer disableAtUnix,
        @JsonProperty("voice_mixing_allowed") Boolean voiceMixingAllowed,
        @JsonProperty("featured") Boolean featured,
        @JsonProperty("category") CategoryEnum category,
        @JsonProperty("reader_app_enabled") Boolean readerAppEnabled,
        @JsonProperty("image_url") String imageUrl,
        @JsonProperty("ban_reason") String banReason,
        @JsonProperty("liked_by_count") Integer likedByCount,
        @JsonProperty("cloned_by_count") Integer clonedByCount,
        @JsonProperty("name") String name,
        @JsonProperty("description") String description,
        @JsonProperty("labels") Map<String, String> labels,
        @JsonProperty("review_status") ReviewStatusEnum reviewStatus,
        @JsonProperty("review_message") String reviewMessage,
        @JsonProperty("enabled_in_library") Boolean enabledInLibrary,
        @JsonProperty("instagram_username") String instagramUsername,
        @JsonProperty("twitter_username") String twitterUsername,
        @JsonProperty("youtube_username") String youtubeUsername,
        @JsonProperty("tiktok_username") String tiktokUsername,
        @JsonProperty("moderation_check") VoiceSharingModerationCheck moderationCheck,
        @JsonProperty("reader_restricted_on") List<ReaderResource> readerRestrictedOn) {

    /**
     * Values of the {@code status} property.
     */
    public enum StatusEnum {

        @JsonProperty("enabled")
        ENABLED("enabled"),

        @JsonProperty("disabled")
        DISABLED("disabled"),

        @JsonProperty("copied")
        COPIED("copied"),

        @JsonProperty("copied_disabled")
        COPIED_DISABLED("copied_disabled");

        /** String representation of the constant. */
        public final String value;

        StatusEnum(String wireValue) {
            this.value = wireValue;
        }

        /**
         * @return the string held in {@link #value}; annotated as the Jackson value.
         */
        @JsonValue
        public String getValue() {
            return this.value;
        }

    }

    /**
     * Values of the {@code category} property of the sharing object.
     */
    public enum CategoryEnum {

        @JsonProperty("generated")
        GENERATED("generated"),

        @JsonProperty("professional")
        PROFESSIONAL("professional"),

        @JsonProperty("high_quality")
        HIGH_QUALITY("high_quality"),

        @JsonProperty("famous")
        FAMOUS("famous");

        /** String representation of the constant. */
        public final String value;

        CategoryEnum(String wireValue) {
            this.value = wireValue;
        }

        /**
         * @return the string held in {@link #value}; annotated as the Jackson value.
         */
        @JsonValue
        public String getValue() {
            return this.value;
        }

    }

    /**
     * Values of the {@code review_status} property.
     */
    public enum ReviewStatusEnum {

        @JsonProperty("not_requested")
        NOT_REQUESTED("not_requested"),

        @JsonProperty("pending")
        PENDING("pending"),

        @JsonProperty("declined")
        DECLINED("declined"),

        @JsonProperty("allowed")
        ALLOWED("allowed"),

        @JsonProperty("allowed_with_changes")
        ALLOWED_WITH_CHANGES("allowed_with_changes");

        /** String representation of the constant. */
        public final String value;

        ReviewStatusEnum(String wireValue) {
            this.value = wireValue;
        }

        /**
         * @return the string held in {@link #value}; annotated as the Jackson value.
         */
        @JsonValue
        public String getValue() {
            return this.value;
        }

    }

}
/**
 * The {@code moderation_check} object of {@link VoiceSharing}. Components map
 * one-to-one to the JSON properties named in their annotations; {@code null}
 * components are omitted on serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record VoiceSharingModerationCheck(
        @JsonProperty("date_checked_unix") Integer dateCheckedUnix,
        @JsonProperty("name_value") String nameValue,
        @JsonProperty("name_check") Boolean nameCheck,
        @JsonProperty("description_value") String descriptionValue,
        @JsonProperty("description_check") Boolean descriptionCheck,
        @JsonProperty("sample_ids") List<String> sampleIds,
        @JsonProperty("sample_checks") List<Double> sampleChecks,
        @JsonProperty("captcha_ids") List<String> captchaIds,
        @JsonProperty("captcha_checks") List<Double> captchaChecks) {
}
/**
 * An entry of the {@code reader_restricted_on} list of {@link VoiceSharing}, made of a
 * resource type and a resource id. {@code null} components are omitted on
 * serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record ReaderResource(
        @JsonProperty("resource_type") ResourceTypeEnum resourceType,
        @JsonProperty("resource_id") String resourceId) {

    /**
     * Values of the {@code resource_type} property.
     */
    public enum ResourceTypeEnum {

        @JsonProperty("read")
        READ("read"),

        @JsonProperty("collection")
        COLLECTION("collection");

        /** String representation of the constant. */
        public final String value;

        ResourceTypeEnum(String wireValue) {
            this.value = wireValue;
        }

        /**
         * @return the string held in {@link #value}; annotated as the Jackson value.
         */
        @JsonValue
        public String getValue() {
            return this.value;
        }

    }

}
/**
 * An entry of the {@code verified_languages} list of a {@link Voice}. Components map
 * one-to-one to the JSON properties named in their annotations; {@code null}
 * components are omitted on serialization.
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public record VerifiedVoiceLanguage(
        @JsonProperty("language") String language,
        @JsonProperty("model_id") String modelId,
        @JsonProperty("accent") String accent) {
}
/**
 * Fluent builder for {@link ElevenLabsVoicesApi}. Every option except the API key
 * starts with a default; {@link #build()} fails when no API key has been supplied.
 */
public static class Builder {

    private ApiKey apiKey;

    private String baseUrl = DEFAULT_BASE_URL;

    private MultiValueMap<String, String> headers = new LinkedMultiValueMap<>();

    private RestClient.Builder restClientBuilder = RestClient.builder();

    private ResponseErrorHandler responseErrorHandler = RetryUtils.DEFAULT_RESPONSE_ERROR_HANDLER;

    /**
     * Overrides the default base URL.
     * @param baseUrl the base URL; must contain text.
     * @return this builder.
     */
    public Builder baseUrl(String baseUrl) {
        Assert.hasText(baseUrl, "baseUrl cannot be null or empty");
        this.baseUrl = baseUrl;
        return this;
    }

    /**
     * Sets the API key.
     * @param apiKey the key object; must not be {@code null}.
     * @return this builder.
     */
    public Builder apiKey(ApiKey apiKey) {
        Assert.notNull(apiKey, "apiKey cannot be null");
        this.apiKey = apiKey;
        return this;
    }

    /**
     * Sets the API key from a plain string, wrapping it in a {@link SimpleApiKey}.
     * @param simpleApiKey the key string; must not be {@code null}.
     * @return this builder.
     */
    public Builder apiKey(String simpleApiKey) {
        Assert.notNull(simpleApiKey, "simpleApiKey cannot be null");
        // Delegates to the ApiKey overload; the wrapper is never null at this point.
        return apiKey(new SimpleApiKey(simpleApiKey));
    }

    /**
     * Replaces the header map handed to the API client.
     * @param headers the headers; must not be {@code null}.
     * @return this builder.
     */
    public Builder headers(MultiValueMap<String, String> headers) {
        Assert.notNull(headers, "headers cannot be null");
        this.headers = headers;
        return this;
    }

    /**
     * Replaces the {@link RestClient.Builder} handed to the API client.
     * @param restClientBuilder the builder; must not be {@code null}.
     * @return this builder.
     */
    public Builder restClientBuilder(RestClient.Builder restClientBuilder) {
        Assert.notNull(restClientBuilder, "restClientBuilder cannot be null");
        this.restClientBuilder = restClientBuilder;
        return this;
    }

    /**
     * Replaces the default response error handler.
     * @param responseErrorHandler the handler; must not be {@code null}.
     * @return this builder.
     */
    public Builder responseErrorHandler(ResponseErrorHandler responseErrorHandler) {
        Assert.notNull(responseErrorHandler, "responseErrorHandler cannot be null");
        this.responseErrorHandler = responseErrorHandler;
        return this;
    }

    /**
     * Creates the API client from the collected options.
     * @return a new {@link ElevenLabsVoicesApi}.
     * @throws IllegalArgumentException if no API key has been set.
     */
    public ElevenLabsVoicesApi build() {
        Assert.notNull(this.apiKey, "apiKey must be set");
        ElevenLabsVoicesApi voicesApi = new ElevenLabsVoicesApi(this.baseUrl, this.apiKey, this.headers,
                this.restClientBuilder, this.responseErrorHandler);
        return voicesApi;
    }

}
}

View File

@@ -0,0 +1,2 @@
org.springframework.aot.hint.RuntimeHintsRegistrar=\
org.springframework.ai.elevenlabs.aot.ElevenLabsRuntimeHints

View File

@@ -0,0 +1,58 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.ai.elevenlabs.api.ElevenLabsVoicesApi;
import org.springframework.ai.model.SimpleApiKey;
import org.springframework.boot.SpringBootConfiguration;
import org.springframework.context.annotation.Bean;
import org.springframework.util.StringUtils;
/**
 * Test configuration that declares the ElevenLabs API clients and the text-to-speech
 * model as beans. The API key is read from the {@code ELEVEN_LABS_API_KEY} environment
 * variable.
 *
 * @author Alexandros Pappas
 */
@SpringBootConfiguration
public class ElevenLabsTestConfiguration {

    /**
     * @return an {@link ElevenLabsApi} built with the key from the environment.
     */
    @Bean
    public ElevenLabsApi elevenLabsApi() {
        SimpleApiKey key = getApiKey();
        return ElevenLabsApi.builder().apiKey(key).build();
    }

    /**
     * @return an {@link ElevenLabsVoicesApi} built with the key from the environment.
     */
    @Bean
    public ElevenLabsVoicesApi elevenLabsVoicesApi() {
        SimpleApiKey key = getApiKey();
        return ElevenLabsVoicesApi.builder().apiKey(key).build();
    }

    /**
     * @return an {@link ElevenLabsTextToSpeechModel} backed by {@link #elevenLabsApi()}.
     */
    @Bean
    public ElevenLabsTextToSpeechModel elevenLabsSpeechModel() {
        ElevenLabsApi api = elevenLabsApi();
        return ElevenLabsTextToSpeechModel.builder().elevenLabsApi(api).build();
    }

    /**
     * Reads the API key from the environment.
     * @return the key wrapped in a {@link SimpleApiKey}.
     * @throws IllegalArgumentException if the variable is unset or blank.
     */
    private SimpleApiKey getApiKey() {
        String apiKey = System.getenv("ELEVEN_LABS_API_KEY");
        if (StringUtils.hasText(apiKey)) {
            return new SimpleApiKey(apiKey);
        }
        throw new IllegalArgumentException(
                "You must provide an API key. Put it in an environment variable under the name ELEVEN_LABS_API_KEY");
    }

}

View File

@@ -0,0 +1,110 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import reactor.core.publisher.Flux;
import org.springframework.ai.audio.tts.Speech;
import org.springframework.ai.audio.tts.TextToSpeechPrompt;
import org.springframework.ai.audio.tts.TextToSpeechResponse;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.ai.retry.NonTransientAiException;
import org.springframework.web.client.HttpClientErrorException;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.assertThatThrownBy;
/**
 * Integration tests for the {@link ElevenLabsTextToSpeechModel}.
 *
 * <p>
 * The whole class is enabled only when the {@code ELEVEN_LABS_API_KEY} environment
 * variable is set to a non-empty value.
 *
 * @author Alexandros Pappas
 */
@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
public class ElevenLabsTextToSpeechModelIT {

    private static final String VOICE_ID = "9BWtsMINqrJLrRacOk9x";

    @Autowired
    private ElevenLabsTextToSpeechModel textToSpeechModel;

    // Blocking call with only a voice configured: expects exactly one non-empty result.
    @Test
    void textToSpeechWithVoiceTest() {
        ElevenLabsTextToSpeechOptions voiceOnly = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build();
        TextToSpeechResponse response = this.textToSpeechModel.call(new TextToSpeechPrompt("Hello, world!", voiceOnly));

        assertThat(response).isNotNull();
        List<Speech> speeches = response.getResults();
        assertThat(speeches).hasSize(1);
        Speech first = speeches.get(0);
        assertThat(first.getOutput()).isNotEmpty();
    }

    // Streaming call: every emitted response must carry exactly one non-empty result.
    @Test
    void textToSpeechStreamWithVoiceTest() {
        ElevenLabsTextToSpeechOptions voiceOnly = ElevenLabsTextToSpeechOptions.builder().voice(VOICE_ID).build();
        TextToSpeechPrompt prompt = new TextToSpeechPrompt(
                "Hello, world! This is a test of streaming speech synthesis.", voiceOnly);

        Flux<TextToSpeechResponse> stream = this.textToSpeechModel.stream(prompt);
        List<TextToSpeechResponse> collected = stream.collectList().block();

        assertThat(collected).isNotNull().isNotEmpty();
        for (TextToSpeechResponse chunk : collected) {
            assertThat(chunk).isNotNull();
            assertThat(chunk.getResults()).hasSize(1);
            assertThat(chunk.getResults().get(0).getOutput()).isNotEmpty();
        }
    }

    // An unknown voice id is expected to surface as a NonTransientAiException.
    @Test
    void invalidVoiceId() {
        ElevenLabsTextToSpeechOptions badVoice = ElevenLabsTextToSpeechOptions.builder()
            .model("eleven_turbo_v2_5")
            .voiceId("invalid-voice-id")
            .outputFormat(ElevenLabsApi.OutputFormat.MP3_44100_128.getValue())
            .build();
        TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", badVoice);

        assertThatThrownBy(() -> this.textToSpeechModel.call(speechPrompt))
            .isInstanceOf(NonTransientAiException.class)
            .hasMessageContaining("An invalid ID has been received: 'invalid-voice-id'");
    }

    // The prompt has empty text and no options; the assertion pins the missing-voiceId
    // message. NOTE(review): the method name suggests an empty-text check - confirm
    // which condition this test is meant to cover.
    @Test
    void emptyInputText() {
        TextToSpeechPrompt emptyPrompt = new TextToSpeechPrompt("");

        assertThatThrownBy(() -> this.textToSpeechModel.call(emptyPrompt))
            .isInstanceOf(IllegalArgumentException.class)
            .hasMessageContaining("A voiceId must be specified in the ElevenLabsSpeechOptions.");
    }

}

View File

@@ -0,0 +1,232 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.springframework.ai.elevenlabs.api.ElevenLabsApi;
import static org.assertj.core.api.Assertions.assertThat;
/**
 * Unit tests for the {@link ElevenLabsTextToSpeechOptions}.
 *
 * <p>
 * These tests only exercise the builder, setters, getters and {@code copy()} of the
 * options object in memory; they perform no API calls.
 *
 * @author Alexandros Pappas
 */
public class ElevenLabsTextToSpeechOptionsTests {

    // Populates every builder option and reads each one back through its getter.
    @Test
    public void testBuilderWithAllFields() {
        ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder()
            .modelId("test-model")
            .voice("test-voice")
            .voiceId("test-voice-id") // Test both voice and voiceId
            .format("mp3_44100_128")
            .outputFormat("mp3_44100_128")
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, 0.9, true, 1.2))
            .languageCode("en")
            .pronunciationDictionaryLocators(
                    List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1")))
            .seed(12345)
            .previousText("previous")
            .nextText("next")
            .previousRequestIds(List.of("req1", "req2"))
            .nextRequestIds(List.of("req3", "req4"))
            .applyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON)
            .applyLanguageTextNormalization(true)
            .build();

        assertThat(options.getModelId()).isEqualTo("test-model");
        // voiceId(...) was called after voice(...), so the later value is expected from both getters.
        assertThat(options.getVoice()).isEqualTo("test-voice-id");
        assertThat(options.getVoiceId()).isEqualTo("test-voice-id");
        assertThat(options.getFormat()).isEqualTo("mp3_44100_128");
        assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128");
        assertThat(options.getVoiceSettings()).isNotNull();
        assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5);
        assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8);
        assertThat(options.getVoiceSettings().style()).isEqualTo(0.9);
        assertThat(options.getVoiceSettings().useSpeakerBoost()).isTrue();
        assertThat(options.getSpeed()).isEqualTo(1.2); // Check via getter
        assertThat(options.getLanguageCode()).isEqualTo("en");
        assertThat(options.getPronunciationDictionaryLocators()).hasSize(1);
        assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1");
        assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1");
        assertThat(options.getSeed()).isEqualTo(12345);
        assertThat(options.getPreviousText()).isEqualTo("previous");
        assertThat(options.getNextText()).isEqualTo("next");
        assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2");
        assertThat(options.getNextRequestIds()).containsExactly("req3", "req4");
        assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
        assertThat(options.getApplyLanguageTextNormalization()).isTrue();
    }

    // A copy must be a distinct but equal object, and changing it must not affect the original.
    @Test
    public void testCopy() {
        ElevenLabsTextToSpeechOptions original = ElevenLabsTextToSpeechOptions.builder()
            .modelId("test-model")
            .voice("test-voice")
            .format("mp3_44100_128")
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null))
            .build();

        ElevenLabsTextToSpeechOptions copied = original.copy();
        assertThat(copied).isNotSameAs(original).isEqualTo(original);

        // Mutate the copy itself (rather than rebinding the variable to a new object) so
        // that the assertions below actually verify independence from the original.
        copied.setModelId("new-model");
        assertThat(original.getModelId()).isEqualTo("test-model");
        assertThat(copied.getModelId()).isEqualTo("new-model");
    }

    // Populates every option through setters and reads each one back through its getter.
    @Test
    public void testSetters() {
        ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions();
        options.setModelId("test-model");
        options.setVoice("test-voice");
        options.setVoiceId("test-voice-id");
        options.setOutputFormat("mp3_44100_128");
        options.setFormat("mp3_44100_128");
        options.setVoiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.8, null, null, null));
        options.setLanguageCode("en");
        options.setPronunciationDictionaryLocators(
                List.of(new ElevenLabsApi.SpeechRequest.PronunciationDictionaryLocator("dict1", "v1")));
        options.setSeed(12345);
        options.setPreviousText("previous");
        options.setNextText("next");
        options.setPreviousRequestIds(List.of("req1", "req2"));
        options.setNextRequestIds(List.of("req3", "req4"));
        options.setApplyTextNormalization(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
        options.setApplyLanguageTextNormalization(true);

        assertThat(options.getModelId()).isEqualTo("test-model");
        // setVoiceId(...) was called after setVoice(...), so the later value is expected from both getters.
        assertThat(options.getVoice()).isEqualTo("test-voice-id");
        assertThat(options.getVoiceId()).isEqualTo("test-voice-id");
        assertThat(options.getFormat()).isEqualTo("mp3_44100_128");
        assertThat(options.getOutputFormat()).isEqualTo("mp3_44100_128");
        assertThat(options.getVoiceSettings()).isNotNull();
        assertThat(options.getVoiceSettings().stability()).isEqualTo(0.5);
        assertThat(options.getVoiceSettings().similarityBoost()).isEqualTo(0.8);
        assertThat(options.getLanguageCode()).isEqualTo("en");
        assertThat(options.getPronunciationDictionaryLocators()).hasSize(1);
        assertThat(options.getPronunciationDictionaryLocators().get(0).pronunciationDictionaryId()).isEqualTo("dict1");
        assertThat(options.getPronunciationDictionaryLocators().get(0).versionId()).isEqualTo("v1");
        assertThat(options.getSeed()).isEqualTo(12345);
        assertThat(options.getPreviousText()).isEqualTo("previous");
        assertThat(options.getNextText()).isEqualTo("next");
        assertThat(options.getPreviousRequestIds()).containsExactly("req1", "req2");
        assertThat(options.getNextRequestIds()).containsExactly("req3", "req4");
        assertThat(options.getApplyTextNormalization()).isEqualTo(ElevenLabsApi.SpeechRequest.TextNormalizationMode.ON);
        assertThat(options.getApplyLanguageTextNormalization()).isTrue();
    }

    // A freshly constructed options object is expected to report null for every option.
    @Test
    public void testDefaultValues() {
        ElevenLabsTextToSpeechOptions options = new ElevenLabsTextToSpeechOptions();
        assertThat(options.getModelId()).isNull();
        assertThat(options.getVoice()).isNull();
        assertThat(options.getVoiceId()).isNull();
        assertThat(options.getFormat()).isNull();
        assertThat(options.getOutputFormat()).isNull();
        assertThat(options.getSpeed()).isNull();
        assertThat(options.getVoiceSettings()).isNull();
        assertThat(options.getLanguageCode()).isNull();
        assertThat(options.getPronunciationDictionaryLocators()).isNull();
        assertThat(options.getSeed()).isNull();
        assertThat(options.getPreviousText()).isNull();
        assertThat(options.getNextText()).isNull();
        assertThat(options.getPreviousRequestIds()).isNull();
        assertThat(options.getNextRequestIds()).isNull();
        assertThat(options.getApplyTextNormalization()).isNull();
        assertThat(options.getApplyLanguageTextNormalization()).isNull();
    }

    // Checks how getSpeed() and getVoiceSettings().speed() relate across builder and setter usage.
    @Test
    public void testSetSpeed() {
        // 1. Setting speed via voiceSettings, no existing voiceSettings
        ElevenLabsTextToSpeechOptions options = ElevenLabsTextToSpeechOptions.builder()
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(null, null, null, null, 1.5))
            .build();
        assertThat(options.getSpeed()).isEqualTo(1.5);
        assertThat(options.getVoiceSettings()).isNotNull();
        assertThat(options.getVoiceSettings().speed()).isEqualTo(1.5);

        // 2. Setting speed via voiceSettings, existing voiceSettings
        ElevenLabsTextToSpeechOptions options2 = ElevenLabsTextToSpeechOptions.builder()
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null))
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0)) // Overwrite
            .build();
        // Speed values are doubles, so double literals are used for the expectations.
        assertThat(options2.getSpeed()).isEqualTo(2.0);
        assertThat(options2.getVoiceSettings().speed()).isEqualTo(2.0);
        assertThat(options2.getVoiceSettings().stability()).isEqualTo(0.1);

        // 3. Setting voiceSettings with null speed, existing voiceSettings
        ElevenLabsTextToSpeechOptions options3 = ElevenLabsTextToSpeechOptions.builder()
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.0))
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null)) // Overwrite
            .build();
        assertThat(options3.getSpeed()).isNull();
        assertThat(options3.getVoiceSettings().speed()).isNull();
        assertThat(options3.getVoiceSettings().stability()).isEqualTo(0.1);

        // 4. No voiceSettings configured at all (the builder must not create
        // voiceSettings on its own)
        ElevenLabsTextToSpeechOptions options4 = ElevenLabsTextToSpeechOptions.builder().build();
        assertThat(options4.getSpeed()).isNull();
        assertThat(options4.getVoiceSettings()).isNull();

        // 5. Setting voiceSettings directly, with speed.
        ElevenLabsTextToSpeechOptions options5 = ElevenLabsTextToSpeechOptions.builder()
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 2.5))
            .build();
        assertThat(options5.getSpeed()).isEqualTo(2.5);
        assertThat(options5.getVoiceSettings().speed()).isEqualTo(2.5);

        // 6. Setting voiceSettings directly, without speed (speed should be null).
        ElevenLabsTextToSpeechOptions options6 = ElevenLabsTextToSpeechOptions.builder()
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, null))
            .build();
        assertThat(options6.getSpeed()).isNull();
        assertThat(options6.getVoiceSettings().speed()).isNull();

        // 7. Setting voiceSettings to null, after previously setting it.
        ElevenLabsTextToSpeechOptions options7 = ElevenLabsTextToSpeechOptions.builder()
            .voiceSettings(new ElevenLabsApi.SpeechRequest.VoiceSettings(0.1, 0.2, 0.3, true, 1.5))
            .voiceSettings(null)
            .build();
        assertThat(options7.getSpeed()).isNull();
        assertThat(options7.getVoiceSettings()).isNull();

        // 8. Setting speed via setSpeed method
        ElevenLabsTextToSpeechOptions options8 = ElevenLabsTextToSpeechOptions.builder().build();
        options8.setSpeed(3.0);
        assertThat(options8.getSpeed()).isEqualTo(3.0);
        assertThat(options8.getVoiceSettings()).isNotNull();
        assertThat(options8.getVoiceSettings().speed()).isEqualTo(3.0);

        // 9. Setting speed to null via setSpeed method
        options8.setSpeed(null);
        assertThat(options8.getSpeed()).isNull();
        assertThat(options8.getVoiceSettings().speed()).isNull();
    }

}

View File

@@ -0,0 +1,224 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs.api;
import java.io.IOException;
import java.util.concurrent.atomic.AtomicInteger;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import reactor.core.publisher.Flux;
import reactor.test.StepVerifier;
import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.http.ResponseEntity;
import org.springframework.util.LinkedMultiValueMap;
import org.springframework.util.MultiValueMap;
import static org.assertj.core.api.Assertions.assertThat;
import static org.junit.jupiter.api.Assertions.assertThrows;
/**
* Integration tests for the {@link ElevenLabsApi}.
*
* <p>
* These tests require a valid ElevenLabs API key to be set as an environment variable
* named {@code ELEVEN_LABS_API_KEY}.
*
* @author Alexandros Pappas
*/
@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
public class ElevenLabsApiIT {
@Autowired
private ElevenLabsApi elevenLabsApi;
// Minimal blocking request: expects a 2xx status and a non-empty audio body.
@Test
public void testTextToSpeech() throws IOException {
    String voiceId = "9BWtsMINqrJLrRacOk9x";
    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("Hello, world!")
        .modelId("eleven_turbo_v2_5")
        .build();

    ResponseEntity<byte[]> audio = this.elevenLabsApi.textToSpeech(speechRequest, voiceId, null);

    assertThat(audio.getStatusCode().is2xxSuccessful()).isTrue();
    assertThat(audio.getBody()).isNotNull().isNotEmpty();
}
// Blocking request carrying explicit voice settings: expects a 2xx status and audio bytes.
@Test
public void testTextToSpeechWithVoiceSettings() {
    String voiceId = "9BWtsMINqrJLrRacOk9x";
    ElevenLabsApi.SpeechRequest.VoiceSettings settings = new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7,
            0.0, true, 1.0);
    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("Hello, with Voice settings!")
        .modelId("eleven_turbo_v2_5")
        .voiceSettings(settings)
        .build();

    ResponseEntity<byte[]> audio = this.elevenLabsApi.textToSpeech(speechRequest, voiceId, null);

    assertThat(audio.getStatusCode().is2xxSuccessful()).isTrue();
    assertThat(audio.getBody()).isNotNull().isNotEmpty();
}
// Blocking request with extra query parameters: expects a 2xx status and audio bytes.
@Test
public void testTextToSpeechWithQueryParams() {
    String voiceId = "9BWtsMINqrJLrRacOk9x";
    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("Hello, testing query params!")
        .modelId("eleven_turbo_v2_5")
        .build();

    MultiValueMap<String, String> params = new LinkedMultiValueMap<>();
    params.add("optimize_streaming_latency", "2");
    params.add("enable_logging", "true");
    params.add("output_format", ElevenLabsApi.OutputFormat.MP3_22050_32.getValue());

    ResponseEntity<byte[]> audio = this.elevenLabsApi.textToSpeech(speechRequest, voiceId, params);

    assertThat(audio.getStatusCode().is2xxSuccessful()).isTrue();
    assertThat(audio.getBody()).isNotNull().isNotEmpty();
}
// A null voice id must be rejected with an IllegalArgumentException and a fixed message.
@Test
public void testTextToSpeechVoiceIdNull() {
    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("This should fail.")
        .modelId("eleven_turbo_v2_5")
        .build();

    IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class,
            () -> this.elevenLabsApi.textToSpeech(speechRequest, null, null));

    assertThat(thrown).hasMessage("voiceId must be provided. It cannot be null.");
}
// Building a request with empty text must fail with an IllegalArgumentException.
@Test
public void testTextToSpeechTextEmpty() {
    IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class,
            () -> ElevenLabsApi.SpeechRequest.builder().text("").modelId("eleven_turbo_v2_5").build());

    assertThat(thrown).hasMessage("text must not be empty");
}
// Streaming API tests
// Streaming request: every chunk must be a 2xx response with bytes, and at least one
// chunk must arrive before the stream completes.
@Test
public void testTextToSpeechStream() {
    String voiceId = "9BWtsMINqrJLrRacOk9x";
    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("This is a longer text to ensure multiple chunks are received through the streaming API.")
        .modelId("eleven_turbo_v2_5")
        .build();

    Flux<ResponseEntity<byte[]>> audioChunks = this.elevenLabsApi.textToSpeechStream(speechRequest, voiceId, null);

    // Counts the chunks seen by the verifier.
    AtomicInteger received = new AtomicInteger();

    StepVerifier.create(audioChunks).thenConsumeWhile(chunk -> {
        assertThat(chunk.getStatusCode().is2xxSuccessful()).isTrue();
        assertThat(chunk.getBody()).isNotNull().isNotEmpty();
        received.incrementAndGet();
        // Always keep consuming; the stream ends on completion.
        return true;
    }).verifyComplete();

    assertThat(received.get()).isPositive();
}
/**
 * Streams a request that carries explicit voice settings and checks that every emitted
 * chunk is a successful, non-empty response and that the stream completes.
 */
@Test
public void testTextToSpeechStreamWithVoiceSettings() {
    String voiceId = "9BWtsMINqrJLrRacOk9x";
    ElevenLabsApi.SpeechRequest.VoiceSettings settings = new ElevenLabsApi.SpeechRequest.VoiceSettings(0.5, 0.7,
            null, null, null);
    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("Hello, with Voice settings in streaming mode!")
        .modelId("eleven_turbo_v2_5")
        .voiceSettings(settings)
        .build();

    StepVerifier.create(elevenLabsApi.textToSpeechStream(speechRequest, voiceId, null))
        .thenConsumeWhile(chunk -> {
            assertThat(chunk.getStatusCode().is2xxSuccessful()).isTrue();
            assertThat(chunk.getBody()).isNotNull().isNotEmpty();
            return true;
        })
        .verifyComplete();
}
/**
 * Streams a request with explicit query parameters and checks that every emitted chunk
 * is a successful, non-empty response and that the stream completes.
 */
@Test
public void testTextToSpeechStreamWithQueryParams() {
    String voiceId = "9BWtsMINqrJLrRacOk9x";

    MultiValueMap<String, String> params = new LinkedMultiValueMap<>();
    params.add("optimize_streaming_latency", "2");
    params.add("enable_logging", "true");
    params.add("output_format", "mp3_44100_128");

    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("Hello, testing streaming with query params!")
        .modelId("eleven_turbo_v2_5")
        .build();

    StepVerifier.create(elevenLabsApi.textToSpeechStream(speechRequest, voiceId, params))
        .thenConsumeWhile(chunk -> {
            assertThat(chunk.getStatusCode().is2xxSuccessful()).isTrue();
            assertThat(chunk.getBody()).isNotNull().isNotEmpty();
            return true;
        })
        .verifyComplete();
}
/**
 * Checks that {@code textToSpeechStream} throws an {@link IllegalArgumentException}
 * with the expected message when the voice id argument is {@code null}.
 */
@Test
public void testTextToSpeechStreamVoiceIdNull() {
    ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
        .text("This should fail.")
        .modelId("eleven_turbo_v2_5")
        .build();

    IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class,
            () -> elevenLabsApi.textToSpeechStream(speechRequest, null, null));

    assertThat(thrown).hasMessage("voiceId must be provided for streaming. It cannot be null.");
}
/**
 * Checks that {@code textToSpeechStream} throws an {@link IllegalArgumentException}
 * with the expected message when the request body argument is {@code null}.
 */
@Test
public void testTextToSpeechStreamRequestBodyNull() {
    String voiceId = "9BWtsMINqrJLrRacOk9x";

    IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class,
            () -> elevenLabsApi.textToSpeechStream(null, voiceId, null));

    assertThat(thrown).hasMessage("requestBody can not be null.");
}
/**
 * Checks that building an empty-text request and passing it to
 * {@code textToSpeechStream} results in an {@link IllegalArgumentException} with the
 * expected message.
 */
@Test
public void testTextToSpeechStreamTextEmpty() {
    String voiceId = "9BWtsMINqrJLrRacOk9x";

    IllegalArgumentException thrown = assertThrows(IllegalArgumentException.class, () -> {
        // The request is built inside the lambda so a failure while building is captured too.
        ElevenLabsApi.SpeechRequest speechRequest = ElevenLabsApi.SpeechRequest.builder()
            .text("")
            .modelId("eleven_turbo_v2_5")
            .build();
        elevenLabsApi.textToSpeechStream(speechRequest, voiceId, null);
    });

    assertThat(thrown).hasMessage("text must not be empty");
}
}

View File

@@ -0,0 +1,113 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.elevenlabs.api;
import java.util.List;
import org.junit.jupiter.api.Test;
import org.junit.jupiter.api.condition.EnabledIfEnvironmentVariable;
import org.springframework.ai.elevenlabs.ElevenLabsTestConfiguration;
import org.springframework.beans.factory.annotation.Autowired;
import org.springframework.boot.test.context.SpringBootTest;
import org.springframework.http.ResponseEntity;
import static org.assertj.core.api.Assertions.assertThat;
/**
* Integration tests for the {@link ElevenLabsVoicesApi}.
*
* <p>
* These tests require a valid ElevenLabs API key to be set as an environment variable
* named {@code ELEVEN_LABS_API_KEY}.
*
* @author Alexandros Pappas
*/
@SpringBootTest(classes = ElevenLabsTestConfiguration.class)
@EnabledIfEnvironmentVariable(named = "ELEVEN_LABS_API_KEY", matches = ".+")
public class ElevenLabsVoicesApiIT {

    @Autowired
    private ElevenLabsVoicesApi voicesApi;

    /**
     * Lists the voices and checks that the call succeeds and that every returned voice
     * carries a non-blank id.
     */
    @Test
    void getVoices() {
        ResponseEntity<ElevenLabsVoicesApi.Voices> response = voicesApi.getVoices();

        assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
        assertThat(response.getBody()).isNotNull();

        List<ElevenLabsVoicesApi.Voice> voices = response.getBody().voices();
        assertThat(voices).isNotNull().isNotEmpty();
        for (ElevenLabsVoicesApi.Voice voice : voices) {
            assertThat(voice.voiceId()).isNotBlank();
        }
    }

    /**
     * Fetches the default voice settings and checks that the asserted fields are present.
     */
    @Test
    void getDefaultVoiceSettings() {
        ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> response = voicesApi.getDefaultVoiceSettings();

        assertThat(response.getStatusCode().is2xxSuccessful()).isTrue();
        assertThat(response.getBody()).isNotNull();
        assertSettingsPopulated(response.getBody());
    }

    /**
     * Fetches the settings of the first listed voice and checks that the asserted fields
     * are present.
     */
    @Test
    void getVoiceSettings() {
        String voiceId = firstAvailableVoiceId();

        ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> settingsResponse = voicesApi.getVoiceSettings(voiceId);

        assertThat(settingsResponse.getStatusCode().is2xxSuccessful()).isTrue();
        assertThat(settingsResponse.getBody()).isNotNull();
        assertSettingsPopulated(settingsResponse.getBody());
    }

    /**
     * Fetches the first listed voice by id and checks that the returned voice has the
     * same id and a non-blank name.
     */
    @Test
    void getVoice() {
        String voiceId = firstAvailableVoiceId();

        ResponseEntity<ElevenLabsVoicesApi.Voice> voiceResponse = voicesApi.getVoice(voiceId);

        assertThat(voiceResponse.getStatusCode().is2xxSuccessful()).isTrue();
        assertThat(voiceResponse.getBody()).isNotNull();

        ElevenLabsVoicesApi.Voice voice = voiceResponse.getBody();
        assertThat(voice.voiceId()).isEqualTo(voiceId);
        assertThat(voice.name()).isNotBlank();
    }

    /**
     * Lists the voices and returns the id of the first one, asserting along the way that
     * the call succeeded and that the body and voice list are usable.
     */
    private String firstAvailableVoiceId() {
        ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices();
        assertThat(voicesResponse.getStatusCode().is2xxSuccessful()).isTrue();
        // Assert before dereferencing so a missing body is reported as a test failure, not an NPE.
        assertThat(voicesResponse.getBody()).isNotNull();

        List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices();
        assertThat(voices).isNotEmpty();
        return voices.get(0).voiceId();
    }

    /**
     * Asserts that stability, similarity boost, style and speaker boost are all non-null.
     */
    private static void assertSettingsPopulated(ElevenLabsVoicesApi.VoiceSettings settings) {
        assertThat(settings.stability()).isNotNull();
        assertThat(settings.similarityBoost()).isNotNull();
        assertThat(settings.style()).isNotNull();
        assertThat(settings.useSpeakerBoost()).isNotNull();
    }

}

File diff suppressed because it is too large Load Diff

View File

@@ -29,7 +29,10 @@ import org.springframework.lang.Nullable;
*
* @author Ahmed Yousri
* @since 1.0.0-M1
* @deprecated Use {@link org.springframework.ai.audio.tts.Speech} from the core package
* instead. This class will be removed in a future release.
*/
@Deprecated
public class Speech implements ModelResult<byte[]> {
private final byte[] audio;

View File

@@ -24,7 +24,10 @@ import java.util.Objects;
*
* @author Ahmed Yousri
* @since 1.0.0-M1
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechMessage} from the
* core package instead. This class will be removed in a future release.
*/
@Deprecated
public class SpeechMessage {
private String text;

View File

@@ -25,7 +25,10 @@ import org.springframework.ai.model.Model;
*
* @author Ahmed Yousri
* @since 1.0.0-M1
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechModel} from the
* core package instead. This interface will be removed in a future release.
*/
@Deprecated
@FunctionalInterface
public interface SpeechModel extends Model<SpeechPrompt, SpeechResponse> {

View File

@@ -29,7 +29,10 @@ import org.springframework.ai.openai.OpenAiAudioSpeechOptions;
*
* @author Ahmed Yousri
* @since 1.0.0-M1
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechPrompt} from the
* core package instead. This class will be removed in a future release.
*/
@Deprecated
public class SpeechPrompt implements ModelRequest<SpeechMessage> {
private final SpeechMessage message;

View File

@@ -28,7 +28,10 @@ import org.springframework.ai.openai.metadata.audio.OpenAiAudioSpeechResponseMet
*
* @author Ahmed Yousri
* @since 1.0.0-M1
* @deprecated Use {@link org.springframework.ai.audio.tts.TextToSpeechResponse} from the
* core package instead. This class will be removed in a future release.
*/
@Deprecated
public class SpeechResponse implements ModelResponse<Speech> {
private final Speech speech;

View File

@@ -27,7 +27,10 @@ import org.springframework.ai.model.StreamingModel;
*
* @author Ahmed Yousri
* @since 1.0.0-M1
* @deprecated Use {@link org.springframework.ai.audio.tts.StreamingTextToSpeechModel}
* from the core package instead. This interface will be removed in a future release.
*/
@Deprecated
@FunctionalInterface
public interface StreamingSpeechModel extends StreamingModel<SpeechPrompt, SpeechResponse> {

View File

@@ -16,9 +16,9 @@
package org.springframework.ai.openai.metadata.audio;
import org.springframework.ai.audio.tts.TextToSpeechResponseMetadata;
import org.springframework.ai.chat.metadata.EmptyRateLimit;
import org.springframework.ai.chat.metadata.RateLimit;
import org.springframework.ai.model.MutableResponseMetadata;
import org.springframework.ai.openai.api.OpenAiAudioApi;
import org.springframework.lang.Nullable;
import org.springframework.util.Assert;
@@ -29,7 +29,7 @@ import org.springframework.util.Assert;
* @author Ahmed Yousri
* @see RateLimit
*/
public class OpenAiAudioSpeechResponseMetadata extends MutableResponseMetadata {
public class OpenAiAudioSpeechResponseMetadata extends TextToSpeechResponseMetadata {
public static final OpenAiAudioSpeechResponseMetadata NULL = new OpenAiAudioSpeechResponseMetadata() {

View File

@@ -99,6 +99,7 @@
<module>auto-configurations/models/spring-ai-autoconfigure-model-anthropic</module>
<module>auto-configurations/models/spring-ai-autoconfigure-model-azure-openai</module>
<module>auto-configurations/models/spring-ai-autoconfigure-model-bedrock-ai</module>
<module>auto-configurations/models/spring-ai-autoconfigure-model-elevenlabs</module>
<module>auto-configurations/models/spring-ai-autoconfigure-model-huggingface</module>
<module>auto-configurations/models/spring-ai-autoconfigure-model-openai</module>
<module>auto-configurations/models/spring-ai-autoconfigure-model-minimax</module>
@@ -162,6 +163,7 @@
<module>models/spring-ai-azure-openai</module>
<module>models/spring-ai-bedrock</module>
<module>models/spring-ai-bedrock-converse</module>
<module>models/spring-ai-elevenlabs</module>
<module>models/spring-ai-huggingface</module>
<module>models/spring-ai-minimax</module>
<module>models/spring-ai-mistral-ai</module>
@@ -180,6 +182,7 @@
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-azure-openai</module>
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock</module>
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-bedrock-converse</module>
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-elevenlabs</module>
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-huggingface</module>
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-minimax</module>
<module>spring-ai-spring-boot-starters/spring-ai-starter-model-mistral-ai</module>
@@ -712,7 +715,8 @@
<exclude>org.springframework.ai.anthropic/**/*IT.java</exclude>
<exclude>org.springframework.ai.azure.openai/**/*IT.java</exclude>
<exclude>org.springframework.ai.bedrock/**/*IT.java</exclude>
<exclude>org.springframework.ai.bedrock.converse/**/*IT.java</exclude>
<exclude>org.springframework.ai.bedrock.converse/**/*IT.java</exclude>
<exclude>org.springframework.ai.elevenlabs/**/*IT.java</exclude>
<exclude>org.springframework.ai.huggingface/**/*IT.java</exclude>
<exclude>org.springframework.ai.minimax/**/*IT.java</exclude>
<exclude>org.springframework.ai.mistralai/**/*IT.java</exclude>
@@ -760,6 +764,7 @@
<exclude>org.springframework.ai.autoconfigure.huggingface/**/**IT.java</exclude>
<exclude>org.springframework.ai.autoconfigure.chat/**/**IT.java</exclude>
<exclude>org.springframework.ai.autoconfigure.elevenlabs/**/**IT.java</exclude>
<exclude>org.springframework.ai.autoconfigure.embedding/**/**IT.java</exclude>
<exclude>org.springframework.ai.autoconfigure.image/**/**IT.java</exclude>

View File

@@ -243,6 +243,13 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elevenlabs</artifactId>
<version>${project.version}</version>
<optional>true</optional>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-huggingface</artifactId>
@@ -310,7 +317,6 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-zhipuai</artifactId>
@@ -565,6 +571,11 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-autoconfigure-model-elevenlabs</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-autoconfigure-model-huggingface</artifactId>
@@ -914,6 +925,11 @@
<version>${project.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-elevenlabs</artifactId>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-minimax</artifactId>

View File

@@ -1,5 +1,9 @@
[[Speech]]
= Text-To-Speech (TTS) API
Spring AI provides support for OpenAI's Speech API.
When additional providers for Speech are implemented, a common `SpeechModel` and `StreamingSpeechModel` interface will be extracted.
Spring AI provides support for the following Text-To-Speech (TTS) providers:
- xref:api/audio/speech/openai-speech.adoc[OpenAI's Speech API]
- xref:api/audio/speech/elevenlabs-speech.adoc[Eleven Labs Text-To-Speech API]
Future enhancements may introduce additional providers, at which point a common `TextToSpeechModel` and `StreamingTextToSpeechModel` interface will be extracted.

View File

@@ -0,0 +1,268 @@
= ElevenLabs Text-to-Speech (TTS)
== Introduction
ElevenLabs provides natural-sounding speech synthesis software using deep learning. Its AI audio models generate realistic, versatile, and contextually-aware speech, voices, and sound effects across 32 languages. The ElevenLabs Text-to-Speech API enables users to bring any book, article, PDF, newsletter, or text to life with ultra-realistic AI narration.
== Prerequisites
. Create an ElevenLabs account and obtain an API key. You can sign up at the https://elevenlabs.io/sign-up[ElevenLabs signup page]. Your API key can be found on your profile page after logging in.
. Add the `spring-ai-elevenlabs` dependency to your project's build file. For more information, refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section.
== Auto-configuration
Spring AI provides Spring Boot auto-configuration for the ElevenLabs Text-to-Speech Client.
To enable it, add the following dependency to your project's Maven `pom.xml` file:
[source,xml]
----
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-starter-model-elevenlabs</artifactId>
</dependency>
----
or to your Gradle `build.gradle` build file:
[source,groovy]
----
dependencies {
implementation 'org.springframework.ai:spring-ai-starter-model-elevenlabs'
}
----
TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
== Speech Properties
=== Connection Properties
The prefix `spring.ai.elevenlabs` is used as the property prefix for *all* ElevenLabs related configurations (both connection and TTS specific settings). This is defined in `ElevenLabsConnectionProperties`.
[cols="3,5,1"]
|====
| Property | Description | Default
| spring.ai.elevenlabs.base-url | The base URL for the ElevenLabs API. | https://api.elevenlabs.io
| spring.ai.elevenlabs.api-key | Your ElevenLabs API key. | -
|====
=== Configuration Properties
The prefix `spring.ai.elevenlabs.tts` is used as the property prefix to configure the ElevenLabs Text-to-Speech client, specifically. This is defined in `ElevenLabsSpeechProperties`.
[cols="3,5,2"]
|====
| Property | Description | Default
| spring.ai.elevenlabs.tts.options.model-id | The ID of the model to use. | eleven_turbo_v2_5
| spring.ai.elevenlabs.tts.options.voice-id | The ID of the voice to use. This is the *voice ID*, not the voice name. | 9BWtsMINqrJLrRacOk9x
| spring.ai.elevenlabs.tts.options.output-format | The output format for the generated audio. See xref:#output-formats[Output Formats] below. | mp3_22050_32
| spring.ai.elevenlabs.tts.enabled | Enable or disable the ElevenLabs Text-to-Speech client. | true
|====
NOTE: The base URL and API key can also be configured *specifically* for TTS using `spring.ai.elevenlabs.tts.base-url` and `spring.ai.elevenlabs.tts.api-key`. However, it is generally recommended to use the global `spring.ai.elevenlabs` prefix for simplicity, unless you have a specific reason to use different credentials for different ElevenLabs services. The more specific `tts` properties will override the global ones.
TIP: All properties prefixed with `spring.ai.elevenlabs.tts.options` can be overridden at runtime.
[[output-formats]]
.Available Output Formats
[cols="1,1"]
|====
| Enum Value | Description
| MP3_22050_32 | MP3, 22.05 kHz, 32 kbps
| MP3_44100_32 | MP3, 44.1 kHz, 32 kbps
| MP3_44100_64 | MP3, 44.1 kHz, 64 kbps
| MP3_44100_96 | MP3, 44.1 kHz, 96 kbps
| MP3_44100_128 | MP3, 44.1 kHz, 128 kbps
| MP3_44100_192 | MP3, 44.1 kHz, 192 kbps
| PCM_8000 | PCM, 8 kHz
| PCM_16000 | PCM, 16 kHz
| PCM_22050 | PCM, 22.05 kHz
| PCM_24000 | PCM, 24 kHz
| PCM_44100 | PCM, 44.1 kHz
| PCM_48000 | PCM, 48 kHz
| ULAW_8000 | µ-law, 8 kHz
| ALAW_8000 | A-law, 8 kHz
| OPUS_48000_32 | Opus, 48 kHz, 32 kbps
| OPUS_48000_64 | Opus, 48 kHz, 64 kbps
| OPUS_48000_96 | Opus, 48 kHz, 96 kbps
| OPUS_48000_128 | Opus, 48 kHz, 128 kbps
| OPUS_48000_192 | Opus, 48 kHz, 192 kbps
|====
== Runtime Options [[speech-options]]
The `ElevenLabsTextToSpeechOptions` class provides options to use when making a text-to-speech request. On start-up, the options specified by `spring.ai.elevenlabs.tts.options` are used, but you can override these at runtime. The following options are available:
* `modelId`: The ID of the model to use.
* `voiceId`: The ID of the voice to use.
* `outputFormat`: The output format of the generated audio.
* `voiceSettings`: An object containing voice settings such as `stability`, `similarityBoost`, `style`, `useSpeakerBoost`, and `speed`.
* `enableLogging`: A boolean to enable or disable logging.
* `languageCode`: The language code of the input text (e.g., "en" for English).
* `pronunciationDictionaryLocators`: A list of pronunciation dictionary locators.
* `seed`: A seed for random number generation, for reproducibility.
* `previousText`: Text before the main text, for context in multi-turn conversations.
* `nextText`: Text after the main text, for context in multi-turn conversations.
* `previousRequestIds`: Request IDs from previous turns in a conversation.
* `nextRequestIds`: Request IDs for subsequent turns in a conversation.
* `applyTextNormalization`: Apply text normalization ("auto", "on", or "off").
* `applyLanguageTextNormalization`: Apply language text normalization.
For example:
[source,java]
----
ElevenLabsTextToSpeechOptions speechOptions = ElevenLabsTextToSpeechOptions.builder()
.model("eleven_multilingual_v2")
.voiceId("your_voice_id")
.outputFormat(ElevenLabsApi.OutputFormat.MP3_44100_128.getValue())
.build();
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.", speechOptions);
TextToSpeechResponse response = elevenLabsTextToSpeechModel.call(speechPrompt);
----
=== Using Voice Settings
You can customize the voice output by providing `VoiceSettings` in the options. This allows you to control properties like stability and similarity.
[source,java]
----
var voiceSettings = new ElevenLabsApi.SpeechRequest.VoiceSettings(0.75, 0.75, 0.0, true, null);
ElevenLabsTextToSpeechOptions speechOptions = ElevenLabsTextToSpeechOptions.builder()
.model("eleven_multilingual_v2")
.voiceId("your_voice_id")
.voiceSettings(voiceSettings)
.build();
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("This is a test with custom voice settings!", speechOptions);
TextToSpeechResponse response = elevenLabsTextToSpeechModel.call(speechPrompt);
----
== Manual Configuration
Add the `spring-ai-elevenlabs` dependency to your project's Maven `pom.xml` file:
[source,xml]
----
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elevenlabs</artifactId>
</dependency>
----
or to your Gradle `build.gradle` build file:
[source,groovy]
----
dependencies {
implementation 'org.springframework.ai:spring-ai-elevenlabs'
}
----
TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
Next, create an `ElevenLabsTextToSpeechModel`:
[source,java]
----
ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder()
.apiKey(System.getenv("ELEVEN_LABS_API_KEY"))
.build();
ElevenLabsTextToSpeechModel elevenLabsTextToSpeechModel = ElevenLabsTextToSpeechModel.builder()
.elevenLabsApi(elevenLabsApi)
.defaultOptions(ElevenLabsTextToSpeechOptions.builder()
.model("eleven_turbo_v2_5")
.voiceId("your_voice_id") // e.g. "9BWtsMINqrJLrRacOk9x"
.outputFormat("mp3_44100_128")
.build())
.build();
// The call will use the default options configured above.
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Hello, this is a text-to-speech example.");
TextToSpeechResponse response = elevenLabsTextToSpeechModel.call(speechPrompt);
byte[] responseAsBytes = response.getResult().getOutput();
----
== Streaming Real-time Audio
The ElevenLabs Speech API supports real-time audio streaming using chunked transfer encoding. This allows audio playback to begin before the entire audio file is generated.
[source,java]
----
ElevenLabsApi elevenLabsApi = ElevenLabsApi.builder()
.apiKey(System.getenv("ELEVEN_LABS_API_KEY"))
.build();
ElevenLabsTextToSpeechModel elevenLabsTextToSpeechModel = ElevenLabsTextToSpeechModel.builder()
.elevenLabsApi(elevenLabsApi)
.build();
ElevenLabsTextToSpeechOptions streamingOptions = ElevenLabsTextToSpeechOptions.builder()
.model("eleven_turbo_v2_5")
.voiceId("your_voice_id")
.outputFormat("mp3_44100_128")
.build();
TextToSpeechPrompt speechPrompt = new TextToSpeechPrompt("Today is a wonderful day to build something people love!", streamingOptions);
Flux<TextToSpeechResponse> responseStream = elevenLabsTextToSpeechModel.stream(speechPrompt);
// Process the stream, e.g., play the audio chunks
responseStream.subscribe(speechResponse -> {
byte[] audioChunk = speechResponse.getResult().getOutput();
// Play the audioChunk
});
----
== Voices API
The ElevenLabs Voices API allows you to retrieve information about available voices, their settings, and default voice settings. You can use this API to discover the `voiceId`s to use in your speech requests.
To use the Voices API, you'll need to create an instance of `ElevenLabsVoicesApi`:
[source,java]
----
ElevenLabsVoicesApi voicesApi = ElevenLabsVoicesApi.builder()
.apiKey(System.getenv("ELEVEN_LABS_API_KEY"))
.build();
----
You can then use the following methods:
* `getVoices()`: Retrieves a list of all available voices.
* `getDefaultVoiceSettings()`: Gets the default settings for voices.
* `getVoiceSettings(String voiceId)`: Returns the settings for a specific voice.
* `getVoice(String voiceId)`: Returns metadata about a specific voice.
Example:
[source,java]
----
// Get all voices
ResponseEntity<ElevenLabsVoicesApi.Voices> voicesResponse = voicesApi.getVoices();
List<ElevenLabsVoicesApi.Voice> voices = voicesResponse.getBody().voices();
// Get default voice settings
ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> defaultSettingsResponse = voicesApi.getDefaultVoiceSettings();
ElevenLabsVoicesApi.VoiceSettings defaultSettings = defaultSettingsResponse.getBody();
// Get settings for a specific voice
ResponseEntity<ElevenLabsVoicesApi.VoiceSettings> voiceSettingsResponse = voicesApi.getVoiceSettings(voiceId);
ElevenLabsVoicesApi.VoiceSettings voiceSettings = voiceSettingsResponse.getBody();
// Get details for a specific voice
ResponseEntity<ElevenLabsVoicesApi.Voice> voiceDetailsResponse = voicesApi.getVoice(voiceId);
ElevenLabsVoicesApi.Voice voiceDetails = voiceDetailsResponse.getBody();
----
== Example Code
* The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/ElevenLabsTextToSpeechModelIT.java[ElevenLabsTextToSpeechModelIT.java] test provides some general examples of how to use the library.
* The link:https://github.com/spring-projects/spring-ai/blob/main/models/spring-ai-elevenlabs/src/test/java/org/springframework/ai/elevenlabs/api/ElevenLabsApiIT.java[ElevenLabsApiIT.java] test provides examples of using the low-level `ElevenLabsApi`.

View File

@@ -0,0 +1,147 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import java.util.Objects;
import com.fasterxml.jackson.annotation.JsonInclude;
/**
 * Immutable value holder for the four generic {@link TextToSpeechOptions}: model,
 * voice, format and speed. Instances are created through {@link #builder()}.
 *
 * @author Alexandros Pappas
 */
@JsonInclude(JsonInclude.Include.NON_NULL)
public class DefaultTextToSpeechOptions implements TextToSpeechOptions {

    private final String model;

    private final String voice;

    private final String format;

    private final Double speed;

    private DefaultTextToSpeechOptions(String model, String voice, String format, Double speed) {
        this.model = model;
        this.voice = voice;
        this.format = format;
        this.speed = speed;
    }

    /**
     * Creates an empty builder; every option starts out as {@code null}.
     * @return a new builder
     */
    public static Builder builder() {
        return new Builder();
    }

    @Override
    public String getModel() {
        return this.model;
    }

    @Override
    public String getVoice() {
        return this.voice;
    }

    @Override
    public String getFormat() {
        return this.format;
    }

    @Override
    public Double getSpeed() {
        return this.speed;
    }

    /**
     * Returns a new instance carrying the same four option values as this one.
     */
    @Override
    @SuppressWarnings("unchecked")
    public DefaultTextToSpeechOptions copy() {
        return new DefaultTextToSpeechOptions(this.model, this.voice, this.format, this.speed);
    }

    /**
     * Two instances are equal when all four option values are equal.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o instanceof DefaultTextToSpeechOptions other) {
            return Objects.equals(this.model, other.model) && Objects.equals(this.voice, other.voice)
                    && Objects.equals(this.format, other.format) && Objects.equals(this.speed, other.speed);
        }
        return false;
    }

    @Override
    public int hashCode() {
        return Objects.hash(this.model, this.voice, this.format, this.speed);
    }

    @Override
    public String toString() {
        return "DefaultTextToSpeechOptions{model='" + this.model + "', voice='" + this.voice + "', format='"
                + this.format + "', speed=" + this.speed + '}';
    }

    /**
     * Fluent builder for {@link DefaultTextToSpeechOptions}.
     */
    public static class Builder implements TextToSpeechOptions.Builder {

        private String model;

        private String voice;

        private String format;

        private Double speed;

        public Builder() {
        }

        // Seeds the builder with the values of an existing options instance.
        private Builder(DefaultTextToSpeechOptions source) {
            this.model = source.model;
            this.voice = source.voice;
            this.format = source.format;
            this.speed = source.speed;
        }

        @Override
        public Builder model(String model) {
            this.model = model;
            return this;
        }

        @Override
        public Builder voice(String voice) {
            this.voice = voice;
            return this;
        }

        @Override
        public Builder format(String format) {
            this.format = format;
            return this;
        }

        @Override
        public Builder speed(Double speed) {
            this.speed = speed;
            return this;
        }

        /**
         * Builds an options instance from the values currently held by this builder.
         * @return a new {@link DefaultTextToSpeechOptions}
         */
        public DefaultTextToSpeechOptions build() {
            return new DefaultTextToSpeechOptions(this.model, this.voice, this.format, this.speed);
        }

    }

}

View File

@@ -0,0 +1,67 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import java.util.Arrays;
import java.util.Objects;
import org.springframework.ai.model.ModelResult;
import org.springframework.ai.model.ResultMetadata;
/**
 * {@link ModelResult} holding the raw audio bytes produced by a text-to-speech model.
 *
 * <p>
 * The byte array is stored and returned as-is; no defensive copy is made.
 *
 * @author Alexandros Pappas
 */
public class Speech implements ModelResult<byte[]> {

    private final byte[] speech;

    /**
     * Creates a result wrapping the given audio bytes.
     * @param speech the audio bytes; stored by reference
     */
    public Speech(byte[] speech) {
        this.speech = speech;
    }

    /**
     * Returns the audio bytes passed to the constructor.
     * @return the same array instance that was supplied at construction
     */
    @Override
    public byte[] getOutput() {
        return this.speech;
    }

    /**
     * Two results are equal when their audio bytes have the same content.
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (!(o instanceof Speech other)) {
            return false;
        }
        return Arrays.equals(this.speech, other.speech);
    }

    @Override
    public int hashCode() {
        return Objects.hash(Arrays.hashCode(this.speech));
    }

    /**
     * Returns a short description that reports the size of the audio instead of its
     * content. Rendering every byte would yield an enormous string for real audio
     * payloads whenever the object is logged or inspected.
     */
    @Override
    public String toString() {
        String summary = (this.speech == null) ? "null" : this.speech.length + " bytes";
        return "Speech{" + "speech=" + summary + '}';
    }

    /**
     * This implementation carries no per-result metadata.
     * @return always {@code null}
     */
    @Override
    public ResultMetadata getMetadata() {
        return null;
    }

}

View File

@@ -0,0 +1,45 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import reactor.core.publisher.Flux;
import org.springframework.ai.model.StreamingModel;
/**
 * Streaming contract for text-to-speech models: a prompt goes in, a {@link Flux} of
 * responses comes out. Convenience overloads accept plain text and emit raw bytes.
 *
 * @author Alexandros Pappas
 */
public interface StreamingTextToSpeechModel extends StreamingModel<TextToSpeechPrompt, TextToSpeechResponse> {

    /**
     * Streams audio for the given text.
     * @param text the text to synthesize
     * @return the audio bytes of each response; a response without a result or output is
     * emitted as an empty array
     */
    default Flux<byte[]> stream(String text) {
        return stream(new TextToSpeechPrompt(text)).map(StreamingTextToSpeechModel::audioOrEmpty);
    }

    /**
     * Streams audio for the given text using the supplied options.
     * @param text the text to synthesize
     * @param options the options to attach to the prompt
     * @return the audio bytes of each response; a response without a result or output is
     * emitted as an empty array
     */
    default Flux<byte[]> stream(String text, TextToSpeechOptions options) {
        return stream(new TextToSpeechPrompt(text, options)).map(StreamingTextToSpeechModel::audioOrEmpty);
    }

    @Override
    Flux<TextToSpeechResponse> stream(TextToSpeechPrompt prompt);

    // Extracts the audio bytes from a response, substituting an empty array when absent.
    private static byte[] audioOrEmpty(TextToSpeechResponse response) {
        if (response.getResult() == null) {
            return new byte[0];
        }
        byte[] audio = response.getResult().getOutput();
        return (audio != null) ? audio : new byte[0];
    }

}

View File

@@ -0,0 +1,58 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import java.util.Objects;
/**
* Implementation of the {@link TextToSpeechMessage} interface for the text to speech
* message.
*
* @author Alexandros Pappas
*/
public class TextToSpeechMessage {
private final String text;
public TextToSpeechMessage(String text) {
this.text = text;
}
public String getText() {
return text;
}
@Override
public boolean equals(Object o) {
if (this == o)
return true;
if (!(o instanceof TextToSpeechMessage that))
return false;
return Objects.equals(text, that.text);
}
@Override
public int hashCode() {
return Objects.hash(text);
}
@Override
public String toString() {
return "TextToSpeechMessage{" + "text='" + text + '\'' + '}';
}
}

View File

@@ -0,0 +1,42 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import org.springframework.ai.model.Model;
import org.springframework.ai.model.ModelResult;
/**
 * Interface for the text to speech model.
 *
 * @author Alexandros Pappas
 */
public interface TextToSpeechModel extends Model<TextToSpeechPrompt, TextToSpeechResponse> {

	/**
	 * Converts the given text to speech and returns the raw audio bytes.
	 * @param text the text to convert to speech
	 * @return the audio bytes of the response's result; an empty array when the
	 * response has no result or the result has no output, never {@code null}
	 */
	default byte[] call(String text) {
		TextToSpeechPrompt prompt = new TextToSpeechPrompt(text);
		ModelResult<byte[]> result = call(prompt).getResult();
		// Guard the output as well as the result, so the empty-array fallback matches
		// what StreamingTextToSpeechModel#stream(String) emits for missing audio.
		byte[] audio = (result != null) ? result.getOutput() : null;
		return (audio != null) ? audio : new byte[0];
	}

	/**
	 * Converts the text held by the given prompt to speech.
	 * @param prompt the prompt holding the text and options
	 * @return the text to speech response
	 */
	@Override
	TextToSpeechResponse call(TextToSpeechPrompt prompt);

	/**
	 * Returns the default options of this model. Unless overridden, this is a fresh
	 * instance produced by {@link TextToSpeechOptions#builder()} with nothing set.
	 * @return the default options
	 */
	default TextToSpeechOptions getDefaultOptions() {
		return TextToSpeechOptions.builder().build();
	}

}

View File

@@ -0,0 +1,114 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import org.springframework.ai.model.ModelOptions;
import org.springframework.lang.Nullable;
/**
* Interface for text-to-speech model options. Defines the common, portable options that
* should be supported by all implementations.
*
* <p>
* Every getter is annotated {@link Nullable}: a {@code null} value means the option
* was not set.
*
* @author Alexandros Pappas
*/
public interface TextToSpeechOptions extends ModelOptions {
/**
* Creates a new {@link TextToSpeechOptions.Builder} to create the default
* {@link TextToSpeechOptions}. The returned builder is the one provided by
* {@code DefaultTextToSpeechOptions}.
* @return a new {@link TextToSpeechOptions.Builder}
*/
static TextToSpeechOptions.Builder builder() {
return new DefaultTextToSpeechOptions.Builder();
}
/**
* Returns the model to use for text-to-speech.
* @return the model name, or {@code null} if not set
*/
@Nullable
String getModel();
/**
* Returns the voice to use for text-to-speech.
* @return the voice identifier, or {@code null} if not set
*/
@Nullable
String getVoice();
/**
* Returns the output format for the generated audio.
* @return the output format (e.g., "mp3", "wav"), or {@code null} if not set
*/
@Nullable
String getFormat();
/**
* Returns the speed of the generated speech.
* @return the speech speed, or {@code null} if not set. NOTE(review): the unit and
* valid range are not defined here and are presumably provider specific - confirm.
*/
@Nullable
Double getSpeed();
/**
* Returns a copy of this {@link TextToSpeechOptions}.
*
* <p>
* The return type is inferred at the call site, so the caller is responsible for
* requesting a type that the concrete implementation actually returns.
* @param <T> the options type expected by the caller
* @return a copy of this {@link TextToSpeechOptions}
*/
<T extends TextToSpeechOptions> T copy();
/**
* Builder for {@link TextToSpeechOptions}. Each setter returns the builder so calls
* can be chained.
*/
interface Builder {
/**
* Sets the model to use for text-to-speech.
* @param model the model name
* @return this builder
*/
Builder model(String model);
/**
* Sets the voice to use for text-to-speech.
* @param voice the voice identifier
* @return this builder
*/
Builder voice(String voice);
/**
* Sets the output format for the generated audio.
* @param format the output format (e.g., "mp3", "wav")
* @return this builder
*/
Builder format(String format);
/**
* Sets the speed of the generated speech.
* @param speed the speech speed
* @return this builder
*/
Builder speed(Double speed);
/**
* Builds the {@link TextToSpeechOptions}.
* @return the {@link TextToSpeechOptions}
*/
TextToSpeechOptions build();
}
}

View File

@@ -0,0 +1,84 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import java.util.Objects;
import org.springframework.ai.model.ModelRequest;
/**
 * A {@link ModelRequest} for text to speech: it pairs the {@link TextToSpeechMessage}
 * holding the text with the {@link TextToSpeechOptions} to apply.
 *
 * <p>
 * The message is fixed at construction time, while the options can be replaced later
 * through {@link #setOptions(TextToSpeechOptions)}. Both take part in
 * {@link #equals(Object)} and {@link #hashCode()}.
 *
 * @author Alexandros Pappas
 */
public class TextToSpeechPrompt implements ModelRequest<TextToSpeechMessage> {

	private final TextToSpeechMessage message;

	private TextToSpeechOptions options;

	/**
	 * Creates a prompt for the given text with options built from the default
	 * {@link TextToSpeechOptions#builder()}.
	 * @param text the text to convert to speech
	 */
	public TextToSpeechPrompt(String text) {
		this(new TextToSpeechMessage(text));
	}

	/**
	 * Creates a prompt for the given text and options.
	 * @param text the text to convert to speech
	 * @param options the options to use
	 */
	public TextToSpeechPrompt(String text, TextToSpeechOptions options) {
		this(new TextToSpeechMessage(text), options);
	}

	/**
	 * Creates a prompt for the given message with options built from the default
	 * {@link TextToSpeechOptions#builder()}.
	 * @param message the message holding the text
	 */
	public TextToSpeechPrompt(TextToSpeechMessage message) {
		this(message, TextToSpeechOptions.builder().build());
	}

	/**
	 * Creates a prompt for the given message and options.
	 * @param message the message holding the text
	 * @param options the options to use
	 */
	public TextToSpeechPrompt(TextToSpeechMessage message, TextToSpeechOptions options) {
		this.message = message;
		this.options = options;
	}

	@Override
	public TextToSpeechMessage getInstructions() {
		return this.message;
	}

	@Override
	public TextToSpeechOptions getOptions() {
		return this.options;
	}

	/**
	 * Replaces the options of this prompt.
	 * @param options the new options
	 */
	public void setOptions(TextToSpeechOptions options) {
		this.options = options;
	}

	@Override
	public boolean equals(Object o) {
		if (o == this) {
			return true;
		}
		return o instanceof TextToSpeechPrompt other && Objects.equals(this.message, other.message)
				&& Objects.equals(this.options, other.options);
	}

	@Override
	public int hashCode() {
		return Objects.hash(this.message, this.options);
	}

	@Override
	public String toString() {
		StringBuilder rendered = new StringBuilder("TextToSpeechPrompt{");
		rendered.append("message=").append(this.message);
		rendered.append(", options=").append(this.options);
		return rendered.append('}').toString();
	}

}

View File

@@ -0,0 +1,78 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import java.util.List;
import java.util.Objects;
import org.springframework.ai.model.ModelResponse;
import org.springframework.ai.model.ResponseMetadata;
/**
 * Implementation of the {@link ModelResponse} interface for the text to speech response.
 *
 * <p>
 * Holds the list of generated {@link Speech} results together with optional response
 * metadata. Only the results take part in {@link #equals(Object)},
 * {@link #hashCode()} and {@link #toString()}; the metadata is ignored there.
 *
 * @author Alexandros Pappas
 */
public class TextToSpeechResponse implements ModelResponse<Speech> {

	private final List<Speech> results;

	private final TextToSpeechResponseMetadata textToSpeechResponseMetadata;

	/**
	 * Creates a response without metadata.
	 * @param results the generated speech results
	 */
	public TextToSpeechResponse(List<Speech> results) {
		this(results, null);
	}

	/**
	 * Creates a response with the given results and metadata.
	 * @param results the generated speech results
	 * @param textToSpeechResponseMetadata the response metadata, may be {@code null}
	 */
	public TextToSpeechResponse(List<Speech> results, TextToSpeechResponseMetadata textToSpeechResponseMetadata) {
		this.results = results;
		this.textToSpeechResponseMetadata = textToSpeechResponseMetadata;
	}

	@Override
	public List<Speech> getResults() {
		return this.results;
	}

	/**
	 * Returns the first result of this response.
	 * @return the first {@link Speech}, or {@code null} when the response holds no
	 * results
	 */
	public Speech getResult() {
		// Callers null-check this value, so report "no result" as null instead of
		// failing with an IndexOutOfBoundsException or NullPointerException.
		if (this.results == null || this.results.isEmpty()) {
			return null;
		}
		return this.results.get(0);
	}

	@Override
	public TextToSpeechResponseMetadata getMetadata() {
		return this.textToSpeechResponseMetadata;
	}

	@Override
	public boolean equals(Object o) {
		if (this == o) {
			return true;
		}
		if (!(o instanceof TextToSpeechResponse that)) {
			return false;
		}
		return Objects.equals(this.results, that.results);
	}

	@Override
	public int hashCode() {
		return Objects.hash(this.results);
	}

	@Override
	public String toString() {
		return "TextToSpeechResponse{" + "results=" + this.results + '}';
	}

}

View File

@@ -0,0 +1,12 @@
package org.springframework.ai.audio.tts;
import org.springframework.ai.model.MutableResponseMetadata;
/**
* Metadata associated with a text-to-speech response.
*
* <p>
* Adds no members of its own; everything is inherited from
* {@link MutableResponseMetadata}.
*
* @author Alexandros Pappas
*/
public class TextToSpeechResponseMetadata extends MutableResponseMetadata {
}

View File

@@ -0,0 +1,67 @@
/*
* Copyright 2025-2025 the original author or authors.
*
* Licensed under the Apache License, Version 2.0 (the "License");
* you may not use this file except in compliance with the License.
* You may obtain a copy of the License at
*
* https://www.apache.org/licenses/LICENSE-2.0
*
* Unless required by applicable law or agreed to in writing, software
* distributed under the License is distributed on an "AS IS" BASIS,
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
* See the License for the specific language governing permissions and
* limitations under the License.
*/
package org.springframework.ai.audio.tts;
import static org.assertj.core.api.Assertions.assertThat;
import static org.assertj.core.api.Assertions.within;
import org.junit.jupiter.api.Test;
/**
 * Unit tests for {@link DefaultTextToSpeechOptions}, covering the builder, the copy
 * operation and the values of an options instance built without any setter call.
 *
 * @author Alexandros Pappas
 */
class DefaultTextToSpeechOptionsTests {

	/**
	 * Builds an options instance with every portable field populated.
	 */
	private static TextToSpeechOptions fullyPopulatedOptions() {
		return DefaultTextToSpeechOptions.builder()
			.model("test-model")
			.voice("test-voice")
			.format("test-format")
			.speed(0.8)
			.build();
	}

	@Test
	void testBuilderWithAllFields() {
		TextToSpeechOptions populated = fullyPopulatedOptions();

		assertThat(populated.getModel()).isEqualTo("test-model");
		assertThat(populated.getVoice()).isEqualTo("test-voice");
		assertThat(populated.getFormat()).isEqualTo("test-format");
		assertThat(populated.getSpeed()).isCloseTo(0.8, within(0.0001));
	}

	@Test
	void testCopy() {
		TextToSpeechOptions source = fullyPopulatedOptions();

		DefaultTextToSpeechOptions duplicate = source.copy();

		// The copy must be a distinct instance that still compares equal.
		assertThat(duplicate).isNotSameAs(source);
		assertThat(duplicate).isEqualTo(source);
	}

	@Test
	void testDefaultValues() {
		DefaultTextToSpeechOptions untouched = DefaultTextToSpeechOptions.builder().build();

		// Nothing was set on the builder, so every option is expected to be null.
		assertThat(untouched.getModel()).isNull();
		assertThat(untouched.getVoice()).isNull();
		assertThat(untouched.getFormat()).isNull();
		assertThat(untouched.getSpeed()).isNull();
	}

}

View File

@@ -0,0 +1,44 @@
<?xml version="1.0" encoding="UTF-8"?>
<!--
Starter POM for ElevenLabs: it declares only dependencies, pulling in the Spring Boot
starter, the ElevenLabs auto-configuration module and the ElevenLabs model module.
-->
<project xmlns="http://maven.apache.org/POM/4.0.0"
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<parent>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-parent</artifactId>
<version>1.1.0-SNAPSHOT</version>
<relativePath>../../pom.xml</relativePath>
</parent>
<artifactId>spring-ai-starter-model-elevenlabs</artifactId>
<packaging>jar</packaging>
<name>Spring AI Starter - ElevenLabs</name>
<description>Spring AI ElevenLabs Auto Configuration</description>
<url>https://github.com/spring-projects/spring-ai</url>
<scm>
<url>https://github.com/spring-projects/spring-ai</url>
<connection>git://github.com/spring-projects/spring-ai.git</connection>
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
</scm>
<dependencies>
<!-- No explicit version here; presumably managed by the parent POM (verify). -->
<dependency>
<groupId>org.springframework.boot</groupId>
<artifactId>spring-boot-starter</artifactId>
</dependency>
<!-- Sibling Spring AI modules are pinned to the parent's version. -->
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-autoconfigure-model-elevenlabs</artifactId>
<version>${project.parent.version}</version>
</dependency>
<dependency>
<groupId>org.springframework.ai</groupId>
<artifactId>spring-ai-elevenlabs</artifactId>
<version>${project.parent.version}</version>
</dependency>
</dependencies>
</project>