Add Support for Oracle 23ai as vector database
- add OracleVectorStore with metadata filter expression support. - add ITs using oracle-free-slim testcontainers. - add auto-configuration and boot starter. - add adoc documentation. - Adjust javadoc references. Resolves #703 Co-authored-by: Eddú Meléndez Gonzales <eddu.melendez@gmail.com>
This commit is contained in:
committed by
Christian Tzolov
parent
be9e4a828a
commit
41cf693bc0
@@ -163,7 +163,7 @@ Though the `DocumentWriter` interface isn't exclusively for Vector Database writ
|
||||
|
||||
**Vector Stores:** Vector Databases are instrumental in incorporating your data with AI models.
|
||||
They ascertain which document sections the AI should use for generating responses.
|
||||
Examples of Vector Databases include Chroma, Postgres, Pinecone, Qdrant, Weaviate, Mongo Atlas, and Redis. Spring AI's `VectorStore` abstraction permits effortless transitions between database implementations.
|
||||
Examples of Vector Databases include Chroma, Oracle, Postgres, Pinecone, Qdrant, Weaviate, Mongo Atlas, and Redis. Spring AI's `VectorStore` abstraction permits effortless transitions between database implementations.
|
||||
|
||||
|
||||
|
||||
|
||||
5
pom.xml
5
pom.xml
@@ -35,6 +35,7 @@
|
||||
<module>vector-stores/spring-ai-milvus-store</module>
|
||||
<module>vector-stores/spring-ai-mongodb-atlas-store</module>
|
||||
<module>vector-stores/spring-ai-neo4j-store</module>
|
||||
<module>vector-stores/spring-ai-oracle-store</module>
|
||||
<module>vector-stores/spring-ai-pgvector-store</module>
|
||||
<module>vector-stores/spring-ai-pinecone-store</module>
|
||||
<module>vector-stores/spring-ai-qdrant-store</module>
|
||||
@@ -49,6 +50,7 @@
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-milvus-store</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-mongodb-atlas-store</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-neo4j-store</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-oracle-store</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-pgvector-store</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-pinecone-store</module>
|
||||
<module>spring-ai-spring-boot-starters/spring-ai-starter-qdrant-store</module>
|
||||
@@ -157,6 +159,7 @@
|
||||
<pdfbox.version>3.0.1</pdfbox.version>
|
||||
<pgvector.version>0.1.4</pgvector.version>
|
||||
<sap.hanadb.version>2.20.11</sap.hanadb.version>
|
||||
<oracle.version>23.4.0.24.05</oracle.version>
|
||||
<postgresql.version>42.7.2</postgresql.version>
|
||||
<milvus.version>2.3.4</milvus.version>
|
||||
<pinecone.version>0.8.0</pinecone.version>
|
||||
@@ -181,7 +184,7 @@
|
||||
<flatten-maven-plugin.version>1.5.0</flatten-maven-plugin.version>
|
||||
<maven-deploy-plugin.version>3.1.1</maven-deploy-plugin.version>
|
||||
<asciidoctor-maven-plugin.version>2.2.3</asciidoctor-maven-plugin.version>
|
||||
<maven-assembly-plugin.version>3.6.0</maven-assembly-plugin.version>
|
||||
<maven-assembly-plugin.version>3.7.0</maven-assembly-plugin.version>
|
||||
<maven-dependency-plugin.version>3.5.0</maven-dependency-plugin.version>
|
||||
<!-- <maven-site-plugin.version>3.12.1</maven-site-plugin.version> -->
|
||||
<maven-site-plugin.version>4.0.0-M13</maven-site-plugin.version>
|
||||
|
||||
@@ -162,6 +162,12 @@
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-oracle-store</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-pgvector-store</artifactId>
|
||||
@@ -296,6 +302,12 @@
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-oracle-store-spring-boot-starter</artifactId>
|
||||
<version>${project.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-pgvector-store-spring-boot-starter</artifactId>
|
||||
|
||||
@@ -62,6 +62,7 @@
|
||||
*** xref:api/vectordbs/milvus.adoc[]
|
||||
*** xref:api/vectordbs/mongodb.adoc[]
|
||||
*** xref:api/vectordbs/neo4j.adoc[]
|
||||
*** xref:api/vectordbs/oracle.adoc[Oracle DB AI Vector Search]
|
||||
*** xref:api/vectordbs/pgvector.adoc[]
|
||||
*** xref:api/vectordbs/pinecone.adoc[]
|
||||
*** xref:api/vectordbs/qdrant.adoc[]
|
||||
|
||||
@@ -18,6 +18,7 @@ The following sections describe the Spring AI interface for using multiple vecto
|
||||
|
||||
The last section is intended to demystify the underlying approach of similarity searching in vector databases.
|
||||
|
||||
[[api-overview]]
|
||||
== API Overview
|
||||
This section serves as a guide to the `VectorStore` interface and its associated classes within the Spring AI framework.
|
||||
|
||||
@@ -102,6 +103,7 @@ These are the available implementations of the `VectorStore` interface:
|
||||
* xref:api/vectordbs/milvus.adoc[Milvus Vector Store] - The https://milvus.io/[Milvus] vector store.
|
||||
* xref:api/vectordbs/mongodb.adoc[MongoDB Atlas Vector Store] - The https://www.mongodb.com/atlas/database[MongoDB Atlas] vector store.
|
||||
* xref:api/vectordbs/neo4j.adoc[Neo4j Vector Store] - The https://neo4j.com/[Neo4j] vector store.
|
||||
* xref:api/vectordbs/oracle.adoc[OracleVectorStore] - The https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/overview-ai-vector-search.html[Oracle Database] vector store.
|
||||
* xref:api/vectordbs/pgvector.adoc[PgVectorStore] - The https://github.com/pgvector/pgvector[PostgreSQL/PGVector] vector store.
|
||||
* xref:api/vectordbs/pinecone.adoc[Pinecone Vector Store] - https://www.pinecone.io/[PineCone] vector store.
|
||||
* xref:api/vectordbs/qdrant.adoc[Qdrant Vector Store] - https://www.qdrant.tech/[Qdrant] vector store.
|
||||
|
||||
@@ -0,0 +1,203 @@
|
||||
= Oracle Database 23ai - AI Vector Search
|
||||
|
||||
The link:https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/overview-ai-vector-search.html[AI Vector Search] capabilities of the Oracle Database 23ai (23.4+) are available as a Spring AI `VectorStore` to help you to store document embeddings and perform similarity searches. Of course, all other features are also available.
|
||||
|
||||
TIP: The <<Run Oracle Database 23ai locally,Run Oracle Database 23ai locally>> appendix shows how to start a database with a lightweight Docker container.
|
||||
|
||||
== Auto-Configuration
|
||||
|
||||
Start by adding the Oracle Vector Store boot starter dependency to your project:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-oracle-store-spring-boot-starter</artifactId>
|
||||
</dependency>
|
||||
----
|
||||
|
||||
or to your Gradle `build.gradle` build file.
|
||||
|
||||
[source,groovy]
|
||||
----
|
||||
dependencies {
|
||||
implementation 'org.springframework.ai:spring-ai-oracle-store-spring-boot-starter'
|
||||
}
|
||||
----
|
||||
|
||||
If you need this vector store to initialize the schema for you then you'll need to pass true for the `initializeSchema` boolean parameter in the appropriate constructor or by setting `...initialize-schema=true` in the `application.properties` file.
|
||||
|
||||
NOTE: this is a breaking change! In earlier versions of Spring AI, this schema initialization happened by default.
|
||||
|
||||
The Vector Store also requires an `EmbeddingModel` instance to calculate embeddings for the documents.
|
||||
You can pick one of the available xref:api/embeddings.adoc#available-implementations[EmbeddingModel Implementations].
|
||||
|
||||
For example, to use the xref:api/embeddings/openai-embeddings.adoc[OpenAI EmbeddingModel], add the following dependency to your project:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-openai-spring-boot-starter</artifactId>
|
||||
</dependency>
|
||||
----
|
||||
|
||||
or to your Gradle `build.gradle` build file.
|
||||
|
||||
[source,groovy]
|
||||
----
|
||||
dependencies {
|
||||
implementation 'org.springframework.ai:spring-ai-openai-spring-boot-starter'
|
||||
}
|
||||
----
|
||||
|
||||
TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
|
||||
Refer to the xref:getting-started.adoc#repositories[Repositories] section to add Milestone and/or Snapshot Repositories to your build file.
|
||||
|
||||
To connect to and configure the `OracleVectorStore`, you need to provide access details for your database.
|
||||
A simple configuration can be provided via Spring Boot's `application.yml`:
|
||||
|
||||
[source,yaml]
|
||||
----
|
||||
spring:
|
||||
datasource:
|
||||
url: jdbc:oracle:thin:@//localhost:1521/freepdb1
|
||||
username: mlops
|
||||
password: mlops
|
||||
ai:
|
||||
vectorstore:
|
||||
oracle:
|
||||
index-type: IVF
|
||||
distance-type: COSINE
|
||||
dimensions: 1536
|
||||
----
|
||||
|
||||
TIP: Check the list of xref:#oracle-properties[configuration parameters] to learn about the default values and configuration options.
|
||||
|
||||
Now you can auto-wire the `OracleVectorStore` in your application and use it:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
@Autowired VectorStore vectorStore;
|
||||
|
||||
// ...
|
||||
|
||||
List<Document> documents = List.of(
|
||||
new Document("Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!!", Map.of("meta1", "meta1")),
|
||||
new Document("The World is Big and Salvation Lurks Around the Corner"),
|
||||
new Document("You walk forward facing the past and you turn back toward the future.", Map.of("meta2", "meta2")));
|
||||
|
||||
// Add the documents to Oracle Vector Store
|
||||
vectorStore.add(documents);
|
||||
|
||||
// Retrieve documents similar to a query
|
||||
List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Spring").withTopK(5));
|
||||
----
|
||||
|
||||
[[oracle-properties]]
|
||||
=== Configuration properties
|
||||
|
||||
You can use the following properties in your Spring Boot configuration to customize the `OracleVectorStore`.
|
||||
|
||||
[cols="2,5,1"]
|
||||
|===
|
||||
|Property| Description | Default value
|
||||
|
||||
|`spring.ai.vectorstore.oracle.index-type`| Nearest neighbor search index type. Options are `NONE` - exact nearest neighbor search, `IVF` - Inverted Flat File index. It has faster build times and uses less memory than HNSW, but has lower query performance (in terms of speed-recall tradeoff). `HNSW` - creates a multilayer graph. It has slower build times and uses more memory than IVF, but has better query performance (in terms of speed-recall tradeoff). | NONE
|
||||
|`spring.ai.vectorstore.oracle.distance-type`| Search distance type among `COSINE` (default), `DOT`, `EUCLIDEAN`, `EUCLIDEAN_SQUARED`, and `MANHATTAN`.
|
||||
|
||||
NOTE: If vectors are normalized, you can use `DOT` or `COSINE` for best performance.| COSINE
|
||||
|`spring.ai.vectorstore.oracle.forced-normalization`| Allows enabling vector normalization (if true) before insertion and for similarity search.
|
||||
|
||||
CAUTION: Setting this to true is a requirement to allow for xref:api/vectordbs.adoc#api-overview[search request similarity threshold].
|
||||
|
||||
NOTE: If vectors are normalized, you can use `DOT` or `COSINE` for best performance. | false
|
||||
|`spring.ai.vectorstore.oracle.dimensions`| Embeddings dimension. If not specified explicitly the OracleVectorStore will allow the maximum: 65535. Dimensions are set to the embedding column on table creation. If you change the dimensions you would have to re-create the table as well. | 65535
|
||||
|`spring.ai.vectorstore.oracle.remove-existing-vector-store-table` | Drops the existing table on start up. | false
|
||||
|`spring.ai.vectorstore.oracle.initialize-schema` | Whether to initialize the required schema. | false
|
||||
|`spring.ai.vectorstore.oracle.search-accuracy` | Denotes the requested accuracy target in the presence of an index. Disabled by default. You need to provide an integer in the range [1,100] to override the default index accuracy (95). Using lower accuracy provides approximate similarity search trading off speed versus accuracy. | -1 (`DEFAULT_SEARCH_ACCURACY`)
|
||||
|
||||
|===
|
||||
|
||||
== Metadata filtering
|
||||
|
||||
You can leverage the generic, portable link:https://docs.spring.io/spring-ai/reference/api/vectordbs.html#_metadata_filters[metadata filters] with the `OracleVectorStore`.
|
||||
|
||||
For example, you can use either the text expression language:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
vectorStore.similaritySearch(
|
||||
SearchRequest.defaults()
|
||||
.withQuery("The World")
|
||||
.withTopK(TOP_K)
|
||||
.withSimilarityThreshold(SIMILARITY_THRESHOLD)
|
||||
.withFilterExpression("author in ['john', 'jill'] && article_type == 'blog'"));
|
||||
----
|
||||
|
||||
or programmatically using the `Filter.Expression` DSL:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
FilterExpressionBuilder b = new FilterExpressionBuilder();
|
||||
|
||||
vectorStore.similaritySearch(SearchRequest.defaults()
|
||||
.withQuery("The World")
|
||||
.withTopK(TOP_K)
|
||||
.withSimilarityThreshold(SIMILARITY_THRESHOLD)
|
||||
.withFilterExpression(b.and(
|
||||
b.in("author","john", "jill"),
|
||||
b.eq("article_type", "blog")).build()));
|
||||
----
|
||||
|
||||
NOTE: These filter expressions are converted into the equivalent `OracleVectorStore` filters.
|
||||
|
||||
== Manual Configuration
|
||||
|
||||
Instead of using the Spring Boot auto-configuration, you can manually configure the `OracleVectorStore`.
|
||||
For this you need to add the Oracle JDBC driver and `JdbcTemplate` auto-configuration dependencies to your project:
|
||||
|
||||
[source,xml]
|
||||
----
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-jdbc</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.oracle.database.jdbc</groupId>
|
||||
<artifactId>ojdbc11</artifactId>
|
||||
<scope>runtime</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-oracle-store</artifactId>
|
||||
</dependency>
|
||||
----
|
||||
|
||||
TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file.
|
||||
|
||||
To configure the `OracleVectorStore` in your application, you can use the following setup:
|
||||
|
||||
[source,java]
|
||||
----
|
||||
@Bean
|
||||
public VectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel) {
|
||||
return new OracleVectorStore(jdbcTemplate, embeddingModel, true);
|
||||
}
|
||||
----
|
||||
|
||||
== Run Oracle Database 23ai locally
|
||||
|
||||
----
|
||||
docker run --rm --name oracle23ai -p 1521:1521 -e APP_USER=mlops -e APP_USER_PASSWORD=mlops -e ORACLE_PASSWORD=mlops gvenzl/oracle-free:23-slim
|
||||
----
|
||||
|
||||
You can then connect to the database using:
|
||||
|
||||
----
|
||||
sql mlops/mlops@localhost/freepdb1
|
||||
----
|
||||
|
||||
|
||||
@@ -15,7 +15,7 @@ Spring AI provides the following features:
|
||||
* Supported Model types are Chat, Text to Image, Audio Transcription, Text to Speech, and more on the way.
|
||||
* Portable API across AI providers for all models. Both synchronous and stream API options are supported. Dropping down to access model specific features is also supported.
|
||||
* Mapping of AI Model output to POJOs.
|
||||
* Support for all major Vector Database providers such as Apache Cassandra, Azure Vector Search, Chroma, Milvus, Neo4j, PostgreSQL/PGVector, PineCone, Qdrant, Redis, and Weaviate.
|
||||
* Support for all major Vector Database providers such as Apache Cassandra, Azure Vector Search, Chroma, Milvus, Neo4j, Oracle, PostgreSQL/PGVector, PineCone, Qdrant, Redis, and Weaviate.
|
||||
* Portable API across Vector Store providers, including a novel SQL-like metadata filter API that is also portable.
|
||||
* Function calling.
|
||||
* Spring Boot Auto Configuration and Starters for AI Models and Vector Stores.
|
||||
|
||||
@@ -101,6 +101,14 @@
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<!-- Oracle AI Vector Search Store -->
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-oracle-store</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
<optional>true</optional>
|
||||
</dependency>
|
||||
|
||||
<!-- PG Vector Store-->
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
|
||||
@@ -0,0 +1,48 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.springframework.ai.autoconfigure.vectorstore.oracle;
|
||||
|
||||
import org.springframework.ai.embedding.EmbeddingModel;
|
||||
import org.springframework.ai.vectorstore.OracleVectorStore;
|
||||
import org.springframework.boot.autoconfigure.AutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnClass;
|
||||
import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean;
|
||||
import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration;
|
||||
import org.springframework.boot.context.properties.EnableConfigurationProperties;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
|
||||
/**
|
||||
* @author Loïc Lefèvre
|
||||
*/
|
||||
@AutoConfiguration(after = JdbcTemplateAutoConfiguration.class)
|
||||
@ConditionalOnClass({ OracleVectorStore.class, DataSource.class, JdbcTemplate.class })
|
||||
@EnableConfigurationProperties(OracleAIVectorSearchStoreProperties.class)
|
||||
public class OracleAIVectorSearchStoreAutoConfiguration {
|
||||
|
||||
@Bean
|
||||
@ConditionalOnMissingBean
|
||||
public OracleVectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel,
|
||||
OracleAIVectorSearchStoreProperties properties) {
|
||||
return new OracleVectorStore(jdbcTemplate, embeddingModel, properties.getTableName(), properties.getIndexType(),
|
||||
properties.getDistanceType(), properties.getDimensions(), properties.getSearchAccuracy(),
|
||||
properties.isInitializeSchema(), properties.isRemoveExistingVectorStoreTable(),
|
||||
properties.isForcedNormalization());
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,102 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.springframework.ai.autoconfigure.vectorstore.oracle;
|
||||
|
||||
import org.springframework.ai.autoconfigure.CommonVectorStoreProperties;
|
||||
import org.springframework.ai.vectorstore.OracleVectorStore;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
|
||||
import static org.springframework.ai.vectorstore.OracleVectorStore.DEFAULT_SEARCH_ACCURACY;
|
||||
|
||||
/**
|
||||
* @author Loïc Lefèvre
|
||||
*/
|
||||
@ConfigurationProperties(OracleAIVectorSearchStoreProperties.CONFIG_PREFIX)
|
||||
public class OracleAIVectorSearchStoreProperties extends CommonVectorStoreProperties {
|
||||
|
||||
public static final String CONFIG_PREFIX = "spring.ai.vectorstore.oracle";
|
||||
|
||||
private String tableName = OracleVectorStore.DEFAULT_TABLE_NAME;
|
||||
|
||||
private OracleVectorStore.OracleAIVectorSearchIndexType indexType = OracleVectorStore.DEFAULT_INDEX_TYPE;
|
||||
|
||||
private OracleVectorStore.OracleAIVectorSearchDistanceType distanceType = OracleVectorStore.DEFAULT_DISTANCE_TYPE;
|
||||
|
||||
private int dimensions = OracleVectorStore.DEFAULT_DIMENSIONS;
|
||||
|
||||
private boolean removeExistingVectorStoreTable;
|
||||
|
||||
private boolean forcedNormalization;
|
||||
|
||||
private int searchAccuracy = DEFAULT_SEARCH_ACCURACY;
|
||||
|
||||
public String getTableName() {
|
||||
return tableName;
|
||||
}
|
||||
|
||||
public void setTableName(String tableName) {
|
||||
this.tableName = tableName;
|
||||
}
|
||||
|
||||
public OracleVectorStore.OracleAIVectorSearchIndexType getIndexType() {
|
||||
return indexType;
|
||||
}
|
||||
|
||||
public void setIndexType(OracleVectorStore.OracleAIVectorSearchIndexType indexType) {
|
||||
this.indexType = indexType;
|
||||
}
|
||||
|
||||
public OracleVectorStore.OracleAIVectorSearchDistanceType getDistanceType() {
|
||||
return distanceType;
|
||||
}
|
||||
|
||||
public void setDistanceType(OracleVectorStore.OracleAIVectorSearchDistanceType distanceType) {
|
||||
this.distanceType = distanceType;
|
||||
}
|
||||
|
||||
public int getDimensions() {
|
||||
return dimensions;
|
||||
}
|
||||
|
||||
public void setDimensions(int dimensions) {
|
||||
this.dimensions = dimensions;
|
||||
}
|
||||
|
||||
public boolean isRemoveExistingVectorStoreTable() {
|
||||
return removeExistingVectorStoreTable;
|
||||
}
|
||||
|
||||
public void setRemoveExistingVectorStoreTable(boolean removeExistingVectorStoreTable) {
|
||||
this.removeExistingVectorStoreTable = removeExistingVectorStoreTable;
|
||||
}
|
||||
|
||||
public boolean isForcedNormalization() {
|
||||
return forcedNormalization;
|
||||
}
|
||||
|
||||
public void setForcedNormalization(boolean forcedNormalization) {
|
||||
this.forcedNormalization = forcedNormalization;
|
||||
}
|
||||
|
||||
public int getSearchAccuracy() {
|
||||
return searchAccuracy;
|
||||
}
|
||||
|
||||
public void setSearchAccuracy(int searchAccuracy) {
|
||||
this.searchAccuracy = searchAccuracy;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -15,6 +15,7 @@ org.springframework.ai.autoconfigure.bedrock.titan.BedrockTitanChatAutoConfigura
|
||||
org.springframework.ai.autoconfigure.bedrock.titan.BedrockTitanEmbeddingAutoConfiguration
|
||||
org.springframework.ai.autoconfigure.ollama.OllamaAutoConfiguration
|
||||
org.springframework.ai.autoconfigure.mistralai.MistralAiAutoConfiguration
|
||||
org.springframework.ai.autoconfigure.vectorstore.oracle.OracleAIVectorSearchStoreAutoConfiguration
|
||||
org.springframework.ai.autoconfigure.vectorstore.pgvector.PgVectorStoreAutoConfiguration
|
||||
org.springframework.ai.autoconfigure.vectorstore.pinecone.PineconeVectorStoreAutoConfiguration
|
||||
org.springframework.ai.autoconfigure.vectorstore.milvus.MilvusVectorStoreAutoConfiguration
|
||||
|
||||
@@ -0,0 +1,42 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<artifactId>spring-ai-oracle-store-spring-boot-starter</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Spring AI Starter - Oracle</name>
|
||||
<description>Spring AI Oracle Vector Store Auto Configuration</description>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
|
||||
<scm>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
<connection>git://github.com/spring-projects/spring-ai.git</connection>
|
||||
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
|
||||
</scm>
|
||||
|
||||
<dependencies>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter</artifactId>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-spring-boot-autoconfigure</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai-oracle-store</artifactId>
|
||||
<version>${project.parent.version}</version>
|
||||
</dependency>
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
1
vector-stores/spring-ai-oracle-store/README.md
Normal file
1
vector-stores/spring-ai-oracle-store/README.md
Normal file
@@ -0,0 +1 @@
|
||||
[Oracle AI Vector Search Documentation](https://docs.oracle.com/en/database/oracle/oracle-database/23/nfcoa/ai_vector_search.html)
|
||||
101
vector-stores/spring-ai-oracle-store/pom.xml
Normal file
101
vector-stores/spring-ai-oracle-store/pom.xml
Normal file
@@ -0,0 +1,101 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
<parent>
|
||||
<groupId>org.springframework.ai</groupId>
|
||||
<artifactId>spring-ai</artifactId>
|
||||
<version>1.0.0-SNAPSHOT</version>
|
||||
<relativePath>../../pom.xml</relativePath>
|
||||
</parent>
|
||||
<artifactId>spring-ai-oracle-store</artifactId>
|
||||
<packaging>jar</packaging>
|
||||
<name>Spring AI Vector Store - Oracle</name>
|
||||
<description>AI Vector Search from Oracle Database 23ai+ as a Spring AI Vector Store</description>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
|
||||
<scm>
|
||||
<url>https://github.com/spring-projects/spring-ai</url>
|
||||
<connection>git://github.com/spring-projects/spring-ai.git</connection>
|
||||
<developerConnection>git@github.com:spring-projects/spring-ai.git</developerConnection>
|
||||
</scm>
|
||||
|
||||
<dependencies>
|
||||
<dependency>
    <groupId>org.springframework.ai</groupId>
    <artifactId>spring-ai-core</artifactId>
    <!-- ${parent.version} is a deprecated expression; use the project.parent form -->
    <version>${project.parent.version}</version>
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.slf4j</groupId>
|
||||
<artifactId>slf4j-api</artifactId>
|
||||
<version>2.0.13</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.oracle.database.jdbc</groupId>
|
||||
<artifactId>ojdbc11</artifactId>
|
||||
<version>${oracle.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.oracle.database.jdbc</groupId>
|
||||
<artifactId>ucp</artifactId>
|
||||
<version>${oracle.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>com.oracle.database.ha</groupId>
|
||||
<artifactId>simplefan</artifactId>
|
||||
<version>${oracle.version}</version>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework</groupId>
|
||||
<artifactId>spring-jdbc</artifactId>
|
||||
</dependency>
|
||||
|
||||
<!-- TESTING -->
|
||||
<dependency>
    <groupId>org.springframework.ai</groupId>
    <artifactId>spring-ai-transformers</artifactId>
    <!-- ${parent.version} is a deprecated expression; use the project.parent form -->
    <version>${project.parent.version}</version>
    <scope>test</scope>
</dependency>
|
||||
|
||||
|
||||
<dependency>
    <groupId>org.springframework.ai</groupId>
    <artifactId>spring-ai-test</artifactId>
    <!-- ${parent.version} is a deprecated expression; use the project.parent form -->
    <version>${project.parent.version}</version>
    <scope>test</scope>
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.springframework.boot</groupId>
|
||||
<artifactId>spring-boot-starter-test</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.testcontainers</groupId>
|
||||
<artifactId>testcontainers</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.testcontainers</groupId>
|
||||
<artifactId>oracle-free</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
<dependency>
|
||||
<groupId>org.testcontainers</groupId>
|
||||
<artifactId>junit-jupiter</artifactId>
|
||||
<scope>test</scope>
|
||||
</dependency>
|
||||
|
||||
</dependencies>
|
||||
|
||||
</project>
|
||||
@@ -0,0 +1,88 @@
|
||||
package org.springframework.ai.vectorstore;
|
||||
|
||||
import org.springframework.ai.vectorstore.filter.Filter;
|
||||
import org.springframework.ai.vectorstore.filter.converter.AbstractFilterExpressionConverter;
|
||||
|
||||
/**
|
||||
* @author Loïc Lefèvre
|
||||
* @see <a href=
|
||||
* "https://docs.oracle.com/en/database/oracle/oracle-database/23/adjsn/json-path-expressions.html#GUID-8656CAB9-C293-4A99-BB62-F38F3CFC4C13">JSON
|
||||
* Path Documentation</a>
|
||||
*/
|
||||
public class IsoSqlJsonPathFilterExpressionConverter extends AbstractFilterExpressionConverter {
|
||||
|
||||
@Override
|
||||
protected String convertOperand(final Filter.Operand operand) {
|
||||
final StringBuilder context = new StringBuilder();
|
||||
context.append("$?( ");
|
||||
this.convertOperand(operand, context);
|
||||
return context.append(" )").toString();
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doExpression(final Filter.Expression expression, final StringBuilder context) {
|
||||
if (expression.type() == Filter.ExpressionType.NIN) {
|
||||
context.append("!( ");
|
||||
this.convertOperand(expression.left(), context);
|
||||
context.append(" in ");
|
||||
this.convertOperand(expression.right(), context);
|
||||
context.append(" )");
|
||||
}
|
||||
else {
|
||||
this.convertOperand(expression.left(), context);
|
||||
context.append(getOperationSymbol(expression));
|
||||
this.convertOperand(expression.right(), context);
|
||||
}
|
||||
}
|
||||
|
||||
private String getOperationSymbol(final Filter.Expression exp) {
|
||||
switch (exp.type()) {
|
||||
case AND:
|
||||
return " && ";
|
||||
case OR:
|
||||
return " || ";
|
||||
case EQ:
|
||||
return " == ";
|
||||
case NE:
|
||||
return " != ";
|
||||
case LT:
|
||||
return " < ";
|
||||
case LTE:
|
||||
return " <= ";
|
||||
case GT:
|
||||
return " > ";
|
||||
case GTE:
|
||||
return " >= ";
|
||||
case IN:
|
||||
return " in ";
|
||||
default:
|
||||
throw new RuntimeException("Not supported expression type: " + exp.type());
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doStartValueRange(Filter.Value listValue, StringBuilder context) {
|
||||
context.append("( ");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doEndValueRange(Filter.Value listValue, StringBuilder context) {
|
||||
context.append(" )");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doKey(final Filter.Key key, final StringBuilder context) {
|
||||
context.append("@.").append(key.key());
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doStartGroup(final Filter.Group group, final StringBuilder context) {
|
||||
context.append("(");
|
||||
}
|
||||
|
||||
@Override
|
||||
protected void doEndGroup(final Filter.Group group, final StringBuilder context) {
|
||||
context.append(")");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,602 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
package org.springframework.ai.vectorstore;
|
||||
|
||||
import oracle.jdbc.OracleType;
|
||||
import oracle.sql.VECTOR;
|
||||
import oracle.sql.json.OracleJsonFactory;
|
||||
import oracle.sql.json.OracleJsonGenerator;
|
||||
import oracle.sql.json.OracleJsonObject;
|
||||
import oracle.sql.json.OracleJsonValue;
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.embedding.EmbeddingModel;
|
||||
import org.springframework.ai.vectorstore.filter.FilterExpressionConverter;
|
||||
import org.springframework.beans.factory.InitializingBean;
|
||||
import org.springframework.jdbc.core.BatchPreparedStatementSetter;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.jdbc.core.RowMapper;
|
||||
import org.springframework.util.StringUtils;
|
||||
|
||||
import java.io.ByteArrayOutputStream;
|
||||
import java.sql.PreparedStatement;
|
||||
import java.sql.ResultSet;
|
||||
import java.sql.SQLException;
|
||||
import java.sql.Statement;
|
||||
import java.sql.Types;
|
||||
import java.util.ArrayList;
|
||||
import java.util.HashMap;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.Optional;
|
||||
|
||||
import static org.springframework.ai.vectorstore.OracleVectorStore.OracleAIVectorSearchDistanceType.DOT;
|
||||
import static org.springframework.jdbc.core.StatementCreatorUtils.setParameterValue;
|
||||
|
||||
/**
|
||||
* <p>
|
||||
* Integration of Oracle database 23ai as a Vector Store.
|
||||
* </p>
|
||||
* <p>
|
||||
* With the release 23ai (23.4), the Oracle database provides numerous features useful for
|
||||
* artificial intelligence such as Vectors, Similarity search, Vector indexes, ONNX
|
||||
* models...
|
||||
* </p>
|
||||
* <p>
|
||||
* This Spring AI Vector store supports the following features:
|
||||
* <ul>
|
||||
* <li>Vectors with unspecified or fixed dimensions</li>
|
||||
* <li>Distance type for similarity search (note that similarity threshold can be used
|
||||
* only with distance type COSINE and DOT when ingested vectors are normalized, see
|
||||
* forcedNormalization)</li>
|
||||
* <li>Vector indexes (use IVF as of 23.4)</li>
|
||||
* <li>Exact and Approximate similarity search</li>
|
||||
* <li>Filter expression as SQL/JSON Path expression evaluation</li>
|
||||
* </ul>
|
||||
*
|
||||
* @author Loïc Lefèvre
|
||||
*/
|
||||
public class OracleVectorStore implements VectorStore, InitializingBean {
|
||||
|
||||
private static final Logger logger = LoggerFactory.getLogger(OracleVectorStore.class);
|
||||
|
||||
public static final double SIMILARITY_THRESHOLD_EXACT_MATCH = 1.0d;
|
||||
|
||||
public enum OracleAIVectorSearchIndexType {

	/**
	 * No vector index: performs exact nearest neighbor search.
	 */
	NONE,

	/**
	 * Hierarchical Navigable Small World (HNSW), the default index type created for
	 * an In-Memory Neighbor Graph vector index.
	 * <p>
	 * Navigable Small World (NSW) builds a proximity graph in which each vector is
	 * connected to several others, based on three characteristics:
	 * </p>
	 * <ul>
	 * <li>the distance between vectors,</li>
	 * <li>the maximum number of closest vector candidates considered at each step of
	 * the search during insertion (EFCONSTRUCTION),</li>
	 * <li>the maximum number of connections (NEIGHBORS) permitted per vector.</li>
	 * </ul>
	 *
	 * @see <a href=
	 * "https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/understand-hierarchical-navigable-small-world-indexes.html">Oracle
	 * Database documentation</a>
	 */
	HNSW,

	/**
	 * Inverted File Flat (IVF), the default index type created for a Neighbor
	 * Partition vector index.
	 * <p>
	 * IVF is designed to enhance search efficiency by narrowing the search area
	 * through the use of neighbor partitions or clusters.
	 * </p>
	 *
	 * @see <a href=
	 * "https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/understand-inverted-file-flat-vector-indexes.html">Oracle
	 * Database documentation</a>
	 */
	IVF

}
|
||||
|
||||
public enum OracleAIVectorSearchDistanceType {

	/**
	 * Cosine distance between two vectors. This is the default metric.
	 */
	COSINE,

	/**
	 * Negated dot product of two vectors, also called the inner product.
	 */
	DOT,

	/**
	 * Euclidean distance between two vectors, also called L2_DISTANCE.
	 */
	EUCLIDEAN,

	/**
	 * Euclidean distance without taking the square root, also called L2_SQUARED.
	 */
	EUCLIDEAN_SQUARED,

	// TODO: add HAMMING support. It calculates the hamming distance between two
	// vectors and requires the INT8 element type.

	/**
	 * Manhattan distance, also called L1_DISTANCE or taxicab distance.
	 */
	MANHATTAN

}
|
||||
|
||||
public static final String DEFAULT_TABLE_NAME = "SPRING_AI_VECTORS";
|
||||
|
||||
public static final OracleAIVectorSearchIndexType DEFAULT_INDEX_TYPE = OracleAIVectorSearchIndexType.IVF;
|
||||
|
||||
public static final OracleAIVectorSearchDistanceType DEFAULT_DISTANCE_TYPE = OracleAIVectorSearchDistanceType.COSINE;
|
||||
|
||||
public static final int DEFAULT_DIMENSIONS = -1;
|
||||
|
||||
public static final int DEFAULT_SEARCH_ACCURACY = -1;
|
||||
|
||||
private final JdbcTemplate jdbcTemplate;
|
||||
|
||||
private final EmbeddingModel embeddingModel;
|
||||
|
||||
private final boolean initializeSchema;
|
||||
|
||||
private final boolean removeExistingVectorStoreTable;
|
||||
|
||||
public final FilterExpressionConverter filterExpressionConverter = new IsoSqlJsonPathFilterExpressionConverter();
|
||||
|
||||
/**
|
||||
* Table name where vectors will be stored.
|
||||
*/
|
||||
private final String tableName;
|
||||
|
||||
/**
|
||||
* Index type used to index the vectors. It can impact performance and database memory
|
||||
* consumption.
|
||||
*/
|
||||
private final OracleAIVectorSearchIndexType indexType;
|
||||
|
||||
/**
|
||||
* Distance type to use for computing vector distances.
|
||||
*/
|
||||
private final OracleAIVectorSearchDistanceType distanceType;
|
||||
|
||||
/**
|
||||
* Expected number of dimensions for vectors. Enforcing vector dimensions is very
|
||||
* useful to ensure future vector distance computations will be relevant.
|
||||
*/
|
||||
private final int dimensions;
|
||||
|
||||
private final boolean forcedNormalization;
|
||||
|
||||
private final int searchAccuracy;
|
||||
|
||||
/**
 * Creates a vector store using every default setting and without schema
 * initialization.
 * @param jdbcTemplate JDBC access to the Oracle database
 * @param embeddingModel model used to compute embeddings
 */
public OracleVectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel) {
	// Same defaults as the three-argument constructor, with initializeSchema off.
	this(jdbcTemplate, embeddingModel, false);
}
|
||||
|
||||
/**
 * Creates a vector store using the default table name, index type, distance type,
 * dimensions and search accuracy. The existing table is never removed and forced
 * normalization is off.
 * @param jdbcTemplate JDBC access to the Oracle database
 * @param embeddingModel model used to compute embeddings
 * @param initializeSchema whether the schema is created in
 * {@link #afterPropertiesSet()}
 */
public OracleVectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel, boolean initializeSchema) {
	this(jdbcTemplate, embeddingModel, DEFAULT_TABLE_NAME, DEFAULT_INDEX_TYPE, DEFAULT_DISTANCE_TYPE,
			DEFAULT_DIMENSIONS, DEFAULT_SEARCH_ACCURACY, initializeSchema,
			/* removeExistingVectorStoreTable */ false, /* forcedNormalization */ false);
}
|
||||
|
||||
/**
 * Creates a fully configured vector store.
 * @param jdbcTemplate JDBC access to the Oracle database
 * @param embeddingModel model used to compute embeddings
 * @param tableName table where vectors are stored
 * @param indexType vector index type
 * @param distanceType distance function used for similarity search
 * @param dimensions expected vector dimensions, between 1 and 65535, or
 * {@link #DEFAULT_DIMENSIONS} to leave them unspecified
 * @param searchAccuracy target accuracy between 1 and 100, or
 * {@link #DEFAULT_SEARCH_ACCURACY}
 * @param initializeSchema whether the schema is created in
 * {@link #afterPropertiesSet()}
 * @param removeExistingVectorStoreTable whether an existing table is dropped first
 * during schema initialization
 * @param forcedNormalization whether vectors are normalized before being bound
 * @throws RuntimeException if {@code dimensions} or {@code searchAccuracy} is out of
 * range
 */
public OracleVectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel, String tableName,
		OracleAIVectorSearchIndexType indexType, OracleAIVectorSearchDistanceType distanceType, int dimensions,
		int searchAccuracy, boolean initializeSchema, boolean removeExistingVectorStoreTable,
		boolean forcedNormalization) {
	// The DEFAULT_* sentinels (-1) bypass the lower-bound checks; they can never
	// exceed the upper bounds, so those checks need no sentinel guard.
	if (dimensions != DEFAULT_DIMENSIONS && dimensions <= 0) {
		throw new RuntimeException("Number of dimensions must be strictly positive");
	}
	if (dimensions > 65535) {
		throw new RuntimeException("Number of dimensions must be at most 65535");
	}
	if (searchAccuracy != DEFAULT_SEARCH_ACCURACY && searchAccuracy < 1) {
		throw new RuntimeException("Search accuracy must be greater or equals to 1");
	}
	if (searchAccuracy > 100) {
		throw new RuntimeException("Search accuracy must be lower or equals to 100");
	}

	this.jdbcTemplate = jdbcTemplate;
	this.embeddingModel = embeddingModel;

	this.tableName = tableName;
	this.indexType = indexType;
	this.distanceType = distanceType;
	this.dimensions = dimensions;
	this.searchAccuracy = searchAccuracy;

	this.initializeSchema = initializeSchema;
	this.removeExistingVectorStoreTable = removeExistingVectorStoreTable;
	this.forcedNormalization = forcedNormalization;
}
|
||||
|
||||
@Override
|
||||
public void add(final List<Document> documents) {
|
||||
this.jdbcTemplate.batchUpdate(getIngestStatement(), new BatchPreparedStatementSetter() {
|
||||
@Override
|
||||
public void setValues(PreparedStatement ps, int i) throws SQLException {
|
||||
final Document document = documents.get(i);
|
||||
final String content = document.getContent();
|
||||
final byte[] json = toJson(document.getMetadata());
|
||||
final VECTOR embeddingVector = toVECTOR(embeddingModel.embed(document));
|
||||
|
||||
setParameterValue(ps, 1, Types.VARCHAR, document.getId());
|
||||
setParameterValue(ps, 2, Types.VARCHAR, content);
|
||||
setParameterValue(ps, 3, OracleType.JSON.getVendorTypeNumber(), json);
|
||||
setParameterValue(ps, 4, OracleType.VECTOR.getVendorTypeNumber(), embeddingVector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBatchSize() {
|
||||
return documents.size();
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
private String getIngestStatement() {
|
||||
return String
|
||||
.format("""
|
||||
merge into %s target using (values(?, ?, ?, ?)) source (id, content, metadata, embedding) on (target.id = source.id)
|
||||
when matched then update set target.content = source.content, target.metadata = source.metadata, target.embedding = source.embedding
|
||||
when not matched then insert (target.id, target.content, target.metadata, target.embedding) values (source.id, source.content, source.metadata, source.embedding)""",
|
||||
tableName);
|
||||
}
|
||||
|
||||
private final OracleJsonFactory osonFactory = new OracleJsonFactory();
|
||||
|
||||
private final ByteArrayOutputStream out = new ByteArrayOutputStream();
|
||||
|
||||
/**
|
||||
* Bind binary JSON from the client.
|
||||
* @param m map of metadata
|
||||
* @return the binary JSON ready to be inserted
|
||||
*/
|
||||
private byte[] toJson(final Map<String, Object> m) {
|
||||
out.reset();
|
||||
try (OracleJsonGenerator gen = osonFactory.createJsonBinaryGenerator(out)) {
|
||||
gen.writeStartObject();
|
||||
for (String key : m.keySet()) {
|
||||
final Object o = m.get(key);
|
||||
if (o instanceof String) {
|
||||
gen.write(key, (String) o);
|
||||
}
|
||||
else if (o instanceof Integer) {
|
||||
gen.write(key, (Integer) o);
|
||||
}
|
||||
else if (o instanceof Float) {
|
||||
gen.write(key, (Float) o);
|
||||
}
|
||||
else if (o instanceof Double) {
|
||||
gen.write(key, (Double) o);
|
||||
}
|
||||
else if (o instanceof Boolean) {
|
||||
gen.write(key, (Boolean) o);
|
||||
}
|
||||
}
|
||||
gen.writeEnd();
|
||||
}
|
||||
|
||||
return out.toByteArray();
|
||||
}
|
||||
|
||||
/**
|
||||
* Converts a list of Double values into an Oracle VECTOR object ready to be inserted.
|
||||
* Optionally normalize the vector beforehand (see forcedNormalization).
|
||||
* @param doubleList
|
||||
* @return
|
||||
* @throws SQLException
|
||||
*/
|
||||
private VECTOR toVECTOR(final List<Double> doubleList) throws SQLException {
|
||||
final double[] doubles = new double[doubleList.size()];
|
||||
int i = 0;
|
||||
for (double d : doubleList) {
|
||||
doubles[i++] = d;
|
||||
}
|
||||
|
||||
if (forcedNormalization) {
|
||||
return VECTOR.ofFloat64Values(normalize(doubles));
|
||||
}
|
||||
|
||||
return VECTOR.ofFloat64Values(doubles);
|
||||
}
|
||||
|
||||
/**
|
||||
* Normalize a vector if requested.
|
||||
* @param v vector to normalize
|
||||
* @return the vector normalized
|
||||
*/
|
||||
private double[] normalize(final double[] v) {
|
||||
double squaredSum = 0d;
|
||||
|
||||
for (double e : v) {
|
||||
squaredSum += e * e;
|
||||
}
|
||||
|
||||
final double magnitude = Math.sqrt(squaredSum);
|
||||
|
||||
if (magnitude > 0) {
|
||||
final double multiplier = 1d / magnitude;
|
||||
final int length = v.length;
|
||||
for (int i = 0; i < length; i++) {
|
||||
v[i] *= multiplier;
|
||||
}
|
||||
}
|
||||
|
||||
return v;
|
||||
}
|
||||
|
||||
@Override
|
||||
public Optional<Boolean> delete(final List<String> idList) {
|
||||
final String sql = String.format("delete from %s where id=?", tableName);
|
||||
final int[] argTypes = { Types.VARCHAR };
|
||||
|
||||
final List<Object[]> batchArgs = new ArrayList<>();
|
||||
for (String id : idList) {
|
||||
batchArgs.add(new Object[] { id });
|
||||
}
|
||||
|
||||
final int[] deleteCounts = jdbcTemplate.batchUpdate(sql, batchArgs, argTypes);
|
||||
|
||||
int deleteCount = 0;
|
||||
for (int detailedResult : deleteCounts) {
|
||||
switch (detailedResult) {
|
||||
case Statement.EXECUTE_FAILED:
|
||||
break;
|
||||
case 1:
|
||||
case Statement.SUCCESS_NO_INFO:
|
||||
deleteCount++;
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
return Optional.of(deleteCount == idList.size());
|
||||
}
|
||||
|
||||
private static class DocumentRowMapper implements RowMapper<Document> {
|
||||
|
||||
@Override
|
||||
public Document mapRow(ResultSet rs, int rowNum) throws SQLException {
|
||||
final Map<String, Object> metadata = getMap(rs.getObject(3, OracleJsonValue.class));
|
||||
metadata.put("distance", rs.getDouble(5));
|
||||
|
||||
final Document document = new Document(rs.getString(1), rs.getString(2), metadata);
|
||||
final double[] embedding = rs.getObject(4, double[].class);
|
||||
document.setEmbedding(toDoubleList(embedding));
|
||||
return document;
|
||||
}
|
||||
|
||||
private Map<String, Object> getMap(OracleJsonValue value) {
|
||||
final Map<String, Object> result = new HashMap<>();
|
||||
|
||||
if (value != null) {
|
||||
final OracleJsonObject json = value.asJsonObject();
|
||||
for (String key : json.keySet()) {
|
||||
result.put(key, json.get(key));
|
||||
}
|
||||
}
|
||||
|
||||
return result;
|
||||
}
|
||||
|
||||
private List<Double> toDoubleList(final double[] embeddings) {
|
||||
final List<Double> result = new ArrayList<>(embeddings.length);
|
||||
for (double v : embeddings) {
|
||||
result.add(v);
|
||||
}
|
||||
return result;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@Override
|
||||
public List<Document> similaritySearch(SearchRequest request) {
|
||||
try {
|
||||
// From the provided query, generate a vector using the embedding model
|
||||
final VECTOR embeddingVector = toVECTOR(embeddingModel.embed(request.getQuery()));
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
this.jdbcTemplate.batchUpdate("insert into debug(embedding) values(?)",
|
||||
new BatchPreparedStatementSetter() {
|
||||
@Override
|
||||
public void setValues(PreparedStatement ps, int i) throws SQLException {
|
||||
setParameterValue(ps, 1, OracleType.VECTOR.getVendorTypeNumber(), embeddingVector);
|
||||
}
|
||||
|
||||
@Override
|
||||
public int getBatchSize() {
|
||||
return 1;
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
final String nativeFilterExpression = (request.getFilterExpression() != null)
|
||||
? this.filterExpressionConverter.convertExpression(request.getFilterExpression()) : "";
|
||||
|
||||
String jsonPathFilter = "";
|
||||
|
||||
if (request.getSimilarityThreshold() == SearchRequest.SIMILARITY_THRESHOLD_ACCEPT_ALL) {
|
||||
if (StringUtils.hasText(nativeFilterExpression)) {
|
||||
jsonPathFilter = String.format("where JSON_EXISTS( metadata, '%s' )\n", nativeFilterExpression);
|
||||
}
|
||||
|
||||
final String sql = searchAccuracy == DEFAULT_SEARCH_ACCURACY ? String.format("""
|
||||
select id, content, metadata, embedding, %sVECTOR_DISTANCE(embedding, ?, %s)%s as distance
|
||||
from %s
|
||||
%sorder by distance
|
||||
fetch first %d rows only""", distanceType == DOT ? "(1+" : "", distanceType.name(),
|
||||
distanceType == DOT ? ")/2" : "", tableName, jsonPathFilter, request.getTopK())
|
||||
: String.format(
|
||||
"""
|
||||
select id, content, metadata, embedding, %sVECTOR_DISTANCE(embedding, ?, %s)%s as distance
|
||||
from %s
|
||||
%sorder by distance
|
||||
fetch APPROXIMATE first %d rows only WITH TARGET ACCURACY %d""",
|
||||
distanceType == DOT ? "(1+" : "", distanceType.name(), distanceType == DOT ? ")/2" : "",
|
||||
tableName, jsonPathFilter, request.getTopK(), searchAccuracy);
|
||||
|
||||
logger.debug("SQL query: " + sql);
|
||||
|
||||
return this.jdbcTemplate.query(sql, new DocumentRowMapper(), embeddingVector);
|
||||
}
|
||||
else if (request.getSimilarityThreshold() == SIMILARITY_THRESHOLD_EXACT_MATCH) {
|
||||
if (StringUtils.hasText(nativeFilterExpression)) {
|
||||
jsonPathFilter = String.format("where JSON_EXISTS( metadata, '%s' )\n", nativeFilterExpression);
|
||||
}
|
||||
|
||||
final String sql = String.format("""
|
||||
select id, content, metadata, embedding, %sVECTOR_DISTANCE(embedding, ?, %s)%s as distance
|
||||
from %s
|
||||
%sorder by distance
|
||||
fetch EXACT first %d rows only""", distanceType == DOT ? "(1+" : "", distanceType.name(),
|
||||
distanceType == DOT ? ")/2" : "", tableName, jsonPathFilter, request.getTopK());
|
||||
|
||||
logger.debug("SQL query: " + sql);
|
||||
|
||||
return this.jdbcTemplate.query(sql, new DocumentRowMapper(), embeddingVector);
|
||||
}
|
||||
else {
|
||||
if (!forcedNormalization
|
||||
|| (distanceType != OracleAIVectorSearchDistanceType.COSINE && distanceType != DOT)) {
|
||||
throw new RuntimeException(
|
||||
"Similarity threshold filtering requires all vectors to be normalized, see the forcedNormalization parameter for this Vector store. Also only COSINE and DOT distance types are supported.");
|
||||
}
|
||||
|
||||
final double distance = distanceType == DOT ? (1d - request.getSimilarityThreshold()) * 2d - 1d
|
||||
: 1d - request.getSimilarityThreshold();
|
||||
|
||||
if (StringUtils.hasText(nativeFilterExpression)) {
|
||||
jsonPathFilter = String.format(" and JSON_EXISTS( metadata, '%s' )", nativeFilterExpression);
|
||||
}
|
||||
|
||||
final String sql = distanceType == DOT ? (searchAccuracy == DEFAULT_SEARCH_ACCURACY ? String.format("""
|
||||
select id, content, metadata, embedding, (1+VECTOR_DISTANCE(embedding, ?, DOT))/2 as distance
|
||||
from %s
|
||||
where VECTOR_DISTANCE(embedding, ?, DOT) <= ?%s
|
||||
order by distance
|
||||
fetch first %d rows only""", tableName, jsonPathFilter, request.getTopK()) : String.format("""
|
||||
select id, content, metadata, embedding, (1+VECTOR_DISTANCE(embedding, ?, DOT))/2 as distance
|
||||
from %s
|
||||
where VECTOR_DISTANCE(embedding, ?, DOT) <= ?%s
|
||||
order by distance
|
||||
fetch APPROXIMATE first %d rows only WITH TARGET ACCURACY %d""", tableName, jsonPathFilter,
|
||||
request.getTopK(), searchAccuracy)
|
||||
|
||||
) : (searchAccuracy == DEFAULT_SEARCH_ACCURACY ? String.format("""
|
||||
select id, content, metadata, embedding, VECTOR_DISTANCE(embedding, ?, COSINE) as distance
|
||||
from %s
|
||||
where VECTOR_DISTANCE(embedding, ?, COSINE) <= ?%s
|
||||
order by distance
|
||||
fetch first %d rows only""", tableName, jsonPathFilter, request.getTopK()) : String.format("""
|
||||
select id, content, metadata, embedding, VECTOR_DISTANCE(embedding, ?, COSINE) as distance
|
||||
from %s
|
||||
where VECTOR_DISTANCE(embedding, ?, COSINE) <= ?%s
|
||||
order by distance
|
||||
fetch APPROXIMATE first %d rows only WITH TARGET ACCURACY %d""", tableName, jsonPathFilter,
|
||||
request.getTopK(), searchAccuracy));
|
||||
|
||||
logger.debug("SQL query: " + sql);
|
||||
|
||||
return this.jdbcTemplate.query(sql, new DocumentRowMapper(), embeddingVector, embeddingVector,
|
||||
distance);
|
||||
}
|
||||
}
|
||||
catch (SQLException sqle) {
|
||||
throw new RuntimeException(sqle);
|
||||
}
|
||||
}
|
||||
|
||||
@Override
|
||||
public void afterPropertiesSet() throws Exception {
|
||||
if (this.initializeSchema) {
|
||||
// Remove existing VectorStoreTable
|
||||
if (this.removeExistingVectorStoreTable) {
|
||||
this.jdbcTemplate.execute(String.format("drop table if exists %s purge", tableName));
|
||||
}
|
||||
|
||||
this.jdbcTemplate.execute(String.format("""
|
||||
create table if not exists %s (
|
||||
id varchar2(36) default sys_guid() primary key,
|
||||
content clob not null,
|
||||
metadata json not null,
|
||||
embedding vector(%s,FLOAT64) annotations(Distance '%s', IndexType '%s')
|
||||
)""", tableName, dimensions == DEFAULT_DIMENSIONS ? "*" : String.valueOf(dimensions),
|
||||
distanceType.name(), indexType.name()));
|
||||
|
||||
if (logger.isDebugEnabled()) {
|
||||
this.jdbcTemplate.execute(String.format("""
|
||||
create table if not exists debug (
|
||||
id varchar2(36) default sys_guid() primary key,
|
||||
embedding vector(%s,FLOAT64) annotations(Distance '%s')
|
||||
)""", dimensions == DEFAULT_DIMENSIONS ? "*" : String.valueOf(dimensions),
|
||||
distanceType.name()));
|
||||
}
|
||||
|
||||
switch (indexType) {
|
||||
case IVF:
|
||||
this.jdbcTemplate.execute(String.format("""
|
||||
create vector index if not exists vector_index_%s on %s (embedding)
|
||||
organization neighbor partitions
|
||||
distance %s
|
||||
with target accuracy %d
|
||||
parameters (type IVF, neighbor partitions 10)""", tableName, tableName,
|
||||
distanceType.name(), searchAccuracy == DEFAULT_SEARCH_ACCURACY ? 95 : searchAccuracy));
|
||||
break;
|
||||
|
||||
/*
|
||||
* TODO: Enable for 23.5 case HNSW:
|
||||
* this.jdbcTemplate.execute(String.format(""" create vector index if not
|
||||
* exists vector_index_%s on %s (embedding) organization inmemory neighbor
|
||||
* graph distance %s with target accuracy %d parameters (type HNSW,
|
||||
* neighbors 40, efconstruction 500)""", tableName, tableName,
|
||||
* distanceType.name(), searchAccuracy == DEFAULT_SEARCH_ACCURACY ? 95 :
|
||||
* searchAccuracy)); break;
|
||||
*/
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
public String getTableName() {
|
||||
return tableName;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,29 @@
|
||||
package org.springframework.ai.vectorstore;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.ai.vectorstore.filter.Filter;
|
||||
import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser;
|
||||
|
||||
public class IsoSqlJsonPathFilterExpressionConverterTests {
|
||||
|
||||
@Test
|
||||
public void testNIN() {
|
||||
final Filter.Expression e = new FilterExpressionTextParser().parse("weather nin [\"windy\", \"rainy\"]");
|
||||
|
||||
final String jsonPathExpression = new IsoSqlJsonPathFilterExpressionConverter().convertExpression(e);
|
||||
|
||||
assertThat(jsonPathExpression).isEqualTo("$?( !( @.weather in ( \"windy\",\"rainy\" ) ) )");
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testNOT() {
|
||||
final Filter.Expression e = new FilterExpressionTextParser().parse("NOT( weather in [\"windy\", \"rainy\"] )");
|
||||
|
||||
final String jsonPathExpression = new IsoSqlJsonPathFilterExpressionConverter().convertExpression(e);
|
||||
|
||||
assertThat(jsonPathExpression).isEqualTo("$?( (!( @.weather in ( \"windy\",\"rainy\" ) )) )");
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,328 @@
|
||||
package org.springframework.ai.vectorstore;
|
||||
|
||||
import oracle.jdbc.pool.OracleDataSource;
|
||||
import org.junit.Assert;
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
import org.junit.jupiter.params.provider.CsvSource;
|
||||
import org.junit.jupiter.params.provider.ValueSource;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.embedding.EmbeddingModel;
|
||||
import org.springframework.ai.transformers.TransformersEmbeddingModel;
|
||||
import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser;
|
||||
import org.springframework.beans.factory.annotation.Value;
|
||||
import org.springframework.boot.SpringBootConfiguration;
|
||||
import org.springframework.boot.autoconfigure.EnableAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration;
|
||||
import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties;
|
||||
import org.springframework.boot.context.properties.ConfigurationProperties;
|
||||
import org.springframework.boot.test.context.runner.ApplicationContextRunner;
|
||||
import org.springframework.context.ApplicationContext;
|
||||
import org.springframework.context.annotation.Bean;
|
||||
import org.springframework.context.annotation.Primary;
|
||||
import org.springframework.core.io.DefaultResourceLoader;
|
||||
import org.springframework.jdbc.core.JdbcTemplate;
|
||||
import org.springframework.util.CollectionUtils;
|
||||
import org.testcontainers.junit.jupiter.Container;
|
||||
import org.testcontainers.junit.jupiter.Testcontainers;
|
||||
import org.testcontainers.oracle.OracleContainer;
|
||||
import org.testcontainers.utility.MountableFile;
|
||||
|
||||
import javax.sql.DataSource;
|
||||
import java.io.IOException;
|
||||
import java.nio.charset.StandardCharsets;
|
||||
import java.util.Collections;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
import java.util.UUID;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.springframework.ai.vectorstore.OracleVectorStore.DEFAULT_SEARCH_ACCURACY;
|
||||
|
||||
@Testcontainers
|
||||
public class OracleVectorStoreIT {
|
||||
|
||||
@Container
|
||||
static OracleContainer oracle23aiContainer = new OracleContainer("gvenzl/oracle-free:23-slim")
|
||||
.withCopyFileToContainer(MountableFile.forClasspathResource("/initialize.sql"),
|
||||
"/container-entrypoint-initdb.d/initialize.sql");
|
||||
|
||||
final List<Document> documents = List.of(
|
||||
new Document(getText("classpath:/test/data/spring.ai.txt"), Map.of("meta1", "meta1")),
|
||||
new Document(getText("classpath:/test/data/time.shelter.txt")),
|
||||
new Document(getText("classpath:/test/data/great.depression.txt"), Map.of("meta2", "meta2")));
|
||||
|
||||
public static String getText(final String uri) {
|
||||
try {
|
||||
return new DefaultResourceLoader().getResource(uri).getContentAsString(StandardCharsets.UTF_8);
|
||||
}
|
||||
catch (IOException e) {
|
||||
throw new RuntimeException(e);
|
||||
}
|
||||
}
|
||||
|
||||
private final ApplicationContextRunner contextRunner = new ApplicationContextRunner()
|
||||
.withUserConfiguration(TestClient.class)
|
||||
.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=COSINE_DISTANCE",
|
||||
"test.spring.ai.vectorstore.oracle.dimensions=384",
|
||||
// JdbcTemplate configuration
|
||||
String.format("app.datasource.url=%s", oracle23aiContainer.getJdbcUrl()),
|
||||
String.format("app.datasource.username=%s", oracle23aiContainer.getUsername()),
|
||||
String.format("app.datasource.password=%s", oracle23aiContainer.getPassword()),
|
||||
"app.datasource.type=oracle.jdbc.pool.OracleDataSource");
|
||||
|
||||
@SpringBootConfiguration
|
||||
@EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class })
|
||||
public static class TestClient {
|
||||
|
||||
@Value("${test.spring.ai.vectorstore.oracle.distanceType}")
|
||||
OracleVectorStore.OracleAIVectorSearchDistanceType distanceType;
|
||||
|
||||
@Value("${test.spring.ai.vectorstore.oracle.searchAccuracy}")
|
||||
int searchAccuracy;
|
||||
|
||||
@Bean
|
||||
public VectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel) {
|
||||
return new OracleVectorStore(jdbcTemplate, embeddingModel, OracleVectorStore.DEFAULT_TABLE_NAME,
|
||||
OracleVectorStore.OracleAIVectorSearchIndexType.IVF, distanceType, 384, searchAccuracy, true, true,
|
||||
true);
|
||||
}
|
||||
|
||||
@Bean
|
||||
public JdbcTemplate myJdbcTemplate(DataSource dataSource) {
|
||||
return new JdbcTemplate(dataSource);
|
||||
}
|
||||
|
||||
@Bean
|
||||
@Primary
|
||||
@ConfigurationProperties("app.datasource")
|
||||
public DataSourceProperties dataSourceProperties() {
|
||||
return new DataSourceProperties();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public OracleDataSource dataSource(DataSourceProperties dataSourceProperties) {
|
||||
return dataSourceProperties.initializeDataSourceBuilder().type(OracleDataSource.class).build();
|
||||
}
|
||||
|
||||
@Bean
|
||||
public EmbeddingModel embeddingModel() {
|
||||
try {
|
||||
TransformersEmbeddingModel tem = new TransformersEmbeddingModel();
|
||||
tem.afterPropertiesSet();
|
||||
return tem;
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException("Failed initializing embedding model", e);
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
private static void dropTable(ApplicationContext context, String tableName) {
|
||||
JdbcTemplate jdbcTemplate = context.getBean(JdbcTemplate.class);
|
||||
jdbcTemplate.execute("DROP TABLE IF EXISTS " + tableName + " PURGE");
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = "{0} : {displayName} ")
|
||||
@ValueSource(strings = { "COSINE", "DOT", "EUCLIDEAN", "EUCLIDEAN_SQUARED", "MANHATTAN" })
|
||||
public void addAndSearch(String distanceType) {
|
||||
contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType)
|
||||
.withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + DEFAULT_SEARCH_ACCURACY)
|
||||
.run(context -> {
|
||||
|
||||
VectorStore vectorStore = context.getBean(VectorStore.class);
|
||||
|
||||
vectorStore.add(documents);
|
||||
|
||||
List<Document> results = vectorStore
|
||||
.similaritySearch(SearchRequest.query("What is Great Depression").withTopK(1));
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
Document resultDoc = results.get(0);
|
||||
assertThat(resultDoc.getId()).isEqualTo(documents.get(2).getId());
|
||||
assertThat(resultDoc.getMetadata()).containsKeys("meta2", "distance");
|
||||
|
||||
// Remove all documents from the store
|
||||
vectorStore.delete(documents.stream().map(doc -> doc.getId()).toList());
|
||||
|
||||
List<Document> results2 = vectorStore
|
||||
.similaritySearch(SearchRequest.query("Great Depression").withTopK(1));
|
||||
assertThat(results2).hasSize(0);
|
||||
|
||||
dropTable(context, ((OracleVectorStore) vectorStore).getTableName());
|
||||
});
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = "Distance {0}, search accuracy {1} : {displayName} ")
|
||||
@CsvSource({ "COSINE,-1", "DOT,-1", "EUCLIDEAN,-1", "EUCLIDEAN_SQUARED,-1", "MANHATTAN,-1", "COSINE,75", "DOT,80",
|
||||
"EUCLIDEAN,60", "EUCLIDEAN_SQUARED,30", "MANHATTAN,42" })
|
||||
public void searchWithFilters(String distanceType, int searchAccuracy) {
|
||||
contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType)
|
||||
.withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + searchAccuracy)
|
||||
.run(context -> {
|
||||
|
||||
VectorStore vectorStore = context.getBean(VectorStore.class);
|
||||
|
||||
var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
|
||||
Map.of("country", "BG", "year", 2020, "foo bar 1", "bar.foo"));
|
||||
var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner",
|
||||
Map.of("country", "NL"));
|
||||
var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner",
|
||||
Map.of("country", "BG", "year", 2023));
|
||||
|
||||
vectorStore.add(List.of(bgDocument, nlDocument, bgDocument2));
|
||||
|
||||
SearchRequest searchRequest = SearchRequest.query("The World").withTopK(5).withSimilarityThresholdAll();
|
||||
|
||||
List<Document> results = vectorStore.similaritySearch(searchRequest);
|
||||
|
||||
assertThat(results).hasSize(3);
|
||||
|
||||
results = vectorStore.similaritySearch(searchRequest.withFilterExpression("country == 'NL'"));
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).getId()).isEqualTo(nlDocument.getId());
|
||||
|
||||
results = vectorStore.similaritySearch(searchRequest.withFilterExpression("country == 'BG'"));
|
||||
|
||||
assertThat(results).hasSize(2);
|
||||
assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId());
|
||||
assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId());
|
||||
|
||||
results = vectorStore
|
||||
.similaritySearch(searchRequest.withFilterExpression("country == 'BG' && year == 2020"));
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).getId()).isEqualTo(bgDocument.getId());
|
||||
|
||||
results = vectorStore.similaritySearch(
|
||||
searchRequest.withFilterExpression("(country == 'BG' && year == 2020) || (country == 'NL')"));
|
||||
|
||||
assertThat(results).hasSize(2);
|
||||
assertThat(results.get(0).getId()).isIn(bgDocument.getId(), nlDocument.getId());
|
||||
assertThat(results.get(1).getId()).isIn(bgDocument.getId(), nlDocument.getId());
|
||||
|
||||
results = vectorStore.similaritySearch(searchRequest
|
||||
.withFilterExpression("NOT((country == 'BG' && year == 2020) || (country == 'NL'))"));
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).getId()).isEqualTo(bgDocument2.getId());
|
||||
|
||||
results = vectorStore.similaritySearch(SearchRequest.query("The World")
|
||||
.withTopK(5)
|
||||
.withSimilarityThresholdAll()
|
||||
.withFilterExpression("\"foo bar 1\" == 'bar.foo'"));
|
||||
assertThat(results).hasSize(1);
|
||||
assertThat(results.get(0).getId()).isEqualTo(bgDocument.getId());
|
||||
|
||||
try {
|
||||
vectorStore.similaritySearch(searchRequest.withFilterExpression("country == NL"));
|
||||
Assert.fail("Invalid filter expression should have been cached!");
|
||||
}
|
||||
catch (FilterExpressionTextParser.FilterExpressionParseException e) {
|
||||
assertThat(e.getMessage()).contains("Line: 1:17, Error: no viable alternative at input 'NL'");
|
||||
}
|
||||
|
||||
// Remove all documents from the store
|
||||
dropTable(context, ((OracleVectorStore) vectorStore).getTableName());
|
||||
});
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = "{0} : {displayName} ")
|
||||
@ValueSource(strings = { "COSINE", "DOT", "EUCLIDEAN", "EUCLIDEAN_SQUARED", "MANHATTAN" })
|
||||
public void documentUpdate(String distanceType) {
|
||||
contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType)
|
||||
.withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + DEFAULT_SEARCH_ACCURACY)
|
||||
.run(context -> {
|
||||
VectorStore vectorStore = context.getBean(VectorStore.class);
|
||||
|
||||
Document document = new Document(UUID.randomUUID().toString(), "Spring AI rocks!!",
|
||||
Collections.singletonMap("meta1", "meta1"));
|
||||
|
||||
vectorStore.add(List.of(document));
|
||||
|
||||
List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Spring").withTopK(5));
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
Document resultDoc = results.get(0);
|
||||
assertThat(resultDoc.getId()).isEqualTo(document.getId());
|
||||
|
||||
assertThat(resultDoc.getContent()).isEqualTo("Spring AI rocks!!");
|
||||
assertThat(resultDoc.getMetadata()).containsKeys("meta1", "distance");
|
||||
|
||||
Document sameIdDocument = new Document(document.getId(),
|
||||
"The World is Big and Salvation Lurks Around the Corner",
|
||||
Collections.singletonMap("meta2", "meta2"));
|
||||
|
||||
vectorStore.add(List.of(sameIdDocument));
|
||||
|
||||
results = vectorStore.similaritySearch(SearchRequest.query("FooBar").withTopK(5));
|
||||
assertThat(results).hasSize(1);
|
||||
resultDoc = results.get(0);
|
||||
assertThat(resultDoc.getId()).isEqualTo(document.getId());
|
||||
assertThat(resultDoc.getContent()).isEqualTo("The World is Big and Salvation Lurks Around the Corner");
|
||||
assertThat(resultDoc.getMetadata()).containsKeys("meta2", "distance");
|
||||
|
||||
dropTable(context, ((OracleVectorStore) vectorStore).getTableName());
|
||||
});
|
||||
}
|
||||
|
||||
@ParameterizedTest(name = "{0} : {displayName} ")
|
||||
@ValueSource(strings = { "COSINE", "DOT" })
|
||||
public void searchWithThreshold(String distanceType) {
|
||||
contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType)
|
||||
.withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + DEFAULT_SEARCH_ACCURACY)
|
||||
.run(context -> {
|
||||
|
||||
VectorStore vectorStore = context.getBean(VectorStore.class);
|
||||
|
||||
vectorStore.add(documents);
|
||||
|
||||
List<Document> fullResult = vectorStore
|
||||
.similaritySearch(SearchRequest.query("Time Shelter").withTopK(5).withSimilarityThresholdAll());
|
||||
|
||||
assertThat(fullResult).hasSize(3);
|
||||
|
||||
assertThat(isSortedByDistance(fullResult)).isTrue();
|
||||
|
||||
List<Double> distances = fullResult.stream()
|
||||
.map(doc -> (Double) doc.getMetadata().get("distance"))
|
||||
.toList();
|
||||
|
||||
double threshold = (distances.get(0) + distances.get(1)) / 2d;
|
||||
|
||||
List<Document> results = vectorStore.similaritySearch(
|
||||
SearchRequest.query("Time Shelter").withTopK(5).withSimilarityThreshold(1d - threshold));
|
||||
|
||||
assertThat(results).hasSize(1);
|
||||
Document resultDoc = results.get(0);
|
||||
assertThat(resultDoc.getId()).isEqualTo(documents.get(1).getId());
|
||||
|
||||
dropTable(context, ((OracleVectorStore) vectorStore).getTableName());
|
||||
});
|
||||
}
|
||||
|
||||
private static boolean isSortedByDistance(final List<Document> documents) {
|
||||
final List<Double> distances = documents.stream()
|
||||
.map(doc -> (Double) doc.getMetadata().get("distance"))
|
||||
.toList();
|
||||
|
||||
if (CollectionUtils.isEmpty(distances) || distances.size() == 1) {
|
||||
return true;
|
||||
}
|
||||
|
||||
Iterator<Double> iter = distances.iterator();
|
||||
Double current;
|
||||
Double previous = iter.next();
|
||||
while (iter.hasNext()) {
|
||||
current = iter.next();
|
||||
if (previous > current) {
|
||||
return false;
|
||||
}
|
||||
previous = current;
|
||||
}
|
||||
return true;
|
||||
}
|
||||
|
||||
}
|
||||
@@ -0,0 +1,10 @@
|
||||
-- Exit SQL*Plus on any SQL error, returning the failing statement's SQL code.
|
||||
WHENEVER SQLERROR EXIT SQL.SQLCODE
|
||||
|
||||
-- Configure the size of the Vector Pool to 1 GiB.
|
||||
ALTER SYSTEM SET vector_memory_size=1G SCOPE=SPFILE;
|
||||
|
||||
-- Restart the instance. The setting above was written with SCOPE=SPFILE and
-- presumably only takes effect on the next startup (confirm against Oracle docs).
SHUTDOWN ABORT;
|
||||
STARTUP;
|
||||
|
||||
exit;
|
||||
Reference in New Issue
Block a user