From 41cf693bc03b8551900c0c8b9576c198b1c121f9 Mon Sep 17 00:00:00 2001 From: LLEFEVRE Date: Thu, 30 May 2024 17:08:05 +0200 Subject: [PATCH] Add Support for Oracle 23ai as vector database MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - add OracleVectorStore with metadata filter expression support. - add ITs using oracle-free-slim testcontainers. - add auto-configuration and boot starter. - add adoc documentation. - Adjust javadoc references. Resolves #703 Co-authored-by: Eddú Meléndez Gonzales --- README.md | 2 +- pom.xml | 5 +- spring-ai-bom/pom.xml | 12 + .../src/main/antora/modules/ROOT/nav.adoc | 1 + .../modules/ROOT/pages/api/vectordbs.adoc | 2 + .../ROOT/pages/api/vectordbs/oracle.adoc | 203 ++++++ .../main/antora/modules/ROOT/pages/index.adoc | 2 +- spring-ai-spring-boot-autoconfigure/pom.xml | 8 + ...eAIVectorSearchStoreAutoConfiguration.java | 48 ++ .../OracleAIVectorSearchStoreProperties.java | 102 +++ ...ot.autoconfigure.AutoConfiguration.imports | 1 + .../spring-ai-starter-oracle-store/pom.xml | 42 ++ .../spring-ai-oracle-store/README.md | 1 + vector-stores/spring-ai-oracle-store/pom.xml | 101 +++ ...OSQLJSONPathFilterExpressionConverter.java | 88 +++ .../ai/vectorstore/OracleVectorStore.java | 602 ++++++++++++++++++ ...sonPathFilterExpressionConverterTests.java | 29 + .../ai/vectorstore/OracleVectorStoreIT.java | 328 ++++++++++ .../src/test/resources/initialize.sql | 10 + 19 files changed, 1584 insertions(+), 3 deletions(-) create mode 100644 spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/oracle.adoc create mode 100644 spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreAutoConfiguration.java create mode 100644 spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreProperties.java create mode 100644 
spring-ai-spring-boot-starters/spring-ai-starter-oracle-store/pom.xml create mode 100644 vector-stores/spring-ai-oracle-store/README.md create mode 100644 vector-stores/spring-ai-oracle-store/pom.xml create mode 100644 vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/ISOSQLJSONPathFilterExpressionConverter.java create mode 100644 vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/OracleVectorStore.java create mode 100644 vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/IsoSqlJsonPathFilterExpressionConverterTests.java create mode 100644 vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/OracleVectorStoreIT.java create mode 100644 vector-stores/spring-ai-oracle-store/src/test/resources/initialize.sql diff --git a/README.md b/README.md index 38cd1dd85..cfe121838 100644 --- a/README.md +++ b/README.md @@ -163,7 +163,7 @@ Though the `DocumentWriter` interface isn't exclusively for Vector Database writ **Vector Stores:** Vector Databases are instrumental in incorporating your data with AI models. They ascertain which document sections the AI should use for generating responses. -Examples of Vector Databases include Chroma, Postgres, Pinecone, Qdrant, Weaviate, Mongo Atlas, and Redis. Spring AI's `VectorStore` abstraction permits effortless transitions between database implementations. +Examples of Vector Databases include Chroma, Oracle, Postgres, Pinecone, Qdrant, Weaviate, Mongo Atlas, and Redis. Spring AI's `VectorStore` abstraction permits effortless transitions between database implementations. 
diff --git a/pom.xml b/pom.xml index a42f747f5..abe272dc4 100644 --- a/pom.xml +++ b/pom.xml @@ -35,6 +35,7 @@ vector-stores/spring-ai-milvus-store vector-stores/spring-ai-mongodb-atlas-store vector-stores/spring-ai-neo4j-store + vector-stores/spring-ai-oracle-store vector-stores/spring-ai-pgvector-store vector-stores/spring-ai-pinecone-store vector-stores/spring-ai-qdrant-store @@ -49,6 +50,7 @@ spring-ai-spring-boot-starters/spring-ai-starter-milvus-store spring-ai-spring-boot-starters/spring-ai-starter-mongodb-atlas-store spring-ai-spring-boot-starters/spring-ai-starter-neo4j-store + spring-ai-spring-boot-starters/spring-ai-starter-oracle-store spring-ai-spring-boot-starters/spring-ai-starter-pgvector-store spring-ai-spring-boot-starters/spring-ai-starter-pinecone-store spring-ai-spring-boot-starters/spring-ai-starter-qdrant-store @@ -157,6 +159,7 @@ 3.0.1 0.1.4 2.20.11 + 23.4.0.24.05 42.7.2 2.3.4 0.8.0 @@ -181,7 +184,7 @@ 1.5.0 3.1.1 2.2.3 - 3.6.0 + 3.7.0 3.5.0 4.0.0-M13 diff --git a/spring-ai-bom/pom.xml b/spring-ai-bom/pom.xml index 60f2f11d0..19b569bc7 100644 --- a/spring-ai-bom/pom.xml +++ b/spring-ai-bom/pom.xml @@ -162,6 +162,12 @@ ${project.version} + + org.springframework.ai + spring-ai-oracle-store + ${project.version} + + org.springframework.ai spring-ai-pgvector-store @@ -296,6 +302,12 @@ ${project.version} + + org.springframework.ai + spring-ai-oracle-store-spring-boot-starter + ${project.version} + + org.springframework.ai spring-ai-pgvector-store-spring-boot-starter diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc index b2bf14738..6e625eaa3 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/nav.adoc @@ -62,6 +62,7 @@ *** xref:api/vectordbs/milvus.adoc[] *** xref:api/vectordbs/mongodb.adoc[] *** xref:api/vectordbs/neo4j.adoc[] +*** xref:api/vectordbs/oracle.adoc[Oracle DB AI Vector Search] *** 
xref:api/vectordbs/pgvector.adoc[] *** xref:api/vectordbs/pinecone.adoc[] *** xref:api/vectordbs/qdrant.adoc[] diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc index 2dc4d698b..9c9bfb8f3 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs.adoc @@ -18,6 +18,7 @@ The following sections describe the Spring AI interface for using multiple vecto The last section is intended to demystify the underlying approach of similarity searching in vector databases. +[[api-overview]] == API Overview This section serves as a guide to the `VectorStore` interface and its associated classes within the Spring AI framework. @@ -102,6 +103,7 @@ These are the available implementations of the `VectorStore` interface: * xref:api/vectordbs/milvus.adoc[Milvus Vector Store] - The https://milvus.io/[Milvus] vector store. * xref:api/vectordbs/mongodb.adoc[MongoDB Atlas Vector Store] - The https://www.mongodb.com/atlas/database[MongoDB Atlas] vector store. * xref:api/vectordbs/neo4j.adoc[Neo4j Vector Store] - The https://neo4j.com/[Neo4j] vector store. +* xref:api/vectordbs/oracle.adoc[OracleVectorStore] - The https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/overview-ai-vector-search.html[Oracle Database] vector store. * xref:api/vectordbs/pgvector.adoc[PgVectorStore] - The https://github.com/pgvector/pgvector[PostgreSQL/PGVector] vector store. * xref:api/vectordbs/pinecone.adoc[Pinecone Vector Store] - https://www.pinecone.io/[PineCone] vector store. * xref:api/vectordbs/qdrant.adoc[Qdrant Vector Store] - https://www.qdrant.tech/[Qdrant] vector store. 
diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/oracle.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/oracle.adoc new file mode 100644 index 000000000..4eabbb3db --- /dev/null +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/api/vectordbs/oracle.adoc @@ -0,0 +1,203 @@ += Oracle Database 23ai - AI Vector Search + +The link:https://docs.oracle.com/en/database/oracle/oracle-database/23/vecse/overview-ai-vector-search.html[AI Vector Search] capabilities of the Oracle Database 23ai (23.4+) are available as a Spring AI `VectorStore` to help you to store document embeddings and perform similarity searches. Of course, all other features are also available. + +TIP: The <<Run Oracle Database 23ai locally>> appendix shows how to start a database with a lightweight Docker container. + +== Auto-Configuration + +Start by adding the Oracle Vector Store boot starter dependency to your project: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-oracle-store-spring-boot-starter</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file. + +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-oracle-store-spring-boot-starter' +} +---- + +If you need this vector store to initialize the schema for you then you'll need to pass true for the `initializeSchema` boolean parameter in the appropriate constructor or by setting `...initialize-schema=true` in the `application.properties` file. + +NOTE: this is a breaking change! In earlier versions of Spring AI, this schema initialization happened by default. + +The Vector Store also requires an `EmbeddingModel` instance to calculate embeddings for the documents. +You can pick one of the available xref:api/embeddings.adoc#available-implementations[EmbeddingModel Implementations]. 
+ +For example, to use the xref:api/embeddings/openai-embeddings.adoc[OpenAI EmbeddingModel], add the following dependency to your project: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-openai-spring-boot-starter</artifactId> +</dependency> +---- + +or to your Gradle `build.gradle` build file. + +[source,groovy] +---- +dependencies { + implementation 'org.springframework.ai:spring-ai-openai-spring-boot-starter' +} +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. +Refer to the xref:getting-started.adoc#repositories[Repositories] section to add Milestone and/or Snapshot Repositories to your build file. + +To connect to and configure the `OracleVectorStore`, you need to provide access details for your database. +A simple configuration can be provided via Spring Boot's `application.yml`: + +[source,yaml] +---- +spring: + datasource: + url: jdbc:oracle:thin:@//localhost:1521/freepdb1 + username: mlops + password: mlops + ai: + vectorstore: + oracle: + index-type: IVF + distance-type: COSINE + dimensions: 1536 +---- + +TIP: Check the list of xref:#oracle-properties[configuration parameters] to learn about the default values and configuration options. + +Now you can auto-wire the `OracleVectorStore` in your application and use it: + +[source,java] +---- +@Autowired VectorStore vectorStore; + +// ... + +List<Document> documents = List.of( + new Document("Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! Spring AI rocks!! 
Spring AI rocks!!", Map.of("meta1", "meta1")), + new Document("The World is Big and Salvation Lurks Around the Corner"), + new Document("You walk forward facing the past and you turn back toward the future.", Map.of("meta2", "meta2"))); + +// Add the documents to Oracle Vector Store +vectorStore.add(documents); + +// Retrieve documents similar to a query +List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Spring").withTopK(5)); +---- + +[[oracle-properties]] +=== Configuration properties + +You can use the following properties in your Spring Boot configuration to customize the `OracleVectorStore`. + +[cols="2,5,1"] +|=== +|Property| Description | Default value + +|`spring.ai.vectorstore.oracle.index-type`| Nearest neighbor search index type. Options are `NONE` - exact nearest neighbor search, `IVF` - Inverted File Flat index. It has faster build times and uses less memory than HNSW, but has lower query performance (in terms of speed-recall tradeoff). `HNSW` - creates a multilayer graph. It has slower build times and uses more memory than IVF, but has better query performance (in terms of speed-recall tradeoff). | NONE +|`spring.ai.vectorstore.oracle.distance-type`| Search distance type among `COSINE` (default), `DOT`, `EUCLIDEAN`, `EUCLIDEAN_SQUARED`, and `MANHATTAN`. + +NOTE: If vectors are normalized, you can use `DOT` or `COSINE` for best performance.| COSINE +|`spring.ai.vectorstore.oracle.forced-normalization`| Allows enabling vector normalization (if true) before insertion and for similarity search. + +CAUTION: Setting this to true is a requirement to allow for xref:api/vectordbs.adoc#api-overview[search request similarity threshold]. + +NOTE: If vectors are normalized, you can use `DOT` or `COSINE` for best performance. | false +|`spring.ai.vectorstore.oracle.dimensions`| Embeddings dimension. If not specified explicitly the OracleVectorStore will allow the maximum: 65535. Dimensions are set to the embedding column on table creation. 
If you change the dimensions you would have to re-create the table as well. | 65535 +|`spring.ai.vectorstore.oracle.remove-existing-vector-store-table` | Drops the existing table on start up. | false +|`spring.ai.vectorstore.oracle.initialize-schema` | Whether to initialize the required schema. | false +|`spring.ai.vectorstore.oracle.search-accuracy` | Denotes the requested accuracy target in the presence of an index. Disabled by default. You need to provide an integer in the range [1,100] to override the default index accuracy (95). Using lower accuracy provides approximate similarity search trading off speed versus accuracy. | -1 (`DEFAULT_SEARCH_ACCURACY`) + +|=== + +== Metadata filtering + +You can leverage the generic, portable link:https://docs.spring.io/spring-ai/reference/api/vectordbs.html#_metadata_filters[metadata filters] with the `OracleVectorStore`. + +For example, you can use either the text expression language: + +[source,java] +---- +vectorStore.similaritySearch( + SearchRequest.defaults() + .withQuery("The World") + .withTopK(TOP_K) + .withSimilarityThreshold(SIMILARITY_THRESHOLD) + .withFilterExpression("author in ['john', 'jill'] && article_type == 'blog'")); +---- + +or programmatically using the `Filter.Expression` DSL: + +[source,java] +---- +FilterExpressionBuilder b = new FilterExpressionBuilder(); + +vectorStore.similaritySearch(SearchRequest.defaults() + .withQuery("The World") + .withTopK(TOP_K) + .withSimilarityThreshold(SIMILARITY_THRESHOLD) + .withFilterExpression(b.and( + b.in("author","john", "jill"), + b.eq("article_type", "blog")).build())); +---- + +NOTE: These filter expressions are converted into the equivalent `OracleVectorStore` filters. + +== Manual Configuration + +Instead of using the Spring Boot auto-configuration, you can manually configure the `OracleVectorStore`. 
+For this you need to add the Oracle JDBC driver and `JdbcTemplate` auto-configuration dependencies to your project: + +[source,xml] +---- +<dependency> + <groupId>org.springframework.boot</groupId> + <artifactId>spring-boot-starter-jdbc</artifactId> +</dependency> + +<dependency> + <groupId>com.oracle.database.jdbc</groupId> + <artifactId>ojdbc11</artifactId> + <scope>runtime</scope> +</dependency> + +<dependency> + <groupId>org.springframework.ai</groupId> + <artifactId>spring-ai-oracle-store</artifactId> +</dependency> +---- + +TIP: Refer to the xref:getting-started.adoc#dependency-management[Dependency Management] section to add the Spring AI BOM to your build file. + +To configure the `OracleVectorStore` in your application, you can use the following setup: + +[source,java] +---- +@Bean +public VectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel) { + return new OracleVectorStore(jdbcTemplate, embeddingModel, true); +} +---- + +== Run Oracle Database 23ai locally + +---- +docker run --rm --name oracle23ai -p 1521:1521 -e APP_USER=mlops -e APP_USER_PASSWORD=mlops -e ORACLE_PASSWORD=mlops gvenzl/oracle-free:23-slim +---- + +You can then connect to the database using: + +---- +sql mlops/mlops@localhost/freepdb1 +---- + + diff --git a/spring-ai-docs/src/main/antora/modules/ROOT/pages/index.adoc b/spring-ai-docs/src/main/antora/modules/ROOT/pages/index.adoc index 75c05e595..24aaa6a64 100644 --- a/spring-ai-docs/src/main/antora/modules/ROOT/pages/index.adoc +++ b/spring-ai-docs/src/main/antora/modules/ROOT/pages/index.adoc @@ -15,7 +15,7 @@ Spring AI provides the following features: * Supported Model types are Chat, Text to Image, Audio Transcription, Text to Speech, and more on the way. * Portable API across AI providers for all models. Both synchronous and stream API options are supported. Dropping down to access model specific features is also supported. * Mapping of AI Model output to POJOs. -* Support for all major Vector Database providers such as Apache Cassandra, Azure Vector Search, Chroma, Milvus, Neo4j, PostgreSQL/PGVector, PineCone, Qdrant, Redis, and Weaviate. 
+* Support for all major Vector Database providers such as Apache Cassandra, Azure Vector Search, Chroma, Milvus, Neo4j, Oracle, PostgreSQL/PGVector, PineCone, Qdrant, Redis, and Weaviate. * Portable API across Vector Store providers, including a novel SQL-like metadata filter API that is also portable. * Function calling. * Spring Boot Auto Configuration and Starters for AI Models and Vector Stores. diff --git a/spring-ai-spring-boot-autoconfigure/pom.xml b/spring-ai-spring-boot-autoconfigure/pom.xml index 84bd54a18..a37ec565d 100644 --- a/spring-ai-spring-boot-autoconfigure/pom.xml +++ b/spring-ai-spring-boot-autoconfigure/pom.xml @@ -101,6 +101,14 @@ true + + + org.springframework.ai + spring-ai-oracle-store + ${project.parent.version} + true + + org.springframework.ai diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreAutoConfiguration.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreAutoConfiguration.java new file mode 100644 index 000000000..540e4c1bf --- /dev/null +++ b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreAutoConfiguration.java @@ -0,0 +1,48 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package org.springframework.ai.autoconfigure.vectorstore.oracle; + +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.vectorstore.OracleVectorStore; +import org.springframework.boot.autoconfigure.AutoConfiguration; +import org.springframework.boot.autoconfigure.condition.ConditionalOnClass; +import org.springframework.boot.autoconfigure.condition.ConditionalOnMissingBean; +import org.springframework.boot.autoconfigure.jdbc.JdbcTemplateAutoConfiguration; +import org.springframework.boot.context.properties.EnableConfigurationProperties; +import org.springframework.context.annotation.Bean; +import org.springframework.jdbc.core.JdbcTemplate; + +import javax.sql.DataSource; + +/** + * @author Loïc Lefèvre + */ +@AutoConfiguration(after = JdbcTemplateAutoConfiguration.class) +@ConditionalOnClass({ OracleVectorStore.class, DataSource.class, JdbcTemplate.class }) +@EnableConfigurationProperties(OracleAIVectorSearchStoreProperties.class) +public class OracleAIVectorSearchStoreAutoConfiguration { + + @Bean + @ConditionalOnMissingBean + public OracleVectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel, + OracleAIVectorSearchStoreProperties properties) { + return new OracleVectorStore(jdbcTemplate, embeddingModel, properties.getTableName(), properties.getIndexType(), + properties.getDistanceType(), properties.getDimensions(), properties.getSearchAccuracy(), + properties.isInitializeSchema(), properties.isRemoveExistingVectorStoreTable(), + properties.isForcedNormalization()); + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreProperties.java b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreProperties.java new file mode 100644 index 000000000..67901d3d5 --- /dev/null +++ 
b/spring-ai-spring-boot-autoconfigure/src/main/java/org/springframework/ai/autoconfigure/vectorstore/oracle/OracleAIVectorSearchStoreProperties.java @@ -0,0 +1,102 @@ +/* + * Copyright 2023 - 2024 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.springframework.ai.autoconfigure.vectorstore.oracle; + +import org.springframework.ai.autoconfigure.CommonVectorStoreProperties; +import org.springframework.ai.vectorstore.OracleVectorStore; +import org.springframework.boot.context.properties.ConfigurationProperties; + +import static org.springframework.ai.vectorstore.OracleVectorStore.DEFAULT_SEARCH_ACCURACY; + +/** + * @author Loïc Lefèvre + */ +@ConfigurationProperties(OracleAIVectorSearchStoreProperties.CONFIG_PREFIX) +public class OracleAIVectorSearchStoreProperties extends CommonVectorStoreProperties { + + public static final String CONFIG_PREFIX = "spring.ai.vectorstore.oracle"; + + private String tableName = OracleVectorStore.DEFAULT_TABLE_NAME; + + private OracleVectorStore.OracleAIVectorSearchIndexType indexType = OracleVectorStore.DEFAULT_INDEX_TYPE; + + private OracleVectorStore.OracleAIVectorSearchDistanceType distanceType = OracleVectorStore.DEFAULT_DISTANCE_TYPE; + + private int dimensions = OracleVectorStore.DEFAULT_DIMENSIONS; + + private boolean removeExistingVectorStoreTable; + + private boolean forcedNormalization; + + private int searchAccuracy = DEFAULT_SEARCH_ACCURACY; + + public String 
getTableName() { + return tableName; + } + + public void setTableName(String tableName) { + this.tableName = tableName; + } + + public OracleVectorStore.OracleAIVectorSearchIndexType getIndexType() { + return indexType; + } + + public void setIndexType(OracleVectorStore.OracleAIVectorSearchIndexType indexType) { + this.indexType = indexType; + } + + public OracleVectorStore.OracleAIVectorSearchDistanceType getDistanceType() { + return distanceType; + } + + public void setDistanceType(OracleVectorStore.OracleAIVectorSearchDistanceType distanceType) { + this.distanceType = distanceType; + } + + public int getDimensions() { + return dimensions; + } + + public void setDimensions(int dimensions) { + this.dimensions = dimensions; + } + + public boolean isRemoveExistingVectorStoreTable() { + return removeExistingVectorStoreTable; + } + + public void setRemoveExistingVectorStoreTable(boolean removeExistingVectorStoreTable) { + this.removeExistingVectorStoreTable = removeExistingVectorStoreTable; + } + + public boolean isForcedNormalization() { + return forcedNormalization; + } + + public void setForcedNormalization(boolean forcedNormalization) { + this.forcedNormalization = forcedNormalization; + } + + public int getSearchAccuracy() { + return searchAccuracy; + } + + public void setSearchAccuracy(int searchAccuracy) { + this.searchAccuracy = searchAccuracy; + } + +} diff --git a/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports b/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports index c744be669..d2f5549df 100644 --- a/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports +++ b/spring-ai-spring-boot-autoconfigure/src/main/resources/META-INF/spring/org.springframework.boot.autoconfigure.AutoConfiguration.imports @@ -15,6 
+15,7 @@ org.springframework.ai.autoconfigure.bedrock.titan.BedrockTitanChatAutoConfigura org.springframework.ai.autoconfigure.bedrock.titan.BedrockTitanEmbeddingAutoConfiguration org.springframework.ai.autoconfigure.ollama.OllamaAutoConfiguration org.springframework.ai.autoconfigure.mistralai.MistralAiAutoConfiguration +org.springframework.ai.autoconfigure.vectorstore.oracle.OracleAIVectorSearchStoreAutoConfiguration org.springframework.ai.autoconfigure.vectorstore.pgvector.PgVectorStoreAutoConfiguration org.springframework.ai.autoconfigure.vectorstore.pinecone.PineconeVectorStoreAutoConfiguration org.springframework.ai.autoconfigure.vectorstore.milvus.MilvusVectorStoreAutoConfiguration diff --git a/spring-ai-spring-boot-starters/spring-ai-starter-oracle-store/pom.xml b/spring-ai-spring-boot-starters/spring-ai-starter-oracle-store/pom.xml new file mode 100644 index 000000000..62ef671aa --- /dev/null +++ b/spring-ai-spring-boot-starters/spring-ai-starter-oracle-store/pom.xml @@ -0,0 +1,42 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai + 1.0.0-SNAPSHOT + ../../pom.xml + + spring-ai-oracle-store-spring-boot-starter + jar + Spring AI Starter - Oracle + Spring AI Oracle Vector Store Auto Configuration + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + + org.springframework.boot + spring-boot-starter + + + + org.springframework.ai + spring-ai-spring-boot-autoconfigure + ${project.parent.version} + + + + org.springframework.ai + spring-ai-oracle-store + ${project.parent.version} + + + + diff --git a/vector-stores/spring-ai-oracle-store/README.md b/vector-stores/spring-ai-oracle-store/README.md new file mode 100644 index 000000000..0484b1911 --- /dev/null +++ b/vector-stores/spring-ai-oracle-store/README.md @@ -0,0 +1 @@ +[Oracle AI Vector Search 
Documentation](https://docs.oracle.com/en/database/oracle/oracle-database/23/nfcoa/ai_vector_search.html) \ No newline at end of file diff --git a/vector-stores/spring-ai-oracle-store/pom.xml b/vector-stores/spring-ai-oracle-store/pom.xml new file mode 100644 index 000000000..816bcbfa8 --- /dev/null +++ b/vector-stores/spring-ai-oracle-store/pom.xml @@ -0,0 +1,101 @@ + + + 4.0.0 + + org.springframework.ai + spring-ai + 1.0.0-SNAPSHOT + ../../pom.xml + + spring-ai-oracle-store + jar + Spring AI Vector Store - Oracle + AI Vector Search from Oracle Database 23ai+ as a Spring AI Vector Store + https://github.com/spring-projects/spring-ai + + + https://github.com/spring-projects/spring-ai + git://github.com/spring-projects/spring-ai.git + git@github.com:spring-projects/spring-ai.git + + + + + org.springframework.ai + spring-ai-core + ${parent.version} + + + + org.slf4j + slf4j-api + 2.0.13 + + + + com.oracle.database.jdbc + ojdbc11 + ${oracle.version} + + + + com.oracle.database.jdbc + ucp + ${oracle.version} + + + + com.oracle.database.ha + simplefan + ${oracle.version} + + + + org.springframework + spring-jdbc + + + + + org.springframework.ai + spring-ai-transformers + ${parent.version} + test + + + + + org.springframework.ai + spring-ai-test + ${parent.version} + test + + + + org.springframework.boot + spring-boot-starter-test + test + + + + org.testcontainers + testcontainers + test + + + + org.testcontainers + oracle-free + test + + + + org.testcontainers + junit-jupiter + test + + + + + diff --git a/vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/ISOSQLJSONPathFilterExpressionConverter.java b/vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/ISOSQLJSONPathFilterExpressionConverter.java new file mode 100644 index 000000000..486928249 --- /dev/null +++ b/vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/ISOSQLJSONPathFilterExpressionConverter.java @@ -0,0 
+1,88 @@
+package org.springframework.ai.vectorstore;
+
+import org.springframework.ai.vectorstore.filter.Filter;
+import org.springframework.ai.vectorstore.filter.converter.AbstractFilterExpressionConverter;
+
+/**
+ * Converts a portable {@link Filter} expression into an ISO SQL/JSON path filter
+ * expression of the form {@code $?( ... )}, with every metadata key prefixed by
+ * {@code @.}. NIN has no operator of its own here and is emitted as a negated
+ * {@code in}.
+ *
+ * @author Loïc Lefèvre
+ * @see JSON
+ * Path Documentation
+ */
+public class IsoSqlJsonPathFilterExpressionConverter extends AbstractFilterExpressionConverter {
+
+	/** Wraps the converted operand in the {@code $?( ... )} path filter. */
+	@Override
+	protected String convertOperand(final Filter.Operand operand) {
+		final StringBuilder path = new StringBuilder("$?( ");
+		this.convertOperand(operand, path);
+		path.append(" )");
+		return path.toString();
+	}
+
+	/** Emits {@code left <op> right}; a NIN expression becomes {@code !( left in right )}. */
+	@Override
+	protected void doExpression(final Filter.Expression expression, final StringBuilder context) {
+		final boolean negatedIn = expression.type() == Filter.ExpressionType.NIN;
+		if (negatedIn) {
+			context.append("!( ");
+		}
+		this.convertOperand(expression.left(), context);
+		context.append(negatedIn ? " in " : getOperationSymbol(expression));
+		this.convertOperand(expression.right(), context);
+		if (negatedIn) {
+			context.append(" )");
+		}
+	}
+
+	/** Maps an expression type to its operator text, padded with one space on each side. */
+	private String getOperationSymbol(final Filter.Expression exp) {
+		return switch (exp.type()) {
+			case AND -> " && ";
+			case OR -> " || ";
+			case EQ -> " == ";
+			case NE -> " != ";
+			case LT -> " < ";
+			case LTE -> " <= ";
+			case GT -> " > ";
+			case GTE -> " >= ";
+			case IN -> " in ";
+			default -> throw new RuntimeException("Not supported expression type: " + exp.type());
+		};
+	}
+
+	/** Opens a list of values with {@code "( "}. */
+	@Override
+	protected void doStartValueRange(Filter.Value listValue, StringBuilder context) {
+		context.append("( ");
+	}
+
+	/** Closes a list of values with {@code " )"}. */
+	@Override
+	protected void doEndValueRange(Filter.Value listValue, StringBuilder context) {
+		context.append(" )");
+	}
+
+	/** Appends the key prefixed with {@code @.}. */
+	@Override
+	protected void doKey(final Filter.Key key, final StringBuilder context) {
+		context.append("@.").append(key.key());
+	}
+
+	/** Opens a parenthesized group. */
+	@Override
+	protected void doStartGroup(final Filter.Group group, final StringBuilder context) {
+		context.append("(");
+	}
+
+	/** Closes a parenthesized group. */
+	@Override
+	protected void doEndGroup(final Filter.Group group, final StringBuilder context) {
+		context.append(")");
+	}
+
+}
diff --git a/vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/OracleVectorStore.java b/vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/OracleVectorStore.java
new file mode 100644
index 000000000..033ad048e
--- /dev/null
+++ b/vector-stores/spring-ai-oracle-store/src/main/java/org/springframework/ai/vectorstore/OracleVectorStore.java
@@ -0,0 +1,602 @@
+/*
+ * Copyright 2023 - 2024 the original author or authors.
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */ +package org.springframework.ai.vectorstore; + +import oracle.jdbc.OracleType; +import oracle.sql.VECTOR; +import oracle.sql.json.OracleJsonFactory; +import oracle.sql.json.OracleJsonGenerator; +import oracle.sql.json.OracleJsonObject; +import oracle.sql.json.OracleJsonValue; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.vectorstore.filter.FilterExpressionConverter; +import org.springframework.beans.factory.InitializingBean; +import org.springframework.jdbc.core.BatchPreparedStatementSetter; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.jdbc.core.RowMapper; +import org.springframework.util.StringUtils; + +import java.io.ByteArrayOutputStream; +import java.sql.PreparedStatement; +import java.sql.ResultSet; +import java.sql.SQLException; +import java.sql.Statement; +import java.sql.Types; +import java.util.ArrayList; +import java.util.HashMap; +import java.util.List; +import java.util.Map; +import java.util.Optional; + +import static org.springframework.ai.vectorstore.OracleVectorStore.OracleAIVectorSearchDistanceType.DOT; +import static org.springframework.jdbc.core.StatementCreatorUtils.setParameterValue; + +/** + *

+ * Integration of Oracle database 23ai as a Vector Store. + *

+ *

+ * With the release 23ai (23.4), the Oracle database provides numerous features useful for + * artificial intelligence such as Vectors, Similarity search, Vector indexes, ONNX + * models... + *

+ *

+ * This Spring AI Vector store supports the following features: + *

    + *
  • Vectors with unspecified or fixed dimensions
  • + *
  • Distance type for similarity search (note that a similarity threshold can be used + * only with distance types COSINE and DOT when ingested vectors are normalized, see + * forcedNormalization)
  • + *
  • Vector indexes (use IVF as of 23.4)
  • + *
  • Exact and Approximate similarity search
  • + *
  • Filter expression as SQL/JSON Path expression evaluation
  • + *
+ * + * @author Loïc Lefèvre + */ +public class OracleVectorStore implements VectorStore, InitializingBean { + + private static final Logger logger = LoggerFactory.getLogger(OracleVectorStore.class); + + public static final double SIMILARITY_THRESHOLD_EXACT_MATCH = 1.0d; + + public enum OracleAIVectorSearchIndexType { + + /** + * Performs exact nearest neighbor search. + */ + NONE, + + /** + *

+ * The default type of index created for an In-Memory Neighbor Graph vector index + * is Hierarchical Navigable Small World (HNSW). + *

+ * + *

+ * With Navigable Small World (NSW), the idea is to build a proximity graph where + * each vector in the graph connects to several others based on three + * characteristics: + *

    + *
  • The distance between vectors
  • + *
  • The maximum number of closest vector candidates considered at each step of + * the search during insertion (EFCONSTRUCTION)
  • + *
  • The maximum number of connections (NEIGHBORS) permitted per + * vector
  • + *
+ * + * @see Oracle + * Database documentation + */ + HNSW, + + /** + *

+ * The default type of index created for a Neighbor Partition vector index is + * Inverted File Flat (IVF) vector index. The IVF index is a technique designed to + * enhance search efficiency by narrowing the search area through the use of + * neighbor partitions or clusters. + *

+ * + * @see Oracle + * Database documentation + */ + IVF; + + } + + public enum OracleAIVectorSearchDistanceType { + + /** + * Default metric. It calculates the cosine distance between two vectors. + */ + COSINE, + + /** + * Also called the inner product, calculates the negated dot product of two + * vectors. + */ + DOT, + + /** + * Also called L2_DISTANCE, calculates the Euclidean distance between two vectors. + */ + EUCLIDEAN, + + /** + * Also called L2_SQUARED, it is the Euclidean distance without taking the square + * root. + */ + EUCLIDEAN_SQUARED, + + /* + * Calculates the hamming distance between two vectors. Requires INT8 element + * type. + */ + // TODO: add HAMMING support, + + /** + * Also called L1_DISTANCE or taxicab distance, calculates the Manhattan distance. + */ + MANHATTAN + + } + + public static final String DEFAULT_TABLE_NAME = "SPRING_AI_VECTORS"; + + public static final OracleAIVectorSearchIndexType DEFAULT_INDEX_TYPE = OracleAIVectorSearchIndexType.IVF; + + public static final OracleAIVectorSearchDistanceType DEFAULT_DISTANCE_TYPE = OracleAIVectorSearchDistanceType.COSINE; + + public static final int DEFAULT_DIMENSIONS = -1; + + public static final int DEFAULT_SEARCH_ACCURACY = -1; + + private final JdbcTemplate jdbcTemplate; + + private final EmbeddingModel embeddingModel; + + private final boolean initializeSchema; + + private final boolean removeExistingVectorStoreTable; + + public final FilterExpressionConverter filterExpressionConverter = new IsoSqlJsonPathFilterExpressionConverter(); + + /** + * Table name where vectors will be stored. + */ + private final String tableName; + + /** + * Index type used to index the vectors. It can impact performance and database memory + * consumption. + */ + private final OracleAIVectorSearchIndexType indexType; + + /** + * Distance type to use for computing vector distances. + */ + private final OracleAIVectorSearchDistanceType distanceType; + + /** + * Expected number of dimensions for vectors. 
Enforcing vector dimensions is very + * useful to ensure future vector distance computations will be relevant. + */ + private final int dimensions; + + private final boolean forcedNormalization; + + private final int searchAccuracy; + + public OracleVectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel) { + this(jdbcTemplate, embeddingModel, DEFAULT_TABLE_NAME, DEFAULT_INDEX_TYPE, DEFAULT_DISTANCE_TYPE, + DEFAULT_DIMENSIONS, DEFAULT_SEARCH_ACCURACY, false, false, false); + } + + public OracleVectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel, boolean initializeSchema) { + this(jdbcTemplate, embeddingModel, DEFAULT_TABLE_NAME, DEFAULT_INDEX_TYPE, DEFAULT_DISTANCE_TYPE, + DEFAULT_DIMENSIONS, DEFAULT_SEARCH_ACCURACY, initializeSchema, false, false); + } + + public OracleVectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel, String tableName, + OracleAIVectorSearchIndexType indexType, OracleAIVectorSearchDistanceType distanceType, int dimensions, + int searchAccuracy, boolean initializeSchema, boolean removeExistingVectorStoreTable, + boolean forcedNormalization) { + if (dimensions != DEFAULT_DIMENSIONS) { + if (dimensions <= 0) { + throw new RuntimeException("Number of dimensions must be strictly positive"); + } + if (dimensions > 65535) { + throw new RuntimeException("Number of dimensions must be at most 65535"); + } + } + + if (searchAccuracy != DEFAULT_SEARCH_ACCURACY) { + if (searchAccuracy < 1) { + throw new RuntimeException("Search accuracy must be greater or equals to 1"); + } + if (searchAccuracy > 100) { + throw new RuntimeException("Search accuracy must be lower or equals to 100"); + } + } + + this.jdbcTemplate = jdbcTemplate; + this.embeddingModel = embeddingModel; + this.tableName = tableName; + this.indexType = indexType; + this.distanceType = distanceType; + this.dimensions = dimensions; + this.searchAccuracy = searchAccuracy; + this.initializeSchema = initializeSchema; + this.removeExistingVectorStoreTable 
= removeExistingVectorStoreTable; + this.forcedNormalization = forcedNormalization; + } + + @Override + public void add(final List documents) { + this.jdbcTemplate.batchUpdate(getIngestStatement(), new BatchPreparedStatementSetter() { + @Override + public void setValues(PreparedStatement ps, int i) throws SQLException { + final Document document = documents.get(i); + final String content = document.getContent(); + final byte[] json = toJson(document.getMetadata()); + final VECTOR embeddingVector = toVECTOR(embeddingModel.embed(document)); + + setParameterValue(ps, 1, Types.VARCHAR, document.getId()); + setParameterValue(ps, 2, Types.VARCHAR, content); + setParameterValue(ps, 3, OracleType.JSON.getVendorTypeNumber(), json); + setParameterValue(ps, 4, OracleType.VECTOR.getVendorTypeNumber(), embeddingVector); + } + + @Override + public int getBatchSize() { + return documents.size(); + } + }); + } + + private String getIngestStatement() { + return String + .format(""" + merge into %s target using (values(?, ?, ?, ?)) source (id, content, metadata, embedding) on (target.id = source.id) + when matched then update set target.content = source.content, target.metadata = source.metadata, target.embedding = source.embedding + when not matched then insert (target.id, target.content, target.metadata, target.embedding) values (source.id, source.content, source.metadata, source.embedding)""", + tableName); + } + + private final OracleJsonFactory osonFactory = new OracleJsonFactory(); + + private final ByteArrayOutputStream out = new ByteArrayOutputStream(); + + /** + * Bind binary JSON from the client. 
+	 * @param m map of metadata; String, Integer, Long, Float, Double and Boolean values are
+	 * written, entries of any other type (or null) are skipped with a warning
+	 * @return the binary JSON ready to be inserted
+	 */
+	private byte[] toJson(final Map<String, Object> m) {
+		// Per-call buffer: a buffer shared through a field is not safe if add() runs concurrently.
+		final ByteArrayOutputStream buffer = new ByteArrayOutputStream();
+		try (OracleJsonGenerator gen = osonFactory.createJsonBinaryGenerator(buffer)) {
+			gen.writeStartObject();
+			for (Map.Entry<String, Object> entry : m.entrySet()) {
+				final String key = entry.getKey();
+				final Object o = entry.getValue();
+				if (o instanceof String) {
+					gen.write(key, (String) o);
+				}
+				else if (o instanceof Integer) {
+					gen.write(key, (Integer) o);
+				}
+				else if (o instanceof Long) {
+					gen.write(key, (Long) o);
+				}
+				else if (o instanceof Float) {
+					gen.write(key, (Float) o);
+				}
+				else if (o instanceof Double) {
+					gen.write(key, (Double) o);
+				}
+				else if (o instanceof Boolean) {
+					gen.write(key, (Boolean) o);
+				}
+				else {
+					logger.warn("Skipping metadata '{}': unsupported type {}", key, (o != null) ? o.getClass() : null);
+				}
+			}
+			gen.writeEnd();
+		}
+
+		return buffer.toByteArray();
+	}
+
+	/**
+	 * Converts a list of Double values into an Oracle VECTOR object ready to be inserted.
+	 * Optionally normalize the vector beforehand (see forcedNormalization).
+	 * @param doubleList the embedding values, copied into a primitive array
+	 * @return a FLOAT64 VECTOR holding the values, normalized first if forcedNormalization is set
+	 * @throws SQLException as declared by VECTOR.ofFloat64Values
+	 */
+	private VECTOR toVECTOR(final List<Double> doubleList) throws SQLException {
+		final double[] doubles = doubleList.stream().mapToDouble(Double::doubleValue).toArray();
+		return VECTOR.ofFloat64Values(forcedNormalization ? normalize(doubles) : doubles);
+	}
+
+	/**
+	 * Normalize a vector if requested.
+ * @param v vector to normalize + * @return the vector normalized + */ + private double[] normalize(final double[] v) { + double squaredSum = 0d; + + for (double e : v) { + squaredSum += e * e; + } + + final double magnitude = Math.sqrt(squaredSum); + + if (magnitude > 0) { + final double multiplier = 1d / magnitude; + final int length = v.length; + for (int i = 0; i < length; i++) { + v[i] *= multiplier; + } + } + + return v; + } + + @Override + public Optional delete(final List idList) { + final String sql = String.format("delete from %s where id=?", tableName); + final int[] argTypes = { Types.VARCHAR }; + + final List batchArgs = new ArrayList<>(); + for (String id : idList) { + batchArgs.add(new Object[] { id }); + } + + final int[] deleteCounts = jdbcTemplate.batchUpdate(sql, batchArgs, argTypes); + + int deleteCount = 0; + for (int detailedResult : deleteCounts) { + switch (detailedResult) { + case Statement.EXECUTE_FAILED: + break; + case 1: + case Statement.SUCCESS_NO_INFO: + deleteCount++; + break; + } + } + + return Optional.of(deleteCount == idList.size()); + } + + private static class DocumentRowMapper implements RowMapper { + + @Override + public Document mapRow(ResultSet rs, int rowNum) throws SQLException { + final Map metadata = getMap(rs.getObject(3, OracleJsonValue.class)); + metadata.put("distance", rs.getDouble(5)); + + final Document document = new Document(rs.getString(1), rs.getString(2), metadata); + final double[] embedding = rs.getObject(4, double[].class); + document.setEmbedding(toDoubleList(embedding)); + return document; + } + + private Map getMap(OracleJsonValue value) { + final Map result = new HashMap<>(); + + if (value != null) { + final OracleJsonObject json = value.asJsonObject(); + for (String key : json.keySet()) { + result.put(key, json.get(key)); + } + } + + return result; + } + + private List toDoubleList(final double[] embeddings) { + final List result = new ArrayList<>(embeddings.length); + for (double v : embeddings) { 
+ result.add(v); + } + return result; + } + + } + + @Override + public List similaritySearch(SearchRequest request) { + try { + // From the provided query, generate a vector using the embedding model + final VECTOR embeddingVector = toVECTOR(embeddingModel.embed(request.getQuery())); + + if (logger.isDebugEnabled()) { + this.jdbcTemplate.batchUpdate("insert into debug(embedding) values(?)", + new BatchPreparedStatementSetter() { + @Override + public void setValues(PreparedStatement ps, int i) throws SQLException { + setParameterValue(ps, 1, OracleType.VECTOR.getVendorTypeNumber(), embeddingVector); + } + + @Override + public int getBatchSize() { + return 1; + } + }); + } + + final String nativeFilterExpression = (request.getFilterExpression() != null) + ? this.filterExpressionConverter.convertExpression(request.getFilterExpression()) : ""; + + String jsonPathFilter = ""; + + if (request.getSimilarityThreshold() == SearchRequest.SIMILARITY_THRESHOLD_ACCEPT_ALL) { + if (StringUtils.hasText(nativeFilterExpression)) { + jsonPathFilter = String.format("where JSON_EXISTS( metadata, '%s' )\n", nativeFilterExpression); + } + + final String sql = searchAccuracy == DEFAULT_SEARCH_ACCURACY ? String.format(""" + select id, content, metadata, embedding, %sVECTOR_DISTANCE(embedding, ?, %s)%s as distance + from %s + %sorder by distance + fetch first %d rows only""", distanceType == DOT ? "(1+" : "", distanceType.name(), + distanceType == DOT ? ")/2" : "", tableName, jsonPathFilter, request.getTopK()) + : String.format( + """ + select id, content, metadata, embedding, %sVECTOR_DISTANCE(embedding, ?, %s)%s as distance + from %s + %sorder by distance + fetch APPROXIMATE first %d rows only WITH TARGET ACCURACY %d""", + distanceType == DOT ? "(1+" : "", distanceType.name(), distanceType == DOT ? 
")/2" : "", + tableName, jsonPathFilter, request.getTopK(), searchAccuracy); + + logger.debug("SQL query: " + sql); + + return this.jdbcTemplate.query(sql, new DocumentRowMapper(), embeddingVector); + } + else if (request.getSimilarityThreshold() == SIMILARITY_THRESHOLD_EXACT_MATCH) { + if (StringUtils.hasText(nativeFilterExpression)) { + jsonPathFilter = String.format("where JSON_EXISTS( metadata, '%s' )\n", nativeFilterExpression); + } + + final String sql = String.format(""" + select id, content, metadata, embedding, %sVECTOR_DISTANCE(embedding, ?, %s)%s as distance + from %s + %sorder by distance + fetch EXACT first %d rows only""", distanceType == DOT ? "(1+" : "", distanceType.name(), + distanceType == DOT ? ")/2" : "", tableName, jsonPathFilter, request.getTopK()); + + logger.debug("SQL query: " + sql); + + return this.jdbcTemplate.query(sql, new DocumentRowMapper(), embeddingVector); + } + else { + if (!forcedNormalization + || (distanceType != OracleAIVectorSearchDistanceType.COSINE && distanceType != DOT)) { + throw new RuntimeException( + "Similarity threshold filtering requires all vectors to be normalized, see the forcedNormalization parameter for this Vector store. Also only COSINE and DOT distance types are supported."); + } + + final double distance = distanceType == DOT ? (1d - request.getSimilarityThreshold()) * 2d - 1d + : 1d - request.getSimilarityThreshold(); + + if (StringUtils.hasText(nativeFilterExpression)) { + jsonPathFilter = String.format(" and JSON_EXISTS( metadata, '%s' )", nativeFilterExpression); + } + + final String sql = distanceType == DOT ? (searchAccuracy == DEFAULT_SEARCH_ACCURACY ? 
String.format(""" + select id, content, metadata, embedding, (1+VECTOR_DISTANCE(embedding, ?, DOT))/2 as distance + from %s + where VECTOR_DISTANCE(embedding, ?, DOT) <= ?%s + order by distance + fetch first %d rows only""", tableName, jsonPathFilter, request.getTopK()) : String.format(""" + select id, content, metadata, embedding, (1+VECTOR_DISTANCE(embedding, ?, DOT))/2 as distance + from %s + where VECTOR_DISTANCE(embedding, ?, DOT) <= ?%s + order by distance + fetch APPROXIMATE first %d rows only WITH TARGET ACCURACY %d""", tableName, jsonPathFilter, + request.getTopK(), searchAccuracy) + + ) : (searchAccuracy == DEFAULT_SEARCH_ACCURACY ? String.format(""" + select id, content, metadata, embedding, VECTOR_DISTANCE(embedding, ?, COSINE) as distance + from %s + where VECTOR_DISTANCE(embedding, ?, COSINE) <= ?%s + order by distance + fetch first %d rows only""", tableName, jsonPathFilter, request.getTopK()) : String.format(""" + select id, content, metadata, embedding, VECTOR_DISTANCE(embedding, ?, COSINE) as distance + from %s + where VECTOR_DISTANCE(embedding, ?, COSINE) <= ?%s + order by distance + fetch APPROXIMATE first %d rows only WITH TARGET ACCURACY %d""", tableName, jsonPathFilter, + request.getTopK(), searchAccuracy)); + + logger.debug("SQL query: " + sql); + + return this.jdbcTemplate.query(sql, new DocumentRowMapper(), embeddingVector, embeddingVector, + distance); + } + } + catch (SQLException sqle) { + throw new RuntimeException(sqle); + } + } + + @Override + public void afterPropertiesSet() throws Exception { + if (this.initializeSchema) { + // Remove existing VectorStoreTable + if (this.removeExistingVectorStoreTable) { + this.jdbcTemplate.execute(String.format("drop table if exists %s purge", tableName)); + } + + this.jdbcTemplate.execute(String.format(""" + create table if not exists %s ( + id varchar2(36) default sys_guid() primary key, + content clob not null, + metadata json not null, + embedding vector(%s,FLOAT64) annotations(Distance '%s', 
IndexType '%s') + )""", tableName, dimensions == DEFAULT_DIMENSIONS ? "*" : String.valueOf(dimensions), + distanceType.name(), indexType.name())); + + if (logger.isDebugEnabled()) { + this.jdbcTemplate.execute(String.format(""" + create table if not exists debug ( + id varchar2(36) default sys_guid() primary key, + embedding vector(%s,FLOAT64) annotations(Distance '%s') + )""", dimensions == DEFAULT_DIMENSIONS ? "*" : String.valueOf(dimensions), + distanceType.name())); + } + + switch (indexType) { + case IVF: + this.jdbcTemplate.execute(String.format(""" + create vector index if not exists vector_index_%s on %s (embedding) + organization neighbor partitions + distance %s + with target accuracy %d + parameters (type IVF, neighbor partitions 10)""", tableName, tableName, + distanceType.name(), searchAccuracy == DEFAULT_SEARCH_ACCURACY ? 95 : searchAccuracy)); + break; + + /* + * TODO: Enable for 23.5 case HNSW: + * this.jdbcTemplate.execute(String.format(""" create vector index if not + * exists vector_index_%s on %s (embedding) organization inmemory neighbor + * graph distance %s with target accuracy %d parameters (type HNSW, + * neighbors 40, efconstruction 500)""", tableName, tableName, + * distanceType.name(), searchAccuracy == DEFAULT_SEARCH_ACCURACY ? 
95 : + * searchAccuracy)); break; + */ + } + } + } + + public String getTableName() { + return tableName; + } + +} diff --git a/vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/IsoSqlJsonPathFilterExpressionConverterTests.java b/vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/IsoSqlJsonPathFilterExpressionConverterTests.java new file mode 100644 index 000000000..e052a6b05 --- /dev/null +++ b/vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/IsoSqlJsonPathFilterExpressionConverterTests.java @@ -0,0 +1,29 @@ +package org.springframework.ai.vectorstore; + +import static org.assertj.core.api.Assertions.assertThat; + +import org.junit.jupiter.api.Test; +import org.springframework.ai.vectorstore.filter.Filter; +import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser; + +public class IsoSqlJsonPathFilterExpressionConverterTests { + + @Test + public void testNIN() { + final Filter.Expression e = new FilterExpressionTextParser().parse("weather nin [\"windy\", \"rainy\"]"); + + final String jsonPathExpression = new IsoSqlJsonPathFilterExpressionConverter().convertExpression(e); + + assertThat(jsonPathExpression).isEqualTo("$?( !( @.weather in ( \"windy\",\"rainy\" ) ) )"); + } + + @Test + public void testNOT() { + final Filter.Expression e = new FilterExpressionTextParser().parse("NOT( weather in [\"windy\", \"rainy\"] )"); + + final String jsonPathExpression = new IsoSqlJsonPathFilterExpressionConverter().convertExpression(e); + + assertThat(jsonPathExpression).isEqualTo("$?( (!( @.weather in ( \"windy\",\"rainy\" ) )) )"); + } + +} diff --git a/vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/OracleVectorStoreIT.java b/vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/OracleVectorStoreIT.java new file mode 100644 index 000000000..7d49ceb11 --- /dev/null +++ 
b/vector-stores/spring-ai-oracle-store/src/test/java/org/springframework/ai/vectorstore/OracleVectorStoreIT.java @@ -0,0 +1,328 @@ +package org.springframework.ai.vectorstore; + +import oracle.jdbc.pool.OracleDataSource; +import org.junit.Assert; +import org.junit.jupiter.params.ParameterizedTest; +import org.junit.jupiter.params.provider.CsvSource; +import org.junit.jupiter.params.provider.ValueSource; +import org.springframework.ai.document.Document; +import org.springframework.ai.embedding.EmbeddingModel; +import org.springframework.ai.transformers.TransformersEmbeddingModel; +import org.springframework.ai.vectorstore.filter.FilterExpressionTextParser; +import org.springframework.beans.factory.annotation.Value; +import org.springframework.boot.SpringBootConfiguration; +import org.springframework.boot.autoconfigure.EnableAutoConfiguration; +import org.springframework.boot.autoconfigure.jdbc.DataSourceAutoConfiguration; +import org.springframework.boot.autoconfigure.jdbc.DataSourceProperties; +import org.springframework.boot.context.properties.ConfigurationProperties; +import org.springframework.boot.test.context.runner.ApplicationContextRunner; +import org.springframework.context.ApplicationContext; +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Primary; +import org.springframework.core.io.DefaultResourceLoader; +import org.springframework.jdbc.core.JdbcTemplate; +import org.springframework.util.CollectionUtils; +import org.testcontainers.junit.jupiter.Container; +import org.testcontainers.junit.jupiter.Testcontainers; +import org.testcontainers.oracle.OracleContainer; +import org.testcontainers.utility.MountableFile; + +import javax.sql.DataSource; +import java.io.IOException; +import java.nio.charset.StandardCharsets; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; +import java.util.Map; +import java.util.UUID; + +import static org.assertj.core.api.Assertions.assertThat; 
+import static org.springframework.ai.vectorstore.OracleVectorStore.DEFAULT_SEARCH_ACCURACY; + +@Testcontainers +public class OracleVectorStoreIT { + + @Container + static OracleContainer oracle23aiContainer = new OracleContainer("gvenzl/oracle-free:23-slim") + .withCopyFileToContainer(MountableFile.forClasspathResource("/initialize.sql"), + "/container-entrypoint-initdb.d/initialize.sql"); + + final List documents = List.of( + new Document(getText("classpath:/test/data/spring.ai.txt"), Map.of("meta1", "meta1")), + new Document(getText("classpath:/test/data/time.shelter.txt")), + new Document(getText("classpath:/test/data/great.depression.txt"), Map.of("meta2", "meta2"))); + + public static String getText(final String uri) { + try { + return new DefaultResourceLoader().getResource(uri).getContentAsString(StandardCharsets.UTF_8); + } + catch (IOException e) { + throw new RuntimeException(e); + } + } + + private final ApplicationContextRunner contextRunner = new ApplicationContextRunner() + .withUserConfiguration(TestClient.class) + .withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=COSINE_DISTANCE", + "test.spring.ai.vectorstore.oracle.dimensions=384", + // JdbcTemplate configuration + String.format("app.datasource.url=%s", oracle23aiContainer.getJdbcUrl()), + String.format("app.datasource.username=%s", oracle23aiContainer.getUsername()), + String.format("app.datasource.password=%s", oracle23aiContainer.getPassword()), + "app.datasource.type=oracle.jdbc.pool.OracleDataSource"); + + @SpringBootConfiguration + @EnableAutoConfiguration(exclude = { DataSourceAutoConfiguration.class }) + public static class TestClient { + + @Value("${test.spring.ai.vectorstore.oracle.distanceType}") + OracleVectorStore.OracleAIVectorSearchDistanceType distanceType; + + @Value("${test.spring.ai.vectorstore.oracle.searchAccuracy}") + int searchAccuracy; + + @Bean + public VectorStore vectorStore(JdbcTemplate jdbcTemplate, EmbeddingModel embeddingModel) { + return new 
OracleVectorStore(jdbcTemplate, embeddingModel, OracleVectorStore.DEFAULT_TABLE_NAME, + OracleVectorStore.OracleAIVectorSearchIndexType.IVF, distanceType, 384, searchAccuracy, true, true, + true); + } + + @Bean + public JdbcTemplate myJdbcTemplate(DataSource dataSource) { + return new JdbcTemplate(dataSource); + } + + @Bean + @Primary + @ConfigurationProperties("app.datasource") + public DataSourceProperties dataSourceProperties() { + return new DataSourceProperties(); + } + + @Bean + public OracleDataSource dataSource(DataSourceProperties dataSourceProperties) { + return dataSourceProperties.initializeDataSourceBuilder().type(OracleDataSource.class).build(); + } + + @Bean + public EmbeddingModel embeddingModel() { + try { + TransformersEmbeddingModel tem = new TransformersEmbeddingModel(); + tem.afterPropertiesSet(); + return tem; + } + catch (Exception e) { + throw new RuntimeException("Failed initializing embedding model", e); + } + } + + } + + private static void dropTable(ApplicationContext context, String tableName) { + JdbcTemplate jdbcTemplate = context.getBean(JdbcTemplate.class); + jdbcTemplate.execute("DROP TABLE IF EXISTS " + tableName + " PURGE"); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { "COSINE", "DOT", "EUCLIDEAN", "EUCLIDEAN_SQUARED", "MANHATTAN" }) + public void addAndSearch(String distanceType) { + contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType) + .withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + DEFAULT_SEARCH_ACCURACY) + .run(context -> { + + VectorStore vectorStore = context.getBean(VectorStore.class); + + vectorStore.add(documents); + + List results = vectorStore + .similaritySearch(SearchRequest.query("What is Great Depression").withTopK(1)); + + assertThat(results).hasSize(1); + Document resultDoc = results.get(0); + assertThat(resultDoc.getId()).isEqualTo(documents.get(2).getId()); + 
assertThat(resultDoc.getMetadata()).containsKeys("meta2", "distance"); + + // Remove all documents from the store + vectorStore.delete(documents.stream().map(doc -> doc.getId()).toList()); + + List results2 = vectorStore + .similaritySearch(SearchRequest.query("Great Depression").withTopK(1)); + assertThat(results2).hasSize(0); + + dropTable(context, ((OracleVectorStore) vectorStore).getTableName()); + }); + } + + @ParameterizedTest(name = "Distance {0}, search accuracy {1} : {displayName} ") + @CsvSource({ "COSINE,-1", "DOT,-1", "EUCLIDEAN,-1", "EUCLIDEAN_SQUARED,-1", "MANHATTAN,-1", "COSINE,75", "DOT,80", + "EUCLIDEAN,60", "EUCLIDEAN_SQUARED,30", "MANHATTAN,42" }) + public void searchWithFilters(String distanceType, int searchAccuracy) { + contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType) + .withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + searchAccuracy) + .run(context -> { + + VectorStore vectorStore = context.getBean(VectorStore.class); + + var bgDocument = new Document("The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "BG", "year", 2020, "foo bar 1", "bar.foo")); + var nlDocument = new Document("The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "NL")); + var bgDocument2 = new Document("The World is Big and Salvation Lurks Around the Corner", + Map.of("country", "BG", "year", 2023)); + + vectorStore.add(List.of(bgDocument, nlDocument, bgDocument2)); + + SearchRequest searchRequest = SearchRequest.query("The World").withTopK(5).withSimilarityThresholdAll(); + + List results = vectorStore.similaritySearch(searchRequest); + + assertThat(results).hasSize(3); + + results = vectorStore.similaritySearch(searchRequest.withFilterExpression("country == 'NL'")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(nlDocument.getId()); + + results = 
vectorStore.similaritySearch(searchRequest.withFilterExpression("country == 'BG'")); + + assertThat(results).hasSize(2); + assertThat(results.get(0).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + assertThat(results.get(1).getId()).isIn(bgDocument.getId(), bgDocument2.getId()); + + results = vectorStore + .similaritySearch(searchRequest.withFilterExpression("country == 'BG' && year == 2020")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(bgDocument.getId()); + + results = vectorStore.similaritySearch( + searchRequest.withFilterExpression("(country == 'BG' && year == 2020) || (country == 'NL')")); + + assertThat(results).hasSize(2); + assertThat(results.get(0).getId()).isIn(bgDocument.getId(), nlDocument.getId()); + assertThat(results.get(1).getId()).isIn(bgDocument.getId(), nlDocument.getId()); + + results = vectorStore.similaritySearch(searchRequest + .withFilterExpression("NOT((country == 'BG' && year == 2020) || (country == 'NL'))")); + + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(bgDocument2.getId()); + + results = vectorStore.similaritySearch(SearchRequest.query("The World") + .withTopK(5) + .withSimilarityThresholdAll() + .withFilterExpression("\"foo bar 1\" == 'bar.foo'")); + assertThat(results).hasSize(1); + assertThat(results.get(0).getId()).isEqualTo(bgDocument.getId()); + + try { + vectorStore.similaritySearch(searchRequest.withFilterExpression("country == NL")); + Assert.fail("Invalid filter expression should have been cached!"); + } + catch (FilterExpressionTextParser.FilterExpressionParseException e) { + assertThat(e.getMessage()).contains("Line: 1:17, Error: no viable alternative at input 'NL'"); + } + + // Remove all documents from the store + dropTable(context, ((OracleVectorStore) vectorStore).getTableName()); + }); + } + + @ParameterizedTest(name = "{0} : {displayName} ") + @ValueSource(strings = { "COSINE", "DOT", "EUCLIDEAN", "EUCLIDEAN_SQUARED", "MANHATTAN" }) 
+	public void documentUpdate(String distanceType) {
+		// Checks that adding a document whose id is already stored replaces the
+		// existing entry: after the second add the store must still return a single
+		// result, carrying the new content and the new metadata key.
+		contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType)
+			.withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + DEFAULT_SEARCH_ACCURACY)
+			.run(context -> {
+				VectorStore vectorStore = context.getBean(VectorStore.class);
+
+				Document document = new Document(UUID.randomUUID().toString(), "Spring AI rocks!!",
+						Collections.singletonMap("meta1", "meta1"));
+
+				vectorStore.add(List.of(document));
+
+				List<Document> results = vectorStore.similaritySearch(SearchRequest.query("Spring").withTopK(5));
+
+				assertThat(results).hasSize(1);
+				Document resultDoc = results.get(0);
+				assertThat(resultDoc.getId()).isEqualTo(document.getId());
+
+				assertThat(resultDoc.getContent()).isEqualTo("Spring AI rocks!!");
+				assertThat(resultDoc.getMetadata()).containsKeys("meta1", "distance");
+
+				// Same id, different content and metadata.
+				Document sameIdDocument = new Document(document.getId(),
+						"The World is Big and Salvation Lurks Around the Corner",
+						Collections.singletonMap("meta2", "meta2"));
+
+				vectorStore.add(List.of(sameIdDocument));
+
+				results = vectorStore.similaritySearch(SearchRequest.query("FooBar").withTopK(5));
+				assertThat(results).hasSize(1);
+				resultDoc = results.get(0);
+				assertThat(resultDoc.getId()).isEqualTo(document.getId());
+				assertThat(resultDoc.getContent()).isEqualTo("The World is Big and Salvation Lurks Around the Corner");
+				assertThat(resultDoc.getMetadata()).containsKeys("meta2", "distance");
+
+				// Remove all documents from the store
+				dropTable(context, ((OracleVectorStore) vectorStore).getTableName());
+			});
+	}
+
+	/**
+	 * Checks that a similarity threshold narrows the result set. An unrestricted
+	 * search is run first; the threshold is then placed halfway between the two
+	 * smallest returned distances, so only the closest document is expected back.
+	 * @param distanceType value set for the
+	 * {@code test.spring.ai.vectorstore.oracle.distanceType} property
+	 */
+	@ParameterizedTest(name = "{0} : {displayName} ")
+	@ValueSource(strings = { "COSINE", "DOT" })
+	public void searchWithThreshold(String distanceType) {
+		contextRunner.withPropertyValues("test.spring.ai.vectorstore.oracle.distanceType=" + distanceType)
+			.withPropertyValues("test.spring.ai.vectorstore.oracle.searchAccuracy=" + DEFAULT_SEARCH_ACCURACY)
+			.run(context -> {
+
+				VectorStore vectorStore = context.getBean(VectorStore.class);
+
+				vectorStore.add(documents);
+
+				List<Document> fullResult = vectorStore
+					.similaritySearch(SearchRequest.query("Time Shelter").withTopK(5).withSimilarityThresholdAll());
+
+				assertThat(fullResult).hasSize(3);
+
+				assertThat(isSortedByDistance(fullResult)).isTrue();
+
+				List<Double> distances = fullResult.stream()
+					.map(doc -> (Double) doc.getMetadata().get("distance"))
+					.toList();
+
+				// Midpoint between the closest and the second-closest hit.
+				double threshold = (distances.get(0) + distances.get(1)) / 2d;
+
+				// The request takes a similarity, so the distance cut-off is passed as 1 - distance.
+				List<Document> results = vectorStore.similaritySearch(
+						SearchRequest.query("Time Shelter").withTopK(5).withSimilarityThreshold(1d - threshold));
+
+				assertThat(results).hasSize(1);
+				Document resultDoc = results.get(0);
+				assertThat(resultDoc.getId()).isEqualTo(documents.get(1).getId());
+
+				// Remove all documents from the store
+				dropTable(context, ((OracleVectorStore) vectorStore).getTableName());
+			});
+	}
+
+	/**
+	 * Tells whether the {@code "distance"} metadata values of the given documents are
+	 * in non-decreasing order.
+	 * @param documents documents whose {@code "distance"} metadata entry is a
+	 * {@link Double}
+	 * @return {@code true} if the list is empty, has one element, or no distance is
+	 * greater than the one that follows it; {@code false} otherwise
+	 */
+	private static boolean isSortedByDistance(final List<Document> documents) {
+		final List<Double> distances = documents.stream()
+			.map(doc -> (Double) doc.getMetadata().get("distance"))
+			.toList();
+
+		if (CollectionUtils.isEmpty(distances) || distances.size() == 1) {
+			return true;
+		}
+
+		Iterator<Double> iter = distances.iterator();
+		Double current;
+		Double previous = iter.next();
+		while (iter.hasNext()) {
+			current = iter.next();
+			if (previous > current) {
+				return false;
+			}
+			previous = current;
+		}
+		return true;
+	}
+
+}
diff --git a/vector-stores/spring-ai-oracle-store/src/test/resources/initialize.sql b/vector-stores/spring-ai-oracle-store/src/test/resources/initialize.sql
new file mode 100644
index 000000000..ac38a1965
--- /dev/null
+++ b/vector-stores/spring-ai-oracle-store/src/test/resources/initialize.sql
@@ -0,0 +1,10 @@
+-- Exit on any errors
+WHENEVER SQLERROR EXIT SQL.SQLCODE
+
+-- Configure the size of the Vector Pool to 1 GiB.
+ALTER SYSTEM SET vector_memory_size=1G SCOPE=SPFILE;
+
+SHUTDOWN ABORT;
+STARTUP;
+
+exit;