From 09317bc6de13b4cfb0071bf356ff7d178b8fc11e Mon Sep 17 00:00:00 2001 From: Christoph Strobl Date: Wed, 27 Aug 2014 14:53:29 +0200 Subject: [PATCH] #8 - Add MongoDB text search example. The sample reads the Spring IO blog Atom feed and performs text search on it. We use manual index creation and query via MongoTemplate as well as automatic index creation and derived queries via repositories. Original pull request: #10. --- README.md | 1 + mongodb/pom.xml | 1 + mongodb/text-search/README.md | 32 +++++ mongodb/text-search/pom.xml | 26 ++++ .../mongodb/textsearch/BlogPost.java | 82 ++++++++++++ .../textsearch/BlogPostRepository.java | 31 +++++ .../textsearch/MongoTestConfiguration.java | 70 +++++++++++ .../textsearch/TextSearchRepositoryTests.java | 82 ++++++++++++ .../textsearch/TextSearchTemplateTests.java | 118 ++++++++++++++++++ .../mongodb/util/BlogPostInitializer.java | 103 +++++++++++++++ .../mongodb/util/ConsoleResultPrinter.java | 42 +++++++ .../src/test/resources/logback.xml | 14 +++ 12 files changed, 602 insertions(+) create mode 100644 mongodb/text-search/README.md create mode 100644 mongodb/text-search/pom.xml create mode 100644 mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPost.java create mode 100644 mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPostRepository.java create mode 100644 mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/MongoTestConfiguration.java create mode 100644 mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchRepositoryTests.java create mode 100644 mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchTemplateTests.java create mode 100644 mongodb/text-search/src/test/java/example/springdata/mongodb/util/BlogPostInitializer.java create mode 100644 mongodb/text-search/src/test/java/example/springdata/mongodb/util/ConsoleResultPrinter.java create mode 100644 
mongodb/text-search/src/test/resources/logback.xml diff --git a/README.md b/README.md index b6bbe634..ea4886e8 100644 --- a/README.md +++ b/README.md @@ -16,6 +16,7 @@ We have separate folders for the samples of individual modules: * `geo-spatial` - Example project for general repository functionality (including geo-spatial functionality) and Querydsl integration * `aggregation` - Example project to showcase the MongoDB aggregation framework support. +* `text-search` - Example project showing usage of the MongoDB text search feature. ## Spring Data REST diff --git a/mongodb/pom.xml b/mongodb/pom.xml index f084df79..7a0a644c 100644 --- a/mongodb/pom.xml +++ b/mongodb/pom.xml @@ -19,6 +19,7 @@ geo-spatial aggregation + text-search diff --git a/mongodb/text-search/README.md b/mongodb/text-search/README.md new file mode 100644 index 00000000..11b56457 --- /dev/null +++ b/mongodb/text-search/README.md @@ -0,0 +1,32 @@ +# Spring Data MongoDB - Text Search Examples + +This project contains samples of text search specific features of Spring Data MongoDB. + +## Support for Text Index + +Define text index structures manually (like below) or use `@TextIndexed` to mark content to be indexed for full text search. + +```java +TextIndexDefinition textIndex = new TextIndexDefinitionBuilder() + .onField("title", 3F) + .onField("content", 2F) + .onField("categories") + .build(); + +template.indexOps(BlogPost.class).ensureIndex(textIndex); +``` + +## Support for full text repository queries + +Use derived finder methods to search for terms and phrases.
+ +```java +interface BlogPostRepository extends CrudRepository { + + // page through results for full text query + Page findBy(TextCriteria criteria, Pageable page); + + // find all matching documents and sort by relevance + List findAllByOrderByScoreDesc(TextCriteria criteria); +} +``` \ No newline at end of file diff --git a/mongodb/text-search/pom.xml b/mongodb/text-search/pom.xml new file mode 100644 index 00000000..23be76f5 --- /dev/null +++ b/mongodb/text-search/pom.xml @@ -0,0 +1,26 @@ + + 4.0.0 + + spring-data-mongodb-text-search + + Spring Data MongoDB - Text Search + + + org.springframework.data.examples + spring-data-mongodb-examples + 1.0.0.BUILD-SNAPSHOT + + + + + org.springframework.boot + spring-boot-starter-web + + + net.java.dev.rome + rome + 1.0.0 + + + \ No newline at end of file diff --git a/mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPost.java b/mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPost.java new file mode 100644 index 00000000..b278c77f --- /dev/null +++ b/mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPost.java @@ -0,0 +1,82 @@ +/* + * Copyright 2014 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example.springdata.mongodb.textsearch; + +import java.util.List; + +import org.springframework.data.annotation.Id; +import org.springframework.data.mongodb.core.index.TextIndexed; +import org.springframework.data.mongodb.core.mapping.Document; +import org.springframework.data.mongodb.core.mapping.TextScore; + +/** + * @author Christoph Strobl + */ +@Document +public class BlogPost { + + private @Id String id; + private @TextIndexed(weight = 3) String title; + private @TextIndexed(weight = 2) String content; + private @TextIndexed List categories; + private @TextScore Float score; + + public String getId() { + return id; + } + + public void setId(String id) { + this.id = id; + } + + public String getTitle() { + return title; + } + + public void setTitle(String title) { + this.title = title; + } + + public String getContent() { + return content; + } + + public void setContent(String content) { + this.content = content; + } + + public List getCategories() { + return categories; + } + + public void setCategories(List categories) { + this.categories = categories; + } + + public Float getScore() { + return score; + } + + public void setScore(Float score) { + this.score = score; + } + + @Override + public String toString() { + return "BlogPost [score=" + score + ", id=" + id + ", title=" + title + ", categories=" + getCategories() + "]"; + } + +} diff --git a/mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPostRepository.java b/mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPostRepository.java new file mode 100644 index 00000000..d046130a --- /dev/null +++ b/mongodb/text-search/src/main/java/example/springdata/mongodb/textsearch/BlogPostRepository.java @@ -0,0 +1,31 @@ +/* + * Copyright 2014 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.mongodb.textsearch; + +import java.util.List; + +import org.springframework.data.mongodb.core.query.TextCriteria; +import org.springframework.data.repository.CrudRepository; + +/** + * @author Christoph Strobl + */ +public interface BlogPostRepository extends CrudRepository { + + List findAllBy(TextCriteria criteria); + + List findAllByOrderByScoreDesc(TextCriteria criteria); +} diff --git a/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/MongoTestConfiguration.java b/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/MongoTestConfiguration.java new file mode 100644 index 00000000..f92a8fd2 --- /dev/null +++ b/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/MongoTestConfiguration.java @@ -0,0 +1,70 @@ +/* + * Copyright 2014 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example.springdata.mongodb.textsearch; + +import javax.annotation.PreDestroy; + +import org.springframework.context.annotation.Bean; +import org.springframework.context.annotation.Configuration; +import org.springframework.data.mongodb.config.AbstractMongoConfiguration; +import org.springframework.data.mongodb.repository.config.EnableMongoRepositories; + +import com.mongodb.Mongo; +import com.mongodb.MongoClient; + +import example.springdata.mongodb.util.BlogPostInitializer; + +/** + * @author Christoph Strobl + */ +@Configuration +@EnableMongoRepositories +public class MongoTestConfiguration extends AbstractMongoConfiguration { + + static final String DATABASE_NAME = "s2gx2014-blog"; + static final String BLOG_POST_ATOM_FEED_SOURCE = "https://spring.io/blog.atom"; + + @Override + protected String getDatabaseName() { + return DATABASE_NAME; + } + + @Override + public Mongo mongo() throws Exception { + return new MongoClient(); + } + + /** + * Initializes the repository with a predefined set of entities. + * + * @return + */ + @Bean + public BlogPostInitializer initializer() { + return new BlogPostInitializer(BLOG_POST_ATOM_FEED_SOURCE); + } + + /** + * Clean up after execution by dropping used test db instance. + * + * @throws Exception + */ + @PreDestroy + public void dropTestDB() throws Exception { + mongo().dropDatabase(getDatabaseName()); + } + +} diff --git a/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchRepositoryTests.java b/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchRepositoryTests.java new file mode 100644 index 00000000..f56d456e --- /dev/null +++ b/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchRepositoryTests.java @@ -0,0 +1,82 @@ +/* + * Copyright 2014 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.mongodb.textsearch; + +import static example.springdata.mongodb.util.ConsoleResultPrinter.*; + +import java.util.List; + +import org.junit.Test; +import org.junit.runner.RunWith; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.data.mongodb.core.query.TextCriteria; +import org.springframework.test.context.ContextConfiguration; +import org.springframework.test.context.junit4.SpringJUnit4ClassRunner; + +/** + * @author Christoph Strobl + */ +@RunWith(SpringJUnit4ClassRunner.class) +@ContextConfiguration(classes = { MongoTestConfiguration.class }) +public class TextSearchRepositoryTests { + + @Autowired BlogPostRepository repo; + + /** + * Show how to do simple matching.
+ * Note that text search is case insensitive and will also find entries like {@literal releases}. + */ + @Test + public void findAllBlogPostsWithRelease() { + + TextCriteria criteria = TextCriteria.forDefaultLanguage().matchingAny("release"); + List blogPosts = repo.findAllBy(criteria); + printResult(blogPosts, criteria); + } + + /** + * Simple matching using negation. + */ + @Test + public void findAllBlogPostsWithReleaseButHeyIDoWantTheEngineeringStuff() { + + TextCriteria criteria = TextCriteria.forDefaultLanguage().matchingAny("release").notMatching("engineering"); + List blogPosts = repo.findAllBy(criteria); + printResult(blogPosts, criteria); + } + + /** + * Phrase matching looks for the whole phrase as one. + */ + @Test + public void findAllBlogPostsByPhrase() { + + TextCriteria criteria = TextCriteria.forDefaultLanguage().matchingPhrase("release candidate"); + List blogPosts = repo.findAllBy(criteria); + printResult(blogPosts, criteria); + } + + /** + * Sort by relevance relying on the value marked with {@link org.springframework.data.mongodb.core.mapping.TextScore}. + */ + @Test + public void findAllBlogPostsByPhraseSortByScore() { + + TextCriteria criteria = TextCriteria.forDefaultLanguage().matchingPhrase("release candidate"); + List blogPosts = repo.findAllByOrderByScoreDesc(criteria); + printResult(blogPosts, criteria); + } +} diff --git a/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchTemplateTests.java b/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchTemplateTests.java new file mode 100644 index 00000000..5f2706e8 --- /dev/null +++ b/mongodb/text-search/src/test/java/example/springdata/mongodb/textsearch/TextSearchTemplateTests.java @@ -0,0 +1,118 @@ +/* + * Copyright 2014 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. 
+ * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.mongodb.textsearch; + +import static example.springdata.mongodb.util.ConsoleResultPrinter.*; +import static org.springframework.data.mongodb.core.query.Query.*; + +import java.util.List; + +import org.junit.Before; +import org.junit.Test; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.data.mongodb.core.index.TextIndexDefinition; +import org.springframework.data.mongodb.core.index.TextIndexDefinition.TextIndexDefinitionBuilder; +import org.springframework.data.mongodb.core.query.TextCriteria; +import org.springframework.data.mongodb.core.query.TextQuery; + +import com.mongodb.MongoClient; + +import example.springdata.mongodb.util.BlogPostInitializer; + +/** + * @author Christoph Strobl + */ +public class TextSearchTemplateTests { + + MongoTemplate template; + + @Before + public void setUp() throws Exception { + + template = new MongoTemplate(new MongoClient(), MongoTestConfiguration.DATABASE_NAME); + template.dropCollection(BlogPost.class); + + createIndex(); + loadTestData(); + } + + /** + * Show how to do simple matching.
+ * Note that text search is case insensitive and will also find entries like {@literal releases}. + */ + @Test + public void findAllBlogPostsWithRelease() { + + TextCriteria criteria = TextCriteria.forDefaultLanguage().matchingAny("release"); + List blogPosts = template.find(query(criteria), BlogPost.class); + printResult(blogPosts, criteria); + } + + /** + * Sort by relevance relying on the value marked with {@link org.springframework.data.mongodb.core.mapping.TextScore}. + */ + @Test + public void findAllBlogPostsByPhraseSortByScore() { + + TextCriteria criteria = TextCriteria.forDefaultLanguage().matchingPhrase("release"); + + TextQuery query = new TextQuery(criteria); + query.setScoreFieldName("score"); + query.sortByScore(); + + List blogPosts = template.find(query, BlogPost.class); + printResult(blogPosts, criteria); + } + + /** + * Creates the mongodb text index for {@link BlogPost}.
+ * + *
+	 * 
+	 * db.collection.ensureIndex(
+	 * {
+	 *     "title" : "text",
+	 *     "content" : "text",
+	 *     "categories" : "text"
+	 * },
+	 * {
+	 *     weights : {
+	 *         "title" : 3,
+	 *         "content" : 2
+	 *     }
+	 * }
+	 * )
+	 * 
+	 * 
+ */ + private void createIndex() { + + TextIndexDefinition textIndex = new TextIndexDefinitionBuilder()// + .onField("title", 3F) // + .onField("content", 2F) // + .onField("categories") // + .build(); + + template.indexOps(BlogPost.class).ensureIndex(textIndex); + } + + private void loadTestData() throws Exception { + + BlogPostInitializer initializer = new BlogPostInitializer(MongoTestConfiguration.BLOG_POST_ATOM_FEED_SOURCE); + initializer.initialize(this.template); + } + +} diff --git a/mongodb/text-search/src/test/java/example/springdata/mongodb/util/BlogPostInitializer.java b/mongodb/text-search/src/test/java/example/springdata/mongodb/util/BlogPostInitializer.java new file mode 100644 index 00000000..91e6fe07 --- /dev/null +++ b/mongodb/text-search/src/test/java/example/springdata/mongodb/util/BlogPostInitializer.java @@ -0,0 +1,103 @@ +/* + * Copyright 2014 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ +package example.springdata.mongodb.util; + +import java.util.ArrayList; +import java.util.List; + +import org.springframework.beans.factory.InitializingBean; +import org.springframework.beans.factory.annotation.Autowired; +import org.springframework.core.convert.converter.Converter; +import org.springframework.data.mongodb.core.MongoTemplate; +import org.springframework.http.ResponseEntity; +import org.springframework.web.client.RestTemplate; + +import com.sun.syndication.feed.atom.Category; +import com.sun.syndication.feed.atom.Content; +import com.sun.syndication.feed.atom.Entry; +import com.sun.syndication.feed.atom.Feed; + +import example.springdata.mongodb.textsearch.BlogPost; + +/** + * @author Christoph Strobl + */ +public class BlogPostInitializer implements InitializingBean { + + private final String url; + private final RestTemplate restTemplate; + private final Converter converter; + + @Autowired MongoTemplate mongoTemplate; + + public BlogPostInitializer(String url) { + + restTemplate = new RestTemplate(); + this.converter = new EntryConverter(); + this.url = url; + } + + public void initialize(MongoTemplate mongoTemplate) { + + ResponseEntity feed = restTemplate.getForEntity(url, Feed.class); + if (feed.hasBody()) { + for (Object entry : feed.getBody().getEntries()) { + if (entry instanceof Entry) { + mongoTemplate.save(converter.convert((Entry) entry)); + } + } + } + } + + @Override + public void afterPropertiesSet() throws Exception { + initialize(this.mongoTemplate); + } + + /** + * {@link Converter} implementation capable of converting atom feed {@link Entry} into {@link BlogPost}. 
+ * + * @author Christoph Strobl + */ + static class EntryConverter implements Converter { + + @Override + public BlogPost convert(Entry source) { + + BlogPost post = new BlogPost(); + + post.setId(source.getId()); + post.setTitle(source.getTitle()); + + for (Object content : source.getContents()) { + if (content instanceof Content) { + post.setContent(((Content) content).getValue()); + } + } + + List categories = new ArrayList(); + for (Object category : source.getCategories()) { + if (category instanceof Category) { + categories.add(((Category) category).getLabel()); + } + } + post.setCategories(categories); + + return post; + } + } + +} diff --git a/mongodb/text-search/src/test/java/example/springdata/mongodb/util/ConsoleResultPrinter.java b/mongodb/text-search/src/test/java/example/springdata/mongodb/util/ConsoleResultPrinter.java new file mode 100644 index 00000000..c76577a8 --- /dev/null +++ b/mongodb/text-search/src/test/java/example/springdata/mongodb/util/ConsoleResultPrinter.java @@ -0,0 +1,42 @@ +/* + * Copyright 2014 the original author or authors. + * + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package example.springdata.mongodb.util; + +import java.util.Collection; + +import org.springframework.data.mongodb.core.query.TextCriteria; + +import example.springdata.mongodb.textsearch.BlogPost; + +/** + * Just a little helper for showing {@link BlogPost}s output on the console. 
+ * + * @author Christoph Strobl + */ +public class ConsoleResultPrinter { + + public static void printResult(Collection blogPosts, TextCriteria criteria) { + + System.out.println(String.format("XXXXXXXXXXXX -- Found %s blogPosts matching '%s' --XXXXXXXXXXXX", + blogPosts.size(), criteria != null ? criteria.getCriteriaObject() : "")); + + for (BlogPost blogPost : blogPosts) { + System.out.println(blogPost); + } + System.out.println("XXXXXXXXXXXX -- XXXXXXXXXXXX -- XXXXXXXXXXXX\r\n"); + } + +} diff --git a/mongodb/text-search/src/test/resources/logback.xml b/mongodb/text-search/src/test/resources/logback.xml new file mode 100644 index 00000000..61c86dd9 --- /dev/null +++ b/mongodb/text-search/src/test/resources/logback.xml @@ -0,0 +1,14 @@ + + + + + + %d %5p %40.40c:%4L - %m%n + + + + + + + + \ No newline at end of file