Introduce checkstyle plugin
- Based on https://github.com/spring-io/spring-javaformat
- In this iteration, Checkstyle checks are only enabled for spring-ai-core
This commit is contained in:
committed by
Mark Pollack
parent
33a72417e1
commit
8e758dbd00
@@ -1,4 +1,20 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
~ Copyright 2023-2024 the original author or authors.
|
||||
~
|
||||
~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~ you may not use this file except in compliance with the License.
|
||||
~ You may obtain a copy of the License at
|
||||
~
|
||||
~ https://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
|
||||
@@ -1,18 +1,45 @@
|
||||
package org.springframework.ai.reader.markdown;
|
||||
/*
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.commonmark.node.*;
|
||||
import org.commonmark.parser.Parser;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.document.DocumentReader;
|
||||
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
|
||||
import org.springframework.core.io.DefaultResourceLoader;
|
||||
import org.springframework.core.io.Resource;
|
||||
package org.springframework.ai.reader.markdown;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.io.InputStreamReader;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
|
||||
import org.commonmark.node.AbstractVisitor;
|
||||
import org.commonmark.node.BlockQuote;
|
||||
import org.commonmark.node.Code;
|
||||
import org.commonmark.node.FencedCodeBlock;
|
||||
import org.commonmark.node.HardLineBreak;
|
||||
import org.commonmark.node.Heading;
|
||||
import org.commonmark.node.ListItem;
|
||||
import org.commonmark.node.Node;
|
||||
import org.commonmark.node.SoftLineBreak;
|
||||
import org.commonmark.node.Text;
|
||||
import org.commonmark.node.ThematicBreak;
|
||||
import org.commonmark.parser.Parser;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.document.DocumentReader;
|
||||
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
|
||||
import org.springframework.core.io.DefaultResourceLoader;
|
||||
import org.springframework.core.io.Resource;
|
||||
|
||||
/**
|
||||
* Reads the given Markdown resource and groups headers, paragraphs, or text divided by
|
||||
* horizontal lines (depending on the
|
||||
@@ -58,10 +85,10 @@ public class MarkdownDocumentReader implements DocumentReader {
|
||||
*/
|
||||
@Override
|
||||
public List<Document> get() {
|
||||
try (var input = markdownResource.getInputStream()) {
|
||||
Node node = parser.parseReader(new InputStreamReader(input));
|
||||
try (var input = this.markdownResource.getInputStream()) {
|
||||
Node node = this.parser.parseReader(new InputStreamReader(input));
|
||||
|
||||
DocumentVisitor documentVisitor = new DocumentVisitor(config);
|
||||
DocumentVisitor documentVisitor = new DocumentVisitor(this.config);
|
||||
node.accept(documentVisitor);
|
||||
|
||||
return documentVisitor.getDocuments();
|
||||
@@ -90,7 +117,7 @@ public class MarkdownDocumentReader implements DocumentReader {
|
||||
|
||||
@Override
|
||||
public void visit(org.commonmark.node.Document document) {
|
||||
currentDocumentBuilder = Document.builder();
|
||||
this.currentDocumentBuilder = Document.builder();
|
||||
super.visit(document);
|
||||
}
|
||||
|
||||
@@ -102,7 +129,7 @@ public class MarkdownDocumentReader implements DocumentReader {
|
||||
|
||||
@Override
|
||||
public void visit(ThematicBreak thematicBreak) {
|
||||
if (config.horizontalRuleCreateDocument) {
|
||||
if (this.config.horizontalRuleCreateDocument) {
|
||||
buildAndFlush();
|
||||
}
|
||||
super.visit(thematicBreak);
|
||||
@@ -128,32 +155,32 @@ public class MarkdownDocumentReader implements DocumentReader {
|
||||
|
||||
@Override
|
||||
public void visit(BlockQuote blockQuote) {
|
||||
if (!config.includeBlockquote) {
|
||||
if (!this.config.includeBlockquote) {
|
||||
buildAndFlush();
|
||||
}
|
||||
|
||||
translateLineBreakToSpace();
|
||||
currentDocumentBuilder.withMetadata("category", "blockquote");
|
||||
this.currentDocumentBuilder.withMetadata("category", "blockquote");
|
||||
super.visit(blockQuote);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(Code code) {
|
||||
currentParagraphs.add(code.getLiteral());
|
||||
currentDocumentBuilder.withMetadata("category", "code_inline");
|
||||
this.currentParagraphs.add(code.getLiteral());
|
||||
this.currentDocumentBuilder.withMetadata("category", "code_inline");
|
||||
super.visit(code);
|
||||
}
|
||||
|
||||
@Override
|
||||
public void visit(FencedCodeBlock fencedCodeBlock) {
|
||||
if (!config.includeCodeBlock) {
|
||||
if (!this.config.includeCodeBlock) {
|
||||
buildAndFlush();
|
||||
}
|
||||
|
||||
translateLineBreakToSpace();
|
||||
currentParagraphs.add(fencedCodeBlock.getLiteral());
|
||||
currentDocumentBuilder.withMetadata("category", "code_block");
|
||||
currentDocumentBuilder.withMetadata("lang", fencedCodeBlock.getInfo());
|
||||
this.currentParagraphs.add(fencedCodeBlock.getLiteral());
|
||||
this.currentDocumentBuilder.withMetadata("category", "code_block");
|
||||
this.currentDocumentBuilder.withMetadata("lang", fencedCodeBlock.getInfo());
|
||||
|
||||
buildAndFlush();
|
||||
|
||||
@@ -163,11 +190,11 @@ public class MarkdownDocumentReader implements DocumentReader {
|
||||
@Override
|
||||
public void visit(Text text) {
|
||||
if (text.getParent() instanceof Heading heading) {
|
||||
currentDocumentBuilder.withMetadata("category", "header_%d".formatted(heading.getLevel()))
|
||||
this.currentDocumentBuilder.withMetadata("category", "header_%d".formatted(heading.getLevel()))
|
||||
.withMetadata("title", text.getLiteral());
|
||||
}
|
||||
else {
|
||||
currentParagraphs.add(text.getLiteral());
|
||||
this.currentParagraphs.add(text.getLiteral());
|
||||
}
|
||||
|
||||
super.visit(text);
|
||||
@@ -176,29 +203,29 @@ public class MarkdownDocumentReader implements DocumentReader {
|
||||
public List<Document> getDocuments() {
|
||||
buildAndFlush();
|
||||
|
||||
return documents;
|
||||
return this.documents;
|
||||
}
|
||||
|
||||
private void buildAndFlush() {
|
||||
if (!currentParagraphs.isEmpty()) {
|
||||
String content = String.join("", currentParagraphs);
|
||||
if (!this.currentParagraphs.isEmpty()) {
|
||||
String content = String.join("", this.currentParagraphs);
|
||||
|
||||
Document.Builder builder = currentDocumentBuilder.withContent(content);
|
||||
Document.Builder builder = this.currentDocumentBuilder.withContent(content);
|
||||
|
||||
config.additionalMetadata.forEach(builder::withMetadata);
|
||||
this.config.additionalMetadata.forEach(builder::withMetadata);
|
||||
|
||||
Document document = builder.build();
|
||||
|
||||
documents.add(document);
|
||||
this.documents.add(document);
|
||||
|
||||
currentParagraphs.clear();
|
||||
this.currentParagraphs.clear();
|
||||
}
|
||||
currentDocumentBuilder = Document.builder();
|
||||
this.currentDocumentBuilder = Document.builder();
|
||||
}
|
||||
|
||||
private void translateLineBreakToSpace() {
|
||||
if (!currentParagraphs.isEmpty()) {
|
||||
currentParagraphs.add(" ");
|
||||
if (!this.currentParagraphs.isEmpty()) {
|
||||
this.currentParagraphs.add(" ");
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
@@ -1,12 +1,28 @@
|
||||
/*
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.markdown.config;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.reader.markdown.MarkdownDocumentReader;
|
||||
import org.springframework.util.Assert;
|
||||
|
||||
import java.util.HashMap;
|
||||
import java.util.Map;
|
||||
|
||||
/**
|
||||
* Common configuration for the {@link MarkdownDocumentReader}.
|
||||
*
|
||||
@@ -23,10 +39,10 @@ public class MarkdownDocumentReaderConfig {
|
||||
public final Map<String, Object> additionalMetadata;
|
||||
|
||||
public MarkdownDocumentReaderConfig(Builder builder) {
|
||||
horizontalRuleCreateDocument = builder.horizontalRuleCreateDocument;
|
||||
includeCodeBlock = builder.includeCodeBlock;
|
||||
includeBlockquote = builder.includeBlockquote;
|
||||
additionalMetadata = builder.additionalMetadata;
|
||||
this.horizontalRuleCreateDocument = builder.horizontalRuleCreateDocument;
|
||||
this.includeCodeBlock = builder.includeCodeBlock;
|
||||
this.includeBlockquote = builder.includeBlockquote;
|
||||
this.additionalMetadata = builder.additionalMetadata;
|
||||
}
|
||||
|
||||
/**
|
||||
|
||||
@@ -1,12 +1,29 @@
|
||||
package org.springframework.ai.reader.markdown;
|
||||
/*
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
* WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
|
||||
package org.springframework.ai.reader.markdown;
|
||||
|
||||
import java.util.List;
|
||||
import java.util.Map;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.reader.markdown.config.MarkdownDocumentReaderConfig;
|
||||
|
||||
import static org.assertj.core.api.Assertions.assertThat;
|
||||
import static org.assertj.core.groups.Tuple.tuple;
|
||||
|
||||
|
||||
@@ -1,4 +1,20 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
~ Copyright 2023-2024 the original author or authors.
|
||||
~
|
||||
~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~ you may not use this file except in compliance with the License.
|
||||
~ You may obtain a copy of the License at
|
||||
~
|
||||
~ https://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,9 +13,10 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.*;
|
||||
import java.io.IOException;
|
||||
import java.util.ArrayList;
|
||||
import java.util.List;
|
||||
@@ -24,9 +25,9 @@ import java.util.stream.Collectors;
|
||||
import org.apache.pdfbox.pdfparser.PDFParser;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
import org.apache.pdfbox.pdmodel.PDPage;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.document.DocumentReader;
|
||||
import org.springframework.ai.reader.pdf.config.PdfDocumentReaderConfig;
|
||||
@@ -46,22 +47,22 @@ import org.springframework.util.StringUtils;
|
||||
*/
|
||||
public class PagePdfDocumentReader implements DocumentReader {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private static final String PDF_PAGE_REGION = "pdfPageRegion";
|
||||
|
||||
public static final String METADATA_START_PAGE_NUMBER = "page_number";
|
||||
|
||||
public static final String METADATA_END_PAGE_NUMBER = "end_page_number";
|
||||
|
||||
public static final String METADATA_FILE_NAME = "file_name";
|
||||
|
||||
private static final String PDF_PAGE_REGION = "pdfPageRegion";
|
||||
|
||||
protected final PDDocument document;
|
||||
|
||||
private PdfDocumentReaderConfig config;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
protected String resourceFileName;
|
||||
|
||||
private PdfDocumentReaderConfig config;
|
||||
|
||||
public PagePdfDocumentReader(String resourceUrl) {
|
||||
this(new DefaultResourceLoader().getResource(resourceUrl));
|
||||
}
|
||||
@@ -103,15 +104,15 @@ public class PagePdfDocumentReader implements DocumentReader {
|
||||
|
||||
int totalPages = this.document.getDocumentCatalog().getPages().getCount();
|
||||
int logFrequency = totalPages > 10 ? totalPages / 10 : 1; // if less than 10
|
||||
// pages, print
|
||||
// each iteration
|
||||
// pages, print
|
||||
// each iteration
|
||||
int counter = 0;
|
||||
|
||||
PDPage lastPage = this.document.getDocumentCatalog().getPages().iterator().next();
|
||||
for (PDPage page : this.document.getDocumentCatalog().getPages()) {
|
||||
lastPage = page;
|
||||
if (counter % logFrequency == 0 && counter / logFrequency < 10) {
|
||||
logger.info("Processing PDF page: {}", (counter + 1));
|
||||
this.logger.info("Processing PDF page: {}", (counter + 1));
|
||||
}
|
||||
counter++;
|
||||
|
||||
@@ -153,7 +154,7 @@ public class PagePdfDocumentReader implements DocumentReader {
|
||||
readDocuments.add(toDocument(lastPage, pageTextGroupList.stream().collect(Collectors.joining()),
|
||||
startPageNumber, pageNumber));
|
||||
}
|
||||
logger.info("Processing {} pages", totalPages);
|
||||
this.logger.info("Processing {} pages", totalPages);
|
||||
return readDocuments;
|
||||
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,18 +13,19 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf;
|
||||
|
||||
import java.awt.Rectangle;
|
||||
import java.awt.*;
|
||||
import java.util.ArrayList;
|
||||
import java.util.Iterator;
|
||||
import java.util.List;
|
||||
|
||||
import org.apache.pdfbox.pdfparser.PDFParser;
|
||||
import org.apache.pdfbox.pdmodel.PDDocument;
|
||||
|
||||
import org.slf4j.Logger;
|
||||
import org.slf4j.LoggerFactory;
|
||||
|
||||
import org.springframework.ai.document.Document;
|
||||
import org.springframework.ai.document.DocumentReader;
|
||||
import org.springframework.ai.reader.pdf.config.ParagraphManager;
|
||||
@@ -48,8 +49,6 @@ import org.springframework.util.StringUtils;
|
||||
*/
|
||||
public class ParagraphPdfDocumentReader implements DocumentReader {
|
||||
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
// Constants for metadata keys
|
||||
private static final String METADATA_START_PAGE = "page_number";
|
||||
|
||||
@@ -61,14 +60,16 @@ public class ParagraphPdfDocumentReader implements DocumentReader {
|
||||
|
||||
private static final String METADATA_FILE_NAME = "file_name";
|
||||
|
||||
private final ParagraphManager paragraphTextExtractor;
|
||||
|
||||
protected final PDDocument document;
|
||||
|
||||
private PdfDocumentReaderConfig config;
|
||||
private final Logger logger = LoggerFactory.getLogger(getClass());
|
||||
|
||||
private final ParagraphManager paragraphTextExtractor;
|
||||
|
||||
protected String resourceFileName;
|
||||
|
||||
private PdfDocumentReaderConfig config;
|
||||
|
||||
/**
|
||||
* Constructs a ParagraphPdfDocumentReader using a resource URL.
|
||||
* @param resourceUrl The URL of the PDF resource.
|
||||
@@ -132,7 +133,7 @@ public class ParagraphPdfDocumentReader implements DocumentReader {
|
||||
List<Document> documents = new ArrayList<>(paragraphs.size());
|
||||
|
||||
if (!CollectionUtils.isEmpty(paragraphs)) {
|
||||
logger.info("Start processing paragraphs from PDF");
|
||||
this.logger.info("Start processing paragraphs from PDF");
|
||||
Iterator<Paragraph> itr = paragraphs.iterator();
|
||||
|
||||
var current = itr.next();
|
||||
@@ -151,7 +152,7 @@ public class ParagraphPdfDocumentReader implements DocumentReader {
|
||||
}
|
||||
}
|
||||
}
|
||||
logger.info("End processing paragraphs from PDF");
|
||||
this.logger.info("End processing paragraphs from PDF");
|
||||
return documents;
|
||||
}
|
||||
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,15 +13,16 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf.aot;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
import org.springframework.aot.hint.RuntimeHints;
|
||||
import org.springframework.aot.hint.RuntimeHintsRegistrar;
|
||||
import org.springframework.core.io.support.PathMatchingResourcePatternResolver;
|
||||
|
||||
import java.io.IOException;
|
||||
import java.util.Set;
|
||||
|
||||
/**
|
||||
* The PdfReaderRuntimeHints class is responsible for registering runtime hints for PDFBox
|
||||
* resources.
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf.config;
|
||||
|
||||
import java.io.IOException;
|
||||
@@ -39,34 +40,6 @@ import org.springframework.util.CollectionUtils;
|
||||
*/
|
||||
public class ParagraphManager {
|
||||
|
||||
/**
|
||||
* Represents a document paragraph metadata and hierarchy.
|
||||
*
|
||||
* @param parent Parent paragraph that will contain a children paragraphs.
|
||||
* @param title Paragraph title as it appears in the PDF document.
|
||||
* @param level The TOC deepness level for this paragraph. The root is at level 0.
|
||||
* @param startPageNumber The page number in the PDF where this paragraph begins.
|
||||
* @param endPageNumber The page number in the PDF where this paragraph ends.
|
||||
* @param children Sub-paragraphs for this paragraph.
|
||||
*/
|
||||
public record Paragraph(Paragraph parent, String title, int level, int startPageNumber, int endPageNumber,
|
||||
int position, List<Paragraph> children) {
|
||||
|
||||
public Paragraph(Paragraph parent, String title, int level, int startPageNumber, int endPageNumber,
|
||||
int position) {
|
||||
this(parent, title, level, startPageNumber, endPageNumber, position, new ArrayList<>());
|
||||
}
|
||||
|
||||
@Override
|
||||
public String toString() {
|
||||
String indent = (level < 0) ? "" : new String(new char[level * 2]).replace('\0', ' ');
|
||||
|
||||
return indent + " " + level + ") " + title + " [" + startPageNumber + "," + endPageNumber + "], children = "
|
||||
+ children.size() + ", pos = " + position;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
/**
|
||||
* Root of the paragraphs tree.
|
||||
*/
|
||||
@@ -90,7 +63,7 @@ public class ParagraphManager {
|
||||
new Paragraph(null, "root", -1, 1, this.document.getNumberOfPages(), 0),
|
||||
this.document.getDocumentCatalog().getDocumentOutline(), 0);
|
||||
|
||||
printParagraph(rootParagraph, System.out);
|
||||
printParagraph(this.rootParagraph, System.out);
|
||||
}
|
||||
catch (Exception e) {
|
||||
throw new RuntimeException(e);
|
||||
@@ -203,4 +176,32 @@ public class ParagraphManager {
|
||||
return resultList;
|
||||
}
|
||||
|
||||
/**
|
||||
* Represents a document paragraph's metadata and its place in the paragraph hierarchy.
|
||||
*
|
||||
* @param parent Parent paragraph that holds this paragraph as a child; {@code null} for the root.
|
||||
* @param title Paragraph title as it appears in the PDF document.
|
||||
* @param level The TOC depth level for this paragraph. The root paragraph is created with level -1.
|
||||
* @param startPageNumber The page number in the PDF where this paragraph begins.
|
||||
* @param endPageNumber The page number in the PDF where this paragraph ends.
* @param position Position value supplied by the creator of the paragraph
* (NOTE(review): exact meaning is not visible here - confirm against the callers).
|
||||
* @param children Sub-paragraphs for this paragraph.
|
||||
*/
|
||||
public record Paragraph(Paragraph parent, String title, int level, int startPageNumber, int endPageNumber,
|
||||
int position, List<Paragraph> children) {
|
||||
|
||||
/**
 * Convenience constructor that starts the paragraph with a new, empty
 * {@link ArrayList} of children.
 */
public Paragraph(Paragraph parent, String title, int level, int startPageNumber, int endPageNumber,
|
||||
int position) {
|
||||
this(parent, title, level, startPageNumber, endPageNumber, position, new ArrayList<>());
|
||||
}
|
||||
|
||||
/**
 * Renders a single-line summary: indentation, level, title, page range, number of
 * children and position.
 */
@Override
|
||||
public String toString() {
|
||||
// Two spaces of indentation per level; a negative level gets no indentation.
String indent = (this.level < 0) ? "" : new String(new char[this.level * 2]).replace('\0', ' ');
|
||||
|
||||
return indent + " " + this.level + ") " + this.title + " [" + this.startPageNumber + ","
|
||||
+ this.endPageNumber + "], children = " + this.children.size() + ", pos = " + this.position;
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf.config;
|
||||
|
||||
import org.springframework.ai.reader.ExtractedTextFormatter;
|
||||
@@ -40,6 +41,14 @@ public class PdfDocumentReaderConfig {
|
||||
|
||||
public final ExtractedTextFormatter pageExtractedTextFormatter;
|
||||
|
||||
/**
 * Creates a configuration by copying each setting held by the given builder into the
 * corresponding field of this instance.
 * @param builder the builder whose current values are copied
 */
private PdfDocumentReaderConfig(PdfDocumentReaderConfig.Builder builder) {
|
||||
this.pagesPerDocument = builder.pagesPerDocument;
|
||||
this.pageBottomMargin = builder.pageBottomMargin;
|
||||
this.pageTopMargin = builder.pageTopMargin;
|
||||
this.pageExtractedTextFormatter = builder.pageExtractedTextFormatter;
|
||||
this.reversedParagraphPosition = builder.reversedParagraphPosition;
|
||||
}
|
||||
|
||||
/**
|
||||
* Start building a new configuration.
|
||||
* @return The entry point for creating a new configuration.
|
||||
@@ -56,14 +65,6 @@ public class PdfDocumentReaderConfig {
|
||||
return builder().build();
|
||||
}
|
||||
|
||||
private PdfDocumentReaderConfig(PdfDocumentReaderConfig.Builder builder) {
|
||||
this.pagesPerDocument = builder.pagesPerDocument;
|
||||
this.pageBottomMargin = builder.pageBottomMargin;
|
||||
this.pageTopMargin = builder.pageTopMargin;
|
||||
this.pageExtractedTextFormatter = builder.pageExtractedTextFormatter;
|
||||
this.reversedParagraphPosition = builder.reversedParagraphPosition;
|
||||
}
|
||||
|
||||
public static class Builder {
|
||||
|
||||
private int pagesPerDocument = 1;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -180,8 +180,9 @@ public class ForkPDFLayoutTextStripper extends PDFTextStripper {
|
||||
double height = textPosition.getHeight();
|
||||
int numberOfLines = (int) (Math.floor(textYPosition - previousTextYPosition) / height);
|
||||
numberOfLines = Math.max(1, numberOfLines - 1); // exclude current new line
|
||||
if (DEBUG)
|
||||
if (DEBUG) {
|
||||
System.out.println(height + " " + numberOfLines);
|
||||
}
|
||||
return numberOfLines;
|
||||
}
|
||||
else {
|
||||
@@ -191,7 +192,7 @@ public class ForkPDFLayoutTextStripper extends PDFTextStripper {
|
||||
|
||||
private TextLine addNewLine() {
|
||||
TextLine textLine = new TextLine(this.getCurrentPageWidth());
|
||||
textLineList.add(textLine);
|
||||
this.textLineList.add(textLine);
|
||||
return textLine;
|
||||
}
|
||||
|
||||
@@ -248,7 +249,7 @@ class TextLine {
|
||||
}
|
||||
|
||||
public String getLine() {
|
||||
return line;
|
||||
return this.line;
|
||||
}
|
||||
|
||||
private int computeIndexForCharacter(final Character character) {
|
||||
@@ -313,7 +314,7 @@ class TextLine {
|
||||
|
||||
private void completeLineWithSpaces() {
|
||||
for (int i = 0; i < this.getLineLength(); ++i) {
|
||||
line += SPACE_CHARACTER;
|
||||
this.line += SPACE_CHARACTER;
|
||||
}
|
||||
}
|
||||
|
||||
@@ -350,8 +351,9 @@ class Character {
|
||||
this.isFirstCharacterOfAWord = isFirstCharacterOfAWord;
|
||||
this.isCharacterAtTheBeginningOfNewLine = isCharacterAtTheBeginningOfNewLine;
|
||||
this.isCharacterCloseToPreviousWord = isCharacterPartOfASentence;
|
||||
if (ForkPDFLayoutTextStripper.DEBUG)
|
||||
if (ForkPDFLayoutTextStripper.DEBUG) {
|
||||
System.out.println(this.toString());
|
||||
}
|
||||
}
|
||||
|
||||
public char getCharacterValue() {
|
||||
@@ -384,14 +386,14 @@ class Character {
|
||||
|
||||
public String toString() {
|
||||
String toString = "";
|
||||
toString += index;
|
||||
toString += this.index;
|
||||
toString += " ";
|
||||
toString += characterValue;
|
||||
toString += " isCharacterPartOfPreviousWord=" + isCharacterPartOfPreviousWord;
|
||||
toString += " isFirstCharacterOfAWord=" + isFirstCharacterOfAWord;
|
||||
toString += " isCharacterAtTheBeginningOfNewLine=" + isCharacterAtTheBeginningOfNewLine;
|
||||
toString += " isCharacterPartOfASentence=" + isCharacterCloseToPreviousWord;
|
||||
toString += " isCharacterCloseToPreviousWord=" + isCharacterCloseToPreviousWord;
|
||||
toString += this.characterValue;
|
||||
toString += " isCharacterPartOfPreviousWord=" + this.isCharacterPartOfPreviousWord;
|
||||
toString += " isFirstCharacterOfAWord=" + this.isFirstCharacterOfAWord;
|
||||
toString += " isCharacterAtTheBeginningOfNewLine=" + this.isCharacterAtTheBeginningOfNewLine;
|
||||
toString += " isCharacterPartOfASentence=" + this.isCharacterCloseToPreviousWord;
|
||||
toString += " isCharacterCloseToPreviousWord=" + this.isCharacterCloseToPreviousWord;
|
||||
return toString;
|
||||
}
|
||||
|
||||
@@ -424,12 +426,12 @@ class CharacterFactory {
|
||||
this.isCharacterCloseToPreviousWord = this.isCharacterCloseToPreviousWord(textPosition);
|
||||
char character = this.getCharacterFromTextPosition(textPosition);
|
||||
int index = (int) textPosition.getX() / ForkPDFLayoutTextStripper.OUTPUT_SPACE_CHARACTER_WIDTH_IN_PT;
|
||||
return new Character(character, index, isCharacterPartOfPreviousWord, isFirstCharacterOfAWord,
|
||||
isCharacterAtTheBeginningOfNewLine, isCharacterCloseToPreviousWord);
|
||||
return new Character(character, index, this.isCharacterPartOfPreviousWord, this.isFirstCharacterOfAWord,
|
||||
this.isCharacterAtTheBeginningOfNewLine, this.isCharacterCloseToPreviousWord);
|
||||
}
|
||||
|
||||
private boolean isCharacterAtTheBeginningOfNewLine(final TextPosition textPosition) {
|
||||
if (!firstCharacterOfLineFound) {
|
||||
if (!this.firstCharacterOfLineFound) {
|
||||
return true;
|
||||
}
|
||||
TextPosition previousTextPosition = this.getPreviousTextPosition();
|
||||
@@ -438,18 +440,18 @@ class CharacterFactory {
|
||||
}
|
||||
|
||||
private boolean isFirstCharacterOfAWord(final TextPosition textPosition) {
|
||||
if (!firstCharacterOfLineFound) {
|
||||
if (!this.firstCharacterOfLineFound) {
|
||||
return true;
|
||||
}
|
||||
double numberOfSpaces = this.numberOfSpacesBetweenTwoCharacters(previousTextPosition, textPosition);
|
||||
double numberOfSpaces = this.numberOfSpacesBetweenTwoCharacters(this.previousTextPosition, textPosition);
|
||||
return (numberOfSpaces > 1) || this.isCharacterAtTheBeginningOfNewLine(textPosition);
|
||||
}
|
||||
|
||||
private boolean isCharacterCloseToPreviousWord(final TextPosition textPosition) {
|
||||
if (!firstCharacterOfLineFound) {
|
||||
if (!this.firstCharacterOfLineFound) {
|
||||
return false;
|
||||
}
|
||||
double numberOfSpaces = this.numberOfSpacesBetweenTwoCharacters(previousTextPosition, textPosition);
|
||||
double numberOfSpaces = this.numberOfSpacesBetweenTwoCharacters(this.previousTextPosition, textPosition);
|
||||
return (numberOfSpaces > 1 && numberOfSpaces <= ForkPDFLayoutTextStripper.OUTPUT_SPACE_CHARACTER_WIDTH_IN_PT);
|
||||
}
|
||||
|
||||
@@ -485,4 +487,4 @@ class CharacterFactory {
|
||||
this.previousTextPosition = previousTextPosition;
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf.layout;
|
||||
|
||||
import java.awt.geom.Rectangle2D;
|
||||
@@ -70,8 +71,8 @@ public class PDFLayoutTextStripperByArea extends ForkPDFLayoutTextStripper {
|
||||
* java coordinates (y == 0 is top), not PDF coordinates (y == 0 is bottom).
|
||||
*/
|
||||
public void addRegion(String regionName, Rectangle2D rect) {
|
||||
regions.add(regionName);
|
||||
regionArea.put(regionName, rect);
|
||||
this.regions.add(regionName);
|
||||
this.regionArea.put(regionName, rect);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -80,8 +81,8 @@ public class PDFLayoutTextStripperByArea extends ForkPDFLayoutTextStripper {
|
||||
* @param regionName The name of the region to delete.
|
||||
*/
|
||||
public void removeRegion(String regionName) {
|
||||
regions.remove(regionName);
|
||||
regionArea.remove(regionName);
|
||||
this.regions.remove(regionName);
|
||||
this.regionArea.remove(regionName);
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -89,7 +90,7 @@ public class PDFLayoutTextStripperByArea extends ForkPDFLayoutTextStripper {
|
||||
* @return A list of java.lang.String objects to identify the region names.
|
||||
*/
|
||||
public List<String> getRegions() {
|
||||
return regions;
|
||||
return this.regions;
|
||||
}
|
||||
|
||||
/**
|
||||
@@ -98,7 +99,7 @@ public class PDFLayoutTextStripperByArea extends ForkPDFLayoutTextStripper {
|
||||
* @return The text that was identified in that region.
|
||||
*/
|
||||
public String getTextForRegion(String regionName) {
|
||||
StringWriter text = regionText.get(regionName);
|
||||
StringWriter text = this.regionText.get(regionName);
|
||||
return text.toString();
|
||||
}
|
||||
|
||||
@@ -108,14 +109,14 @@ public class PDFLayoutTextStripperByArea extends ForkPDFLayoutTextStripper {
|
||||
* @throws IOException If there is an error while extracting text.
|
||||
*/
|
||||
public void extractRegions(PDPage page) throws IOException {
|
||||
for (String regionName : regions) {
|
||||
for (String regionName : this.regions) {
|
||||
setStartPage(getCurrentPageNo());
|
||||
setEndPage(getCurrentPageNo());
|
||||
// reset the stored text for the region so this class can be reused.
|
||||
ArrayList<List<TextPosition>> regionCharactersByArticle = new ArrayList<List<TextPosition>>();
|
||||
regionCharactersByArticle.add(new ArrayList<TextPosition>());
|
||||
regionCharacterList.put(regionName, regionCharactersByArticle);
|
||||
regionText.put(regionName, new StringWriter());
|
||||
this.regionCharacterList.put(regionName, regionCharactersByArticle);
|
||||
this.regionText.put(regionName, new StringWriter());
|
||||
}
|
||||
|
||||
if (page.hasContents()) {
|
||||
@@ -128,10 +129,10 @@ public class PDFLayoutTextStripperByArea extends ForkPDFLayoutTextStripper {
|
||||
*/
|
||||
@Override
|
||||
protected void processTextPosition(TextPosition text) {
|
||||
for (Map.Entry<String, Rectangle2D> regionAreaEntry : regionArea.entrySet()) {
|
||||
for (Map.Entry<String, Rectangle2D> regionAreaEntry : this.regionArea.entrySet()) {
|
||||
Rectangle2D rect = regionAreaEntry.getValue();
|
||||
if (rect.contains(text.getX(), text.getY())) {
|
||||
charactersByArticle = regionCharacterList.get(regionAreaEntry.getKey());
|
||||
this.charactersByArticle = this.regionCharacterList.get(regionAreaEntry.getKey());
|
||||
super.processTextPosition(text);
|
||||
}
|
||||
}
|
||||
@@ -143,9 +144,9 @@ public class PDFLayoutTextStripperByArea extends ForkPDFLayoutTextStripper {
|
||||
*/
|
||||
@Override
|
||||
protected void writePage() throws IOException {
|
||||
for (String region : regionArea.keySet()) {
|
||||
charactersByArticle = regionCharacterList.get(region);
|
||||
output = regionText.get(region);
|
||||
for (String region : this.regionArea.keySet()) {
|
||||
this.charactersByArticle = this.regionCharacterList.get(region);
|
||||
this.output = this.regionText.get(region);
|
||||
super.writePage();
|
||||
}
|
||||
}
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf;
|
||||
|
||||
import java.util.List;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf;
|
||||
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,10 +13,12 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.pdf.aot;
|
||||
|
||||
import org.assertj.core.api.Assertions;
|
||||
import org.junit.jupiter.api.Test;
|
||||
|
||||
import org.springframework.aot.hint.RuntimeHints;
|
||||
|
||||
import static org.springframework.aot.hint.predicate.RuntimeHintsPredicates.resource;
|
||||
|
||||
@@ -1,4 +1,20 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!--
|
||||
~ Copyright 2023-2024 the original author or authors.
|
||||
~
|
||||
~ Licensed under the Apache License, Version 2.0 (the "License");
|
||||
~ you may not use this file except in compliance with the License.
|
||||
~ You may obtain a copy of the License at
|
||||
~
|
||||
~ https://www.apache.org/licenses/LICENSE-2.0
|
||||
~
|
||||
~ Unless required by applicable law or agreed to in writing, software
|
||||
~ distributed under the License is distributed on an "AS IS" BASIS,
|
||||
~ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
|
||||
~ See the License for the specific language governing permissions and
|
||||
~ limitations under the License.
|
||||
-->
|
||||
|
||||
<project xmlns="http://maven.apache.org/POM/4.0.0"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
|
||||
<modelVersion>4.0.0</modelVersion>
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.tika;
|
||||
|
||||
import java.io.IOException;
|
||||
|
||||
@@ -1,11 +1,11 @@
|
||||
/*
|
||||
* Copyright 2023 - 2024 the original author or authors.
|
||||
* Copyright 2023-2024 the original author or authors.
|
||||
*
|
||||
* Licensed under the Apache License, Version 2.0 (the "License");
|
||||
* you may not use this file except in compliance with the License.
|
||||
* You may obtain a copy of the License at
|
||||
*
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
* https://www.apache.org/licenses/LICENSE-2.0
|
||||
*
|
||||
* Unless required by applicable law or agreed to in writing, software
|
||||
* distributed under the License is distributed on an "AS IS" BASIS,
|
||||
@@ -13,6 +13,7 @@
|
||||
* See the License for the specific language governing permissions and
|
||||
* limitations under the License.
|
||||
*/
|
||||
|
||||
package org.springframework.ai.reader.tika;
|
||||
|
||||
import org.junit.jupiter.params.ParameterizedTest;
|
||||
|
||||
Reference in New Issue
Block a user