452 lines
19 KiB
HTML
452 lines
19 KiB
HTML
<!DOCTYPE html>
|
|
<html lang="en">
|
|
<head>
|
|
<meta charset="UTF-8">
|
|
<!--[if IE]><meta http-equiv="X-UA-Compatible" content="IE=edge"><![endif]-->
|
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
|
<meta name="generator" content="Asciidoctor 1.5.8">
|
|
<title>Google Cloud Vision</title>
|
|
<link rel="stylesheet" href="css/spring.css">
|
|
<link rel="stylesheet" href="https://cdnjs.cloudflare.com/ajax/libs/font-awesome/4.7.0/css/font-awesome.min.css">
|
|
|
|
<style>
|
|
/* Utility class: takes an element out of rendering (applied to inactive tab content). */
.hidden {
    display: none;
}

/* Tab strip container; no bottom border so it sits flush on the content below. */
.switch {
    border-width: 1px 1px 0 1px;
    border-style: solid;
    border-color: #7a2518;
    display: inline-block;
}

/* A single clickable tab. */
.switch--item {
    padding: 10px;
    background-color: #ffffff;
    color: #7a2518;
    display: inline-block;
    cursor: pointer;
}

/* Vertical divider between adjacent tabs. */
.switch--item:not(:first-child) {
    border-width: 0 0 0 1px;
    border-style: solid;
    border-color: #7a2518;
}

/* Active tab: inverted colours, using the same accent as the borders and text above. */
.switch--item.selected {
    background-color: #7a2518;
    color: #ffffff;
}
|
|
</style>
|
|
<script src="https://cdnjs.cloudflare.com/ajax/libs/zepto/1.2.0/zepto.min.js"></script>
|
|
<script type="text/javascript">
|
|
/**
 * Converts every ".primary" block into a tabbed switch and folds each
 * ".secondary" block into the switch of the nearest preceding ".primary"
 * sibling as an additional, initially hidden tab.
 */
function addBlockSwitches() {
    $('.primary').each(function() {
        var primary = $(this);
        // The primary block's own tab starts out selected.
        createSwitchItem(primary, createBlockSwitch(primary)).item.addClass("selected");
        // The title text has been copied into the tab, so remove it from the block.
        primary.children('.title').remove();
    });
    $('.secondary').each(function(idx, node) {
        var secondary = $(node);
        var primary = findPrimary(secondary);
        if (primary.length === 0) {
            // No primary block to attach to: leave the secondary block untouched
            // rather than removing content that was never re-attached.
            return;
        }
        var switchItem = createSwitchItem(secondary, primary.children('.switch'));
        switchItem.content.addClass('hidden');
        primary.append(switchItem.content);
        // The content now lives inside the primary block; drop the empty shell.
        secondary.remove();
    });
}
|
|
|
|
/**
 * Creates an empty tab strip and inserts it as the first child of the given block.
 *
 * @param primary collection wrapping the block that will host the tab strip
 * @returns the newly created ".switch" element collection
 */
function createBlockSwitch(primary) {
    // Declared with var so the strip does not leak into the global scope.
    var blockSwitch = $('<div class="switch"></div>');
    primary.prepend(blockSwitch);
    return blockSwitch;
}
|
|
|
|
/**
 * Walks backwards through the preceding siblings of a secondary block until
 * a ".primary" block is found.
 *
 * @param secondary collection wrapping the secondary block
 * @returns the nearest preceding ".primary" sibling, or an empty collection
 *          when there is none
 */
function findPrimary(secondary) {
    var candidate = secondary.prev();
    // Stop once the siblings are exhausted: prev() on an empty collection stays
    // empty and is() stays false, so without the length check this would spin forever.
    while (candidate.length > 0 && !candidate.is('.primary')) {
        candidate = candidate.prev();
    }
    return candidate;
}
|
|
|
|
/**
 * Builds one tab for the given block and appends it to the tab strip.
 *
 * The tab label is taken from the block's ".title" child. The block's first
 * ".content" child (with any immediately following ".colist" moved into it)
 * becomes the panel shown when the tab is clicked.
 *
 * @param block       collection wrapping the block the tab represents
 * @param blockSwitch tab strip collection the new tab is appended to
 * @returns an object with the tab ("item") and its panel ("content")
 */
function createSwitchItem(block, blockSwitch) {
    var blockName = block.children('.title').text();
    var content = block.children('.content').first().append(block.next('.colist'));
    // Set the label as text rather than splicing it into the HTML string, so
    // titles containing "<" or "&" are displayed literally.
    var item = $('<div class="switch--item"></div>').text(blockName);
    // The panel is passed as event data and read back through e.data.
    item.on('click', '', content, function(e) {
        $(this).addClass('selected');
        $(this).siblings().removeClass('selected');
        // Hide every sibling panel, then reveal this tab's panel.
        e.data.siblings('.content').addClass('hidden');
        e.data.removeClass('hidden');
    });
    blockSwitch.append(item);
    return {'item': item, 'content': content};
}
|
|
|
|
// Passing a function to $ registers it as a DOM-ready callback, so the
// block switches are built once the document has been parsed.
$(addBlockSwitches);
|
|
</script>
|
|
|
|
</head>
|
|
<body class="book toc2 toc-left">
|
|
<div id="header">
|
|
<div id="toc" class="toc2">
|
|
<div id="toctitle">Table of Contents</div>
|
|
<ul class="sectlevel1">
|
|
<li><a href="#_google_cloud_vision">Google Cloud Vision</a>
|
|
<ul class="sectlevel2">
|
|
<li><a href="#_dependency_setup">Dependency Setup</a></li>
|
|
<li><a href="#_image_analysis">Image Analysis</a></li>
|
|
<li><a href="#_document_ocr_template">Document OCR Template</a></li>
|
|
<li><a href="#_configuration">Configuration</a></li>
|
|
<li><a href="#_sample">Sample</a></li>
|
|
</ul>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
<div id="content">
|
|
<div class="sect1">
|
|
<h2 id="_google_cloud_vision"><a class="link" href="#_google_cloud_vision">Google Cloud Vision</a></h2>
|
|
<div class="sectionbody">
|
|
<div class="paragraph">
|
|
<p>The <a href="https://cloud.google.com/vision/">Google Cloud Vision API</a> allows users to leverage machine learning algorithms for processing images and documents including: image classification, face detection, text extraction, optical character recognition, and others.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Spring Cloud GCP provides:</p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p>A convenience starter which automatically configures authentication settings and client objects needed to begin using the <a href="https://cloud.google.com/vision/">Google Cloud Vision API</a>.</p>
|
|
</li>
|
|
<li>
|
|
<p><code>CloudVisionTemplate</code> which simplifies interactions with the Cloud Vision API.</p>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p>Allows you to easily send images to the API as Spring Resources.</p>
|
|
</li>
|
|
<li>
|
|
<p>Offers convenience methods for common operations, such as classifying content of an image.</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</li>
|
|
<li>
|
|
<p><code>DocumentOcrTemplate</code> which offers convenient methods for running <a href="https://cloud.google.com/vision/docs/pdf">optical character recognition (OCR)</a> on PDF and TIFF documents.</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_dependency_setup"><a class="link" href="#_dependency_setup">Dependency Setup</a></h3>
|
|
<div class="paragraph">
|
|
<p>To begin using this library, add the <code>spring-cloud-gcp-starter-vision</code> artifact to your project.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Maven coordinates, using <a href="getting-started.html#_bill_of_materials">Spring Cloud GCP BOM</a>:</p>
|
|
</div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="highlightjs highlight"><code class="language-xml hljs" data-lang="xml">&lt;dependency&gt;
    &lt;groupId&gt;org.springframework.cloud&lt;/groupId&gt;
    &lt;artifactId&gt;spring-cloud-gcp-starter-vision&lt;/artifactId&gt;
&lt;/dependency&gt;</code></pre>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Gradle coordinates:</p>
|
|
</div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="highlightjs highlight"><code>dependencies {
|
|
compile group: 'org.springframework.cloud', name: 'spring-cloud-gcp-starter-vision'
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_cloud_vision_ocr_dependencies"><a class="link" href="#_cloud_vision_ocr_dependencies">Cloud Vision OCR Dependencies</a></h4>
|
|
<div class="paragraph">
|
|
<p>If you are interested in applying optical character recognition (OCR) on documents for your project, you’ll need to add both <code>spring-cloud-gcp-starter-vision</code> and <code>spring-cloud-gcp-starter-storage</code> to your dependencies.
|
|
The storage starter is necessary because the Cloud Vision API will process your documents and write OCR output files all within your Google Cloud Storage buckets.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Maven coordinates using <a href="getting-started.html#_bill_of_materials">Spring Cloud GCP BOM</a>:</p>
|
|
</div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="highlightjs highlight"><code class="language-xml hljs" data-lang="xml">&lt;dependency&gt;
    &lt;groupId&gt;org.springframework.cloud&lt;/groupId&gt;
    &lt;artifactId&gt;spring-cloud-gcp-starter-vision&lt;/artifactId&gt;
&lt;/dependency&gt;
&lt;dependency&gt;
    &lt;groupId&gt;org.springframework.cloud&lt;/groupId&gt;
    &lt;artifactId&gt;spring-cloud-gcp-starter-storage&lt;/artifactId&gt;
&lt;/dependency&gt;</code></pre>
|
|
</div>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>Gradle coordinates:</p>
|
|
</div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="highlightjs highlight"><code>dependencies {
|
|
compile group: 'org.springframework.cloud', name: 'spring-cloud-gcp-starter-vision'
|
|
compile group: 'org.springframework.cloud', name: 'spring-cloud-gcp-starter-storage'
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_image_analysis"><a class="link" href="#_image_analysis">Image Analysis</a></h3>
|
|
<div class="paragraph">
|
|
<p>The <code>CloudVisionTemplate</code> allows you to easily analyze images; it provides the following method for interfacing with Cloud Vision:</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p><code>public AnnotateImageResponse analyzeImage(Resource imageResource, Feature.Type…​ featureTypes)</code></p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p><strong>Parameters:</strong></p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p><code>Resource imageResource</code> refers to the Spring Resource of the image object you wish to analyze.
|
|
The Google Cloud Vision documentation provides a <a href="https://cloud.google.com/vision/docs/supported-files">list of the image types that they support</a>.</p>
|
|
</li>
|
|
<li>
|
|
<p><code>Feature.Type…​ featureTypes</code> refers to a var-arg array of Cloud Vision Features to extract from the image.
|
|
A feature refers to a kind of image analysis one wishes to perform on an image, such as label detection, OCR recognition, facial detection, etc.
|
|
One may specify multiple features to analyze within one request.
|
|
A full list of Cloud Vision Features is provided in the <a href="https://cloud.google.com/vision/docs/features">Cloud Vision Feature docs</a>.</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p><strong>Returns:</strong></p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p><a href="https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#google.cloud.vision.v1.AnnotateImageResponse"><code>AnnotateImageResponse</code></a> contains the results of all the feature analyses that were specified in the request.
|
|
For each feature type that you provide in the request, <code>AnnotateImageResponse</code> provides a getter method to get the result of that feature analysis.
|
|
For example, if you analyzed an image using the <code>LABEL_DETECTION</code> feature, you would retrieve the results from the response using <code>annotateImageResponse.getLabelAnnotationsList()</code>.</p>
|
|
<div class="paragraph">
|
|
<p><code>AnnotateImageResponse</code> is provided by the Google Cloud Vision libraries; please consult the <a href="https://cloud.google.com/vision/docs/reference/rpc/google.cloud.vision.v1#google.cloud.vision.v1.AnnotateImageResponse">RPC reference</a> or <a href="https://googleapis.github.io/googleapis/java/all/latest/apidocs/com/google/cloud/vision/v1/AnnotateImageResponse.html">Javadoc</a> for more details.
|
|
Additionally, you may consult the <a href="https://cloud.google.com/vision/docs/">Cloud Vision docs</a> to familiarize yourself with the concepts and features of the API.</p>
|
|
</div>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_detect_image_labels_example"><a class="link" href="#_detect_image_labels_example">Detect Image Labels Example</a></h4>
|
|
<div class="paragraph">
|
|
<p><a href="https://cloud.google.com/vision/docs/detecting-labels">Image labeling</a> refers to producing labels that describe the contents of an image.
|
|
Below is a code sample of how this is done using the Cloud Vision Spring Template.</p>
|
|
</div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre class="highlightjs highlight"><code class="language-java hljs" data-lang="java">@Autowired
|
|
private ResourceLoader resourceLoader;
|
|
|
|
@Autowired
|
|
private CloudVisionTemplate cloudVisionTemplate;
|
|
|
|
public void processImage() {
|
|
Resource imageResource = this.resourceLoader.getResource("my_image.jpg");
|
|
AnnotateImageResponse response = this.cloudVisionTemplate.analyzeImage(
|
|
imageResource, Type.LABEL_DETECTION);
|
|
System.out.println("Image Classification results: " + response.getLabelAnnotationsList());
|
|
}</code></pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_document_ocr_template"><a class="link" href="#_document_ocr_template">Document OCR Template</a></h3>
|
|
<div class="paragraph">
|
|
<p>The <code>DocumentOcrTemplate</code> allows you to easily run <a href="https://cloud.google.com/vision/docs/pdf">optical character recognition (OCR)</a> on your PDF and TIFF documents stored in your Google Storage bucket.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>First, you will need to create a bucket in <a href="https://console.cloud.google.com/storage">Google Cloud Storage</a> and <a href="https://cloud.google.com/storage/docs/uploading-objects#storage-upload-object-java">upload the documents you wish to process into the bucket</a>.</p>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_running_ocr_on_a_document"><a class="link" href="#_running_ocr_on_a_document">Running OCR on a Document</a></h4>
|
|
<div class="paragraph">
|
|
<p>When OCR is run on a document, the Cloud Vision APIs will output a collection of OCR output files in JSON which describe the text content, bounding rectangles of words and letters, and other information about the document.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>The <code>DocumentOcrTemplate</code> provides the following method for running OCR on a document saved in Google Cloud Storage:</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p><code>ListenableFuture&lt;DocumentOcrResultSet&gt; runOcrForDocument(GoogleStorageLocation document, GoogleStorageLocation outputFilePathPrefix)</code></p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p>The method allows you to specify the location of the document and the output location for where all the JSON output files will be saved in Google Cloud Storage.
|
|
It returns a <code>ListenableFuture</code> containing <code>DocumentOcrResultSet</code> which contains the OCR content of the document.</p>
|
|
</div>
|
|
<div class="admonitionblock note">
|
|
<table>
|
|
<tr>
|
|
<td class="icon">
|
|
<i class="fa icon-note" title="Note"></i>
|
|
</td>
|
|
<td class="content">
|
|
Running OCR on a document is an operation that can take anywhere from several minutes to several hours, depending on how large the document is.
|
|
It is recommended to register callbacks to the returned ListenableFuture or ignore it and process the JSON output files at a later point in time using <code>readOcrOutputFile</code> or <code>readOcrOutputFileSet</code>.
|
|
</td>
|
|
</tr>
|
|
</table>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_running_ocr_example"><a class="link" href="#_running_ocr_example">Running OCR Example</a></h4>
|
|
<div class="paragraph">
|
|
<p>Below is a code snippet of how to run OCR on a document stored in a Google Storage bucket and read the text in the first page of the document.</p>
|
|
</div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre>@Autowired
|
|
private DocumentOcrTemplate documentOcrTemplate;
|
|
|
|
public void runOcrOnDocument() {
|
|
GoogleStorageLocation document = GoogleStorageLocation.forFile(
|
|
"your-bucket", "test.pdf");
|
|
GoogleStorageLocation outputLocationPrefix = GoogleStorageLocation.forFolder(
|
|
"your-bucket", "output_folder/test.pdf/");
|
|
|
|
ListenableFuture&lt;DocumentOcrResultSet&gt; result =
|
|
this.documentOcrTemplate.runOcrForDocument(
|
|
document, outputLocationPrefix);
|
|
|
|
DocumentOcrResultSet ocrPages = result.get(5, TimeUnit.MINUTES);
|
|
|
|
String page1Text = ocrPages.getPage(1).getText();
|
|
System.out.println(page1Text);
|
|
}</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_reading_ocr_output_files"><a class="link" href="#_reading_ocr_output_files">Reading OCR Output Files</a></h4>
|
|
<div class="paragraph">
|
|
<p>In some use-cases, you may need to directly read OCR output files stored in Google Cloud Storage.</p>
|
|
</div>
|
|
<div class="paragraph">
|
|
<p><code>DocumentOcrTemplate</code> offers the following methods for reading and processing OCR output files:</p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p><code>readOcrOutputFileSet(GoogleStorageLocation jsonOutputFilePathPrefix)</code>:
|
|
Reads a collection of OCR output files under a file path prefix and returns the parsed contents.
|
|
All of the files under the path should correspond to the same document.</p>
|
|
</li>
|
|
<li>
|
|
<p><code>readOcrOutputFile(GoogleStorageLocation jsonFile)</code>:
|
|
Reads a single OCR output file and returns the parsed contents.</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
<div class="sect3">
|
|
<h4 id="_reading_ocr_output_files_example"><a class="link" href="#_reading_ocr_output_files_example">Reading OCR Output Files Example</a></h4>
|
|
<div class="paragraph">
|
|
<p>The code snippet below describes how to read the OCR output files of a single document.</p>
|
|
</div>
|
|
<div class="listingblock">
|
|
<div class="content">
|
|
<pre>@Autowired
|
|
private DocumentOcrTemplate documentOcrTemplate;
|
|
|
|
// Parses the OCR output files corresponding to a single document in a directory
|
|
public void parseOutputFileSet() {
|
|
GoogleStorageLocation ocrOutputPrefix = GoogleStorageLocation.forFolder(
|
|
"your-bucket", "json_output_set/");
|
|
|
|
DocumentOcrResultSet result = this.documentOcrTemplate.readOcrOutputFileSet(ocrOutputPrefix);
|
|
System.out.println("Page 2 text: " + result.getPage(2).getText());
|
|
}
|
|
|
|
// Parses a single OCR output file
|
|
public void parseSingleOutputFile() {
|
|
GoogleStorageLocation ocrOutputFile = GoogleStorageLocation.forFile(
|
|
"your-bucket", "json_output_set/test_output-2-to-2.json");
|
|
|
|
DocumentOcrResultSet result = this.documentOcrTemplate.readOcrOutputFile(ocrOutputFile);
|
|
System.out.println("Page 2 text: " + result.getPage(2).getText());
|
|
}</pre>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_configuration"><a class="link" href="#_configuration">Configuration</a></h3>
|
|
<div class="paragraph">
|
|
<p>The following options may be configured with Spring Cloud GCP Vision libraries.</p>
|
|
</div>
|
|
<table class="tableblock frame-all grid-all stretch">
|
|
<colgroup>
|
|
<col style="width: 25%;">
|
|
<col style="width: 25%;">
|
|
<col style="width: 25%;">
|
|
<col style="width: 25%;">
|
|
</colgroup>
|
|
<tbody>
|
|
<tr>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">Name</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">Description</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">Required</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">Default value</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock"><code>spring.cloud.gcp.vision.enabled</code></p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">Enables or disables Cloud Vision autoconfiguration</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">No</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock"><code>true</code></p></td>
|
|
</tr>
|
|
<tr>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock"><code>spring.cloud.gcp.vision.executors-threads-count</code></p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">Number of threads used during document OCR processing for waiting on long-running OCR operations</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">No</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">1</p></td>
|
|
</tr>
|
|
<tr>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock"><code>spring.cloud.gcp.vision.json-output-batch-size</code></p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">Number of document pages to include in each OCR output file.</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">No</p></td>
|
|
<td class="tableblock halign-left valign-top"><p class="tableblock">20</p></td>
|
|
</tr>
|
|
</tbody>
|
|
</table>
|
|
</div>
|
|
<div class="sect2">
|
|
<h3 id="_sample"><a class="link" href="#_sample">Sample</a></h3>
|
|
<div class="paragraph">
|
|
<p>Samples are provided to show example usages of Spring Cloud GCP with Google Cloud Vision.</p>
|
|
</div>
|
|
<div class="ulist">
|
|
<ul>
|
|
<li>
|
|
<p>The <a href="https://github.com/spring-cloud/spring-cloud-gcp/tree/master/spring-cloud-gcp-samples/spring-cloud-gcp-vision-api-sample">Image Labeling Sample</a> shows you how to use image labeling in your Spring application.
|
|
The application generates labels describing the content inside the images you specify in the application.</p>
|
|
</li>
|
|
<li>
|
|
<p>The <a href="https://github.com/spring-cloud/spring-cloud-gcp/tree/master/spring-cloud-gcp-samples/spring-cloud-gcp-vision-ocr-demo">Document OCR demo</a> shows how you can apply OCR processing on your PDF/TIFF documents in order to extract their text contents.</p>
|
|
</li>
|
|
</ul>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
</div>
|
|
<script type="text/javascript" src="js/tocbot/tocbot.min.js"></script>
|
|
<script type="text/javascript" src="js/toc.js"></script>
|
|
<link rel="stylesheet" href="js/highlight/styles/atom-one-dark-reasonable.min.css">
|
|
<script src="js/highlight/highlight.min.js"></script>
|
|
<script>hljs.initHighlighting()</script>
|
|
</body>
|
|
</html> |