Documentation Migration
This commit migrates the old Spring Batch documentation to a new asciidoc toolchain. It will be the first piece in modernizing the existing Spring Batch documentation. Future steps will include making java based configuration more prominent in the reference documentation. Resolves BATCH-2620
58
build.gradle
@@ -8,11 +8,12 @@ buildscript {
|
||||
maven { url 'https://plugins.gradle.org/m2/' }
|
||||
}
|
||||
dependencies {
|
||||
classpath 'io.spring.gradle:docbook-reference-plugin:0.3.1'
|
||||
classpath 'org.springframework.build.gradle:propdeps-plugin:0.0.7'
|
||||
classpath 'io.spring.gradle:spring-io-plugin:0.0.5.RELEASE'
|
||||
classpath "io.spring.gradle:dependency-management-plugin:0.6.0.RELEASE"
|
||||
classpath "org.sonarsource.scanner.gradle:sonarqube-gradle-plugin:2.1"
|
||||
classpath "org.asciidoctor:asciidoctor-gradle-plugin:1.5.3"
|
||||
classpath "org.asciidoctor:asciidoctorj-pdf:1.5.0-alpha.14"
|
||||
}
|
||||
}
|
||||
|
||||
@@ -381,6 +382,10 @@ project('spring-batch-infrastructure') {
|
||||
}
|
||||
}
|
||||
|
||||
project('spring-batch-docs') {
|
||||
|
||||
}
|
||||
|
||||
project('spring-batch-core-tests') {
|
||||
description = 'Spring Batch Core Tests'
|
||||
project.tasks.findByPath("artifactoryPublish")?.enabled = false
|
||||
@@ -613,10 +618,28 @@ project('spring-batch-samples') {
|
||||
}
|
||||
}
|
||||
|
||||
apply plugin: 'docbook-reference'
|
||||
apply plugin: "org.asciidoctor.convert"
|
||||
asciidoctor {
|
||||
sourceDir = file('spring-batch-docs/asciidoc')
|
||||
sources {
|
||||
include '*.adoc'
|
||||
}
|
||||
logDocuments = true
|
||||
backends = ["html", "pdf"]
|
||||
options doctype: 'book', eruby: 'erubis'
|
||||
attributes 'icons': 'font',
|
||||
'idprefix': '',
|
||||
'idseparator': '-',
|
||||
docinfo: '',
|
||||
revnumber: project.version,
|
||||
sectanchors: '',
|
||||
sectnums: '',
|
||||
'source-highlighter': 'coderay@', // TODO switch to 'rouge' once supported by the html5 backend
|
||||
stylesdir: 'stylesheets/',
|
||||
stylesheet: 'spring.css',
|
||||
'spring-version': project.version,
|
||||
'allow-uri-read': ''
|
||||
|
||||
reference {
|
||||
sourceDir = file('src/site/docbook/reference')
|
||||
}
|
||||
|
||||
apply plugin: 'org.sonarqube'
|
||||
@@ -698,7 +721,32 @@ task docsZip(type: Zip) {
|
||||
into 'api'
|
||||
}
|
||||
|
||||
from (reference) {
|
||||
from (asciidoctor) {
|
||||
exclude '*.pdf'
|
||||
exclude '*.html'
|
||||
exclude 'images'
|
||||
exclude 'html/index-pdf.html'
|
||||
exclude 'pdf/images'
|
||||
exclude 'pdf/appendix.pdf'
|
||||
exclude 'pdf/common-patterns.pdf'
|
||||
exclude 'pdf/domain.pdf'
|
||||
exclude 'pdf/index.pdf'
|
||||
exclude 'pdf/index-single.pdf'
|
||||
exclude 'pdf/job.pdf'
|
||||
exclude 'pdf/jsr-352.pdf'
|
||||
exclude 'pdf/readersAndWriters.pdf'
|
||||
exclude 'pdf/repeat.pdf'
|
||||
exclude 'pdf/retry.pdf'
|
||||
exclude 'pdf/scalability.pdf'
|
||||
exclude 'pdf/schema-appendix.pdf'
|
||||
exclude 'pdf/spring-batch-integration.pdf'
|
||||
exclude 'pdf/spring-batch-intro.pdf'
|
||||
exclude 'pdf/step.pdf'
|
||||
exclude 'pdf/testing.pdf'
|
||||
exclude 'pdf/transaction-appendix.pdf'
|
||||
exclude 'pdf/whatsnew.pdf'
|
||||
exclude 'pdf/glossary.pdf'
|
||||
|
||||
into 'reference'
|
||||
}
|
||||
}
|
||||
|
||||
4
gradle/wrapper/gradle-wrapper.properties
vendored
@@ -1,6 +1,6 @@
|
||||
#Sat May 06 15:13:08 CEST 2017
|
||||
#Wed May 24 14:01:03 EDT 2017
|
||||
distributionBase=GRADLE_USER_HOME
|
||||
distributionPath=wrapper/dists
|
||||
zipStoreBase=GRADLE_USER_HOME
|
||||
zipStorePath=wrapper/dists
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-3.5-bin.zip
|
||||
distributionUrl=https\://services.gradle.org/distributions/gradle-3.5-all.zip
|
||||
|
||||
@@ -2,6 +2,7 @@ rootProject.name = 'spring-batch'
|
||||
|
||||
include 'spring-batch-core'
|
||||
include 'spring-batch-core-tests'
|
||||
include 'spring-batch-docs'
|
||||
include 'spring-batch-infrastructure'
|
||||
include 'spring-batch-infrastructure-tests'
|
||||
include 'spring-batch-test'
|
||||
|
||||
128
spring-batch-docs/asciidoc/appendix.adoc
Normal file
@@ -0,0 +1,128 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[listOfReadersAndWriters]]
|
||||
|
||||
[appendix]
|
||||
== List of ItemReaders and ItemWriters
|
||||
|
||||
[[itemReadersAppendix]]
|
||||
|
||||
=== Item Readers
|
||||
|
||||
.Available Item Readers
|
||||
[options="header"]
|
||||
|===============
|
||||
|Item Reader|Description
|
||||
|AbstractItemCountingItemStreamItemReader|Abstract base class that provides basic
|
||||
restart capabilities by counting the number of items returned from
|
||||
an `ItemReader`.
|
||||
|AggregateItemReader|An `ItemReader` that delivers a list as its
|
||||
item, storing up objects from the injected `ItemReader` until they
|
||||
are ready to be packed out as a collection. This `ItemReader` should
|
||||
mark the beginning and end of records with the constant values in
|
||||
`FieldSetMapper AggregateItemReader#__$$BEGIN_RECORD$$__` and
|
||||
`AggregateItemReader#__$$END_RECORD$$__`
|
||||
|AmqpItemReader|Given a Spring AmqpTemplate it provides
|
||||
synchronous receive methods. The receiveAndConvert() method
|
||||
lets you receive POJO objects.
|
||||
|FlatFileItemReader|Reads from a flat file. Includes `ItemStream`
|
||||
and Skippable functionality. See section on Read from a
|
||||
File
|
||||
|HibernateCursorItemReader|Reads from a cursor based on an HQL query. See
|
||||
section on Reading from a Database
|
||||
|HibernatePagingItemReader|Reads from a paginated HQL query
|
||||
|ItemReaderAdapter|Adapts any class to the
|
||||
`ItemReader` interface.
|
||||
|JdbcCursorItemReader|Reads from a database cursor via JDBC. See
|
||||
HOWTO - Read from a Database
|
||||
|JdbcPagingItemReader|Given a SQL statement, pages through the rows,
|
||||
such that large datasets can be read without running out of
|
||||
memory
|
||||
|JmsItemReader|Given a Spring JmsOperations object and a JMS
|
||||
Destination or destination name to send errors, provides items
|
||||
received through the injected JmsOperations receive()
|
||||
method
|
||||
|JpaPagingItemReader|Given a JPQL statement, pages through the
|
||||
rows, such that large datasets can be read without running out of
|
||||
memory
|
||||
|ListItemReader|Provides the items from a list, one at a
|
||||
time
|
||||
|MongoItemReader|Given a MongoOperations object and JSON based MongoDB
|
||||
query, provides items received from the MongoOperations find method
|
||||
|Neo4jItemReader|Given a Neo4jOperations object and the components of a
|
||||
Cypher query, items are returned as the result of the Neo4jOperations.query
|
||||
method
|
||||
|RepositoryItemReader|Given a Spring Data PagingAndSortingRepository object,
|
||||
a Sort and the name of the method to execute, returns items provided by the
|
||||
Spring Data repository implementation
|
||||
|StoredProcedureItemReader|Reads from a database cursor resulting from the
|
||||
execution of a database stored procedure. See HOWTO - Read from a
|
||||
Database
|
||||
|StaxEventItemReader|Reads via StAX. See HOWTO - Read from a
|
||||
File
|
||||
|
||||
|===============
|
||||
|
||||
|
||||
[[itemWritersAppendix]]
|
||||
|
||||
|
||||
=== Item Writers
|
||||
|
||||
.Available Item Writers
|
||||
[options="header"]
|
||||
|===============
|
||||
|Item Writer|Description
|
||||
|AbstractItemStreamItemWriter|Abstract base class that combines the
|
||||
`ItemStream` and
|
||||
`ItemWriter` interfaces.
|
||||
|AmqpItemWriter|Given a Spring AmqpTemplate it provides
|
||||
a synchronous send method. The convertAndSend(Object)
|
||||
method lets you send POJO objects.
|
||||
|CompositeItemWriter|Passes an item to the `write` method of each
|
||||
in an injected __List__ of __ItemWriter__ objects
|
||||
|FlatFileItemWriter|Writes to a flat file. Includes `ItemStream` and
|
||||
Skippable functionality. See section on Writing to a File
|
||||
|GemfireItemWriter|Using a GemfireOperations object, items are either written
|
||||
or removed from the Gemfire instance based on the configuration of the delete
|
||||
flag
|
||||
|HibernateItemWriter|This item writer is hibernate session aware
|
||||
and handles some transaction-related work that a non-"hibernate
|
||||
aware" item writer would not need to know about and then delegates
|
||||
to another item writer to do the actual writing.
|
||||
|ItemWriterAdapter|Adapts any class to the
|
||||
`ItemWriter` interface.
|
||||
|JdbcBatchItemWriter|Uses batching features from a
|
||||
`PreparedStatement`, if available, and can
|
||||
take rudimentary steps to locate a failure during a
|
||||
`flush`.
|
||||
|JmsItemWriter|Using a JmsOperations object, items are written
|
||||
to the default queue via the JmsOperations.convertAndSend() method
|
||||
|JpaItemWriter|This item writer is JPA EntityManager aware
|
||||
and handles some transaction-related work that a non-"jpa aware"
|
||||
`ItemWriter` would not need to know about and
|
||||
then delegates to another writer to do the actual writing.
|
||||
|MimeMessageItemWriter|Using Spring's JavaMailSender, items of type `MimeMessage`
|
||||
are sent as mail messages
|
||||
|MongoItemWriter|Given a MongoOperations object, items are written
|
||||
via the MongoOperations.save(Object) method. The actual write is delayed
|
||||
until the last possible moment before the transaction commits.
|
||||
|Neo4jItemWriter|Given a Neo4jOperations object, items are persisted via the
|
||||
save(Object) method or deleted via the delete(Object) per the
|
||||
`ItemWriter's` configuration
|
||||
|PropertyExtractingDelegatingItemWriter|Extends AbstractMethodInvokingDelegator
|
||||
creating arguments on the fly. Arguments are created by retrieving
|
||||
the values from the fields in the item to be processed (via a
|
||||
SpringBeanWrapper) based on an injected array of field
|
||||
name
|
||||
|RepositoryItemWriter|Given a Spring Data CrudRepository implementation,
|
||||
items are saved via the method specified in the configuration.
|
||||
|StaxEventItemWriter|Uses an __ObjectToXmlSerializer__ implementation to
|
||||
convert each item to XML and then writes it to an XML file using
|
||||
StAX.
|
||||
|
||||
|===============
|
||||
|
||||
|
||||
635
spring-batch-docs/asciidoc/common-patterns.adoc
Normal file
@@ -0,0 +1,635 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[commonPatterns]]
|
||||
|
||||
== Common Batch Patterns
|
||||
|
||||
Some batch jobs can be assembled purely from off-the-shelf components
|
||||
in Spring Batch. For instance the `ItemReader` and
|
||||
`ItemWriter` implementations can be configured to cover
|
||||
a wide range of scenarios. However, for the majority of cases, custom code
|
||||
will have to be written. The main API entry points for application
|
||||
developers are the `Tasklet`,
|
||||
`ItemReader`, `ItemWriter` and the
|
||||
various listener interfaces. Most simple batch jobs will be able to use
|
||||
off-the-shelf input from a Spring Batch `ItemReader`,
|
||||
but it is often the case that there are custom concerns in the processing
|
||||
and writing, which require developers to implement an
|
||||
`ItemWriter` or
|
||||
`ItemProcessor`.
|
||||
|
||||
Here, we provide a few examples of common patterns in custom business
|
||||
logic. These examples primarily feature the listener interfaces. It should
|
||||
be noted that an `ItemReader` or
|
||||
`ItemWriter` can implement a listener interface as
|
||||
well, if appropriate.
|
||||
|
||||
[[loggingItemProcessingAndFailures]]
|
||||
=== Logging Item Processing and Failures
|
||||
|
||||
A common use case is the need for special handling of errors in a
|
||||
step, item by item, perhaps logging to a special channel, or inserting a
|
||||
record into a database. A chunk-oriented `Step`
|
||||
(created from the step factory beans) allows users to implement this use
|
||||
case with a simple `ItemReadListener`, for errors on
|
||||
read, and an `ItemWriteListener`, for errors on
|
||||
write. The below code snippets illustrate a listener that logs both read
|
||||
and write failures:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class ItemFailureLoggerListener extends ItemListenerSupport {
|
||||
|
||||
private static Log logger = LogFactory.getLog("item.error");
|
||||
|
||||
public void onReadError(Exception ex) {
|
||||
logger.error("Encountered error on read", ex);
|
||||
}
|
||||
|
||||
public void onWriteError(Exception ex, Object item) {
|
||||
logger.error("Encountered error on write", ex);
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
Having implemented this listener it must be registered with the step:
|
||||
[source, xml]
|
||||
----
|
||||
<step id="simpleStep">
|
||||
...
|
||||
<listeners>
|
||||
<listener>
|
||||
<bean class="org.example...ItemFailureLoggerListener"/>
|
||||
</listener>
|
||||
</listeners>
|
||||
</step>
|
||||
----
|
||||
|
||||
Remember that if your listener does anything in an
|
||||
`onError()` method, it will be inside a transaction that is
|
||||
going to be rolled back. If you need to use a transactional resource such
|
||||
as a database inside an `onError()` method, consider adding a
|
||||
declarative transaction to that method (see Spring Core Reference Guide
|
||||
for details), and giving its propagation attribute the value
|
||||
REQUIRES_NEW.
|
||||
|
||||
|
||||
[[stoppingAJobManuallyForBusinessReasons]]
|
||||
=== Stopping a Job Manually for Business Reasons
|
||||
|
||||
Spring Batch provides a `stop()` method
|
||||
through the `JobOperator` interface, but this is
|
||||
really for use by the operator rather than the application programmer.
|
||||
Sometimes it is more convenient or makes more sense to stop a job
|
||||
execution from within the business logic.
|
||||
|
||||
The simplest thing to do is to throw a
|
||||
`RuntimeException` (one that isn't retried
|
||||
indefinitely or skipped). For example, a custom exception type could be
|
||||
used, as in the example below:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class PoisonPillItemWriter implements ItemWriter<T> {
|
||||
|
||||
public void write(T item) throws Exception {
|
||||
if (isPoisonPill(item)) {
|
||||
throw new PoisonPillException("Poison pill detected: " + item);
|
||||
}
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
Another simple way to stop a step from executing is to simply return
|
||||
`null` from the `ItemReader`:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class EarlyCompletionItemReader implements ItemReader<T> {
|
||||
|
||||
private ItemReader<T> delegate;
|
||||
|
||||
public void setDelegate(ItemReader<T> delegate) { ... }
|
||||
|
||||
public T read() throws Exception {
|
||||
T item = delegate.read();
|
||||
if (isEndItem(item)) {
|
||||
return null; // end the step here
|
||||
}
|
||||
return item;
|
||||
}
|
||||
|
||||
}
|
||||
----
|
||||
The previous example actually relies on the fact that there is a
|
||||
default implementation of the `CompletionPolicy`
|
||||
strategy which signals a complete batch when the item to be processed is
|
||||
`null`. A more sophisticated completion policy could be implemented and
|
||||
injected into the `Step` through the
|
||||
`SimpleStepFactoryBean`:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<step id="simpleStep">
|
||||
<tasklet>
|
||||
<chunk reader="reader" writer="writer" commit-interval="10"
|
||||
chunk-completion-policy="completionPolicy"/>
|
||||
</tasklet>
|
||||
</step>
|
||||
|
||||
<bean id="completionPolicy" class="org.example...SpecialCompletionPolicy"/>
|
||||
----
|
||||
|
||||
An alternative is to set a flag in the
|
||||
`StepExecution`, which is checked by the
|
||||
`Step` implementations in the framework in between
|
||||
item processing. To implement this alternative, we need access to the
|
||||
current `StepExecution`, and this can be achieved by
|
||||
implementing a `StepListener` and registering it with
|
||||
the `Step`. Here is an example of a listener that
|
||||
sets the flag:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class CustomItemWriter extends ItemListenerSupport implements StepListener {
|
||||
|
||||
private StepExecution stepExecution;
|
||||
|
||||
public void beforeStep(StepExecution stepExecution) {
|
||||
this.stepExecution = stepExecution;
|
||||
}
|
||||
|
||||
public void afterRead(Object item) {
|
||||
if (isPoisonPill(item)) {
|
||||
stepExecution.setTerminateOnly();
|
||||
}
|
||||
}
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
The default behavior here when the flag is set is for the step to
|
||||
throw a `JobInterruptedException`. This can be
|
||||
controlled through the `StepInterruptionPolicy`, but
|
||||
the only choice is to throw or not throw an exception, so this is always
|
||||
an abnormal ending to a job.
|
||||
|
||||
|
||||
[[addingAFooterRecord]]
|
||||
=== Adding a Footer Record
|
||||
|
||||
Often when writing to flat files, a "footer" record must be appended
|
||||
to the end of the file, after all processing has been completed. This can
|
||||
also be achieved using the `FlatFileFooterCallback`
|
||||
interface provided by Spring Batch. The
|
||||
`FlatFileFooterCallback` (and its counterpart, the
|
||||
`FlatFileHeaderCallback`) are optional properties of
|
||||
the `FlatFileItemWriter`:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="itemWriter" class="org.spr...FlatFileItemWriter">
|
||||
<property name="resource" ref="outputResource" />
|
||||
<property name="lineAggregator" ref="lineAggregator"/>
|
||||
<property name="headerCallback" ref="headerCallback" />
|
||||
<property name="footerCallback" ref="footerCallback" />
|
||||
</bean>
|
||||
----
|
||||
|
||||
The footer callback interface is very simple. It has just one method
|
||||
that is called when the footer must be written:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface FlatFileFooterCallback {
|
||||
|
||||
void writeFooter(Writer writer) throws IOException;
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
[[writingASummaryFooter]]
|
||||
==== Writing a Summary Footer
|
||||
|
||||
A very common requirement involving footer records is to aggregate
|
||||
information during the output process and to append this information to
|
||||
the end of the file. This footer serves as a summarization of the file
|
||||
or provides a checksum.
|
||||
|
||||
For example, if a batch job is writing
|
||||
`Trade` records to a flat file, and there is a
|
||||
requirement that the total amount from all the
|
||||
`Trades` is placed in a footer, then the following
|
||||
`ItemWriter` implementation can be used:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class TradeItemWriter implements ItemWriter<Trade>,
|
||||
FlatFileFooterCallback {
|
||||
|
||||
private ItemWriter<Trade> delegate;
|
||||
|
||||
private BigDecimal totalAmount = BigDecimal.ZERO;
|
||||
|
||||
public void write(List<? extends Trade> items) throws Exception {
|
||||
BigDecimal chunkTotal = BigDecimal.ZERO;
|
||||
for (Trade trade : items) {
|
||||
chunkTotal = chunkTotal.add(trade.getAmount());
|
||||
}
|
||||
|
||||
delegate.write(items);
|
||||
|
||||
// After successfully writing all items
|
||||
totalAmount = totalAmount.add(chunkTotal);
|
||||
}
|
||||
|
||||
public void writeFooter(Writer writer) throws IOException {
|
||||
writer.write("Total Amount Processed: " + totalAmount);
|
||||
}
|
||||
|
||||
public void setDelegate(ItemWriter delegate) {...}
|
||||
}
|
||||
----
|
||||
|
||||
This `TradeItemWriter` stores a
|
||||
`totalAmount` value that is increased with the
|
||||
`amount` from each Trade item written.
|
||||
After the last Trade is processed, the framework
|
||||
will call `writeFooter`, which will put that
|
||||
`totalAmount` into the file. Note that the
|
||||
`write` method makes use of a temporary variable,
|
||||
`chunkTotal`, that stores the total of the `Trades`
|
||||
in the chunk. This is done to ensure that if a skip occurs in the
|
||||
`write` method, that the
|
||||
`totalAmount` will be left unchanged. It is only at
|
||||
the end of the `write` method, once we are
|
||||
guaranteed that no exceptions will be thrown, that we update the
|
||||
`totalAmount`.
|
||||
|
||||
In order for the `writeFooter` method to be
|
||||
called, the `TradeItemWriter` (which implements
|
||||
`FlatFileFooterCallback`) must be wired into the
|
||||
`FlatFileItemWriter` as the
|
||||
`footerCallback`:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="tradeItemWriter" class="..TradeItemWriter">
|
||||
<property name="delegate" ref="flatFileItemWriter" />
|
||||
</bean>
|
||||
|
||||
<bean id="flatFileItemWriter" class="org.spr...FlatFileItemWriter">
|
||||
<property name="resource" ref="outputResource" />
|
||||
<property name="lineAggregator" ref="lineAggregator"/>
|
||||
<property name="footerCallback" ref="tradeItemWriter" />
|
||||
</bean>
|
||||
----
|
||||
|
||||
The way that the `TradeItemWriter` has been written
|
||||
so far will only function correctly if the `Step`
|
||||
is not restartable. This is because the class is stateful (since it
|
||||
stores the `totalAmount`), but the `totalAmount`
|
||||
is not persisted to the database, and therefore, it cannot be retrieved
|
||||
in the event of a restart. In order to make this class restartable, the
|
||||
`ItemStream` interface should be implemented along
|
||||
with the methods `open` and
|
||||
`update`:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public void open(ExecutionContext executionContext) {
|
||||
if (executionContext.containsKey("total.amount")) {
|
||||
totalAmount = (BigDecimal) executionContext.get("total.amount");
|
||||
}
|
||||
}
|
||||
|
||||
public void update(ExecutionContext executionContext) {
|
||||
executionContext.put("total.amount", totalAmount);
|
||||
}
|
||||
----
|
||||
|
||||
The update method will store the most
|
||||
current version of `totalAmount` to the
|
||||
`ExecutionContext` just before that object is
|
||||
persisted to the database. The open method will
|
||||
retrieve any existing `totalAmount` from the
|
||||
`ExecutionContext` and use it as the starting point
|
||||
for processing, allowing the `TradeItemWriter` to
|
||||
pick up on restart where it left off the previous time the
|
||||
`Step` was executed.
|
||||
|
||||
|
||||
|
||||
[[drivingQueryBasedItemReaders]]
|
||||
=== Driving Query Based ItemReaders
|
||||
|
||||
In the chapter on readers and writers, database input using paging
|
||||
was discussed. Many database vendors, such as DB2, have extremely
|
||||
pessimistic locking strategies that can cause issues if the table being
|
||||
read also needs to be used by other portions of the online application.
|
||||
Furthermore, opening cursors over extremely large datasets can cause
|
||||
issues on certain vendors. Therefore, many projects prefer to use a
|
||||
'Driving Query' approach to reading in data. This approach works by
|
||||
iterating over keys, rather than the entire object that needs to be
|
||||
returned, as the following example illustrates:
|
||||
|
||||
.Driving Query Job
|
||||
image::{batch-asciidoc}images/drivingQueryExample.png[Driving Query Job, scaledwidth="60%"]
|
||||
|
||||
|
||||
As you can see, this example uses the same 'FOO' table as was used
|
||||
in the cursor based example. However, rather than selecting the entire
|
||||
row, only the ID's were selected in the SQL statement. So, rather than a
|
||||
FOO object being returned from `read`, an Integer
|
||||
will be returned. This number can then be used to query for the 'details',
|
||||
which is a complete Foo object:
|
||||
|
||||
.Driving Query Example
|
||||
image::{batch-asciidoc}images/drivingQueryJob.png[Driving Query Example, scaledwidth="60%"]
|
||||
|
||||
An `ItemProcessor` should be used to transform the key obtained from
|
||||
the driving query into a full 'Foo' object. An existing DAO can be used to
|
||||
query for the full object based on the key.
|
||||
|
||||
|
||||
[[multiLineRecords]]
|
||||
=== Multi-Line Records
|
||||
|
||||
While it is usually the case with flat files that each record is
|
||||
confined to a single line, it is common that a file might have records
|
||||
spanning multiple lines with multiple formats. The following excerpt from
|
||||
a file illustrates this:
|
||||
|
||||
----
|
||||
HEA;0013100345;2007-02-15
|
||||
NCU;Smith;Peter;;T;20014539;F
|
||||
BAD;;Oak Street 31/A;;Small Town;00235;IL;US
|
||||
FOT;2;2;267.34
|
||||
----
|
||||
Everything between the line starting with 'HEA' and the line
|
||||
starting with 'FOT' is considered one record. There are a few
|
||||
considerations that must be made in order to handle this situation
|
||||
correctly:
|
||||
|
||||
|
||||
|
||||
Instead of reading one record at a time, the
|
||||
`ItemReader` must read every line of the
|
||||
multi-line record as a group, so that it can be passed to the
|
||||
`ItemWriter` intact.
|
||||
|
||||
|
||||
|
||||
Each line type may need to be tokenized differently.
|
||||
|
||||
|
||||
|
||||
Because a single record spans multiple lines, and we may not know
|
||||
how many lines there are, the `ItemReader` must be
|
||||
careful to always read an entire record. In order to do this, a custom
|
||||
`ItemReader` should be implemented as a wrapper for
|
||||
the `FlatFileItemReader`.
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="itemReader" class="org.spr...MultiLineTradeItemReader">
|
||||
<property name="delegate">
|
||||
<bean class="org.springframework.batch.item.file.FlatFileItemReader">
|
||||
<property name="resource" value="data/iosample/input/multiLine.txt" />
|
||||
<property name="lineMapper">
|
||||
<bean class="org.spr...DefaultLineMapper">
|
||||
<property name="lineTokenizer" ref="orderFileTokenizer"/>
|
||||
<property name="fieldSetMapper">
|
||||
<bean class="org.spr...PassThroughFieldSetMapper" />
|
||||
</property>
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
----
|
||||
|
||||
To ensure that each line is tokenized properly, which is especially
|
||||
important for fixed length input, the
|
||||
`PatternMatchingCompositeLineTokenizer` can be used
|
||||
on the delegate `FlatFileItemReader`. See the `FlatFileItemReader` section in the chapter on readers and writers for more details. The delegate
|
||||
reader will then use a `PassThroughFieldSetMapper` to
|
||||
deliver a `FieldSet` for each line back to the
|
||||
wrapping `ItemReader`.
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="orderFileTokenizer" class="org.spr...PatternMatchingCompositeLineTokenizer">
|
||||
<property name="tokenizers">
|
||||
<map>
|
||||
<entry key="HEA*" value-ref="headerRecordTokenizer" />
|
||||
<entry key="FOT*" value-ref="footerRecordTokenizer" />
|
||||
<entry key="NCU*" value-ref="customerLineTokenizer" />
|
||||
<entry key="BAD*" value-ref="billingAddressLineTokenizer" />
|
||||
</map>
|
||||
</property>
|
||||
</bean>
|
||||
----
|
||||
|
||||
This wrapper will have to be able to recognize the end of a record so
|
||||
that it can continually call `read()` on its
|
||||
delegate until the end is reached. For each line that is read, the wrapper
|
||||
should build up the item to be returned. Once the footer is reached, the
|
||||
item can be returned for delivery to the
|
||||
`ItemProcessor` and `ItemWriter`.
|
||||
|
||||
[source, java]
|
||||
----
|
||||
private FlatFileItemReader<FieldSet> delegate;
|
||||
|
||||
public Trade read() throws Exception {
|
||||
Trade t = null;
|
||||
|
||||
for (FieldSet line = null; (line = this.delegate.read()) != null;) {
|
||||
String prefix = line.readString(0);
|
||||
if (prefix.equals("HEA")) {
|
||||
t = new Trade(); // Record must start with header
|
||||
}
|
||||
else if (prefix.equals("NCU")) {
|
||||
Assert.notNull(t, "No header was found.");
|
||||
t.setLast(line.readString(1));
|
||||
t.setFirst(line.readString(2));
|
||||
...
|
||||
}
|
||||
else if (prefix.equals("BAD")) {
|
||||
Assert.notNull(t, "No header was found.");
|
||||
t.setCity(line.readString(4));
|
||||
t.setState(line.readString(6));
|
||||
...
|
||||
}
|
||||
else if (prefix.equals("FOT")) {
|
||||
return t; // Record must end with footer
|
||||
}
|
||||
}
|
||||
Assert.isNull(t, "No 'END' was found.");
|
||||
return null;
|
||||
}
|
||||
----
|
||||
|
||||
[[executingSystemCommands]]
|
||||
=== Executing System Commands
|
||||
|
||||
Many batch jobs may require that an external command be called from
|
||||
within the batch job. Such a process could be kicked off separately by the
|
||||
scheduler, but the advantage of common meta-data about the run would be
|
||||
lost. Furthermore, a multi-step job would also need to be split up into
|
||||
multiple jobs as well.
|
||||
|
||||
Because the need is so common, Spring Batch provides a
|
||||
`Tasklet` implementation for calling system
|
||||
commands:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean class="org.springframework.batch.core.step.tasklet.SystemCommandTasklet">
|
||||
<property name="command" value="echo hello" />
|
||||
<!-- 5 second timeout for the command to complete -->
|
||||
<property name="timeout" value="5000" />
|
||||
</bean>
|
||||
----
|
||||
|
||||
[[handlingStepCompletionWhenNoInputIsFound]]
|
||||
=== Handling Step Completion When No Input is Found
|
||||
|
||||
In many batch scenarios, finding no rows in a database or file to
|
||||
process is not exceptional. The `Step` is simply
|
||||
considered to have found no work and completes with 0 items read. All of
|
||||
the `ItemReader` implementations provided out of the
|
||||
box in Spring Batch default to this approach. This can lead to some
|
||||
confusion if nothing is written out even when input is present (which
|
||||
usually happens if a file was misnamed, etc.). For this reason, the meta
|
||||
data itself should be inspected to determine how much work the framework
|
||||
found to be processed. However, what if finding no input is considered
|
||||
exceptional? In this case, programmatically checking the meta data for no
|
||||
items processed and causing failure is the best solution. Because this is
|
||||
a common use case, a listener is provided with just this
|
||||
functionality:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class NoWorkFoundStepExecutionListener extends StepExecutionListenerSupport {
|
||||
|
||||
public ExitStatus afterStep(StepExecution stepExecution) {
|
||||
if (stepExecution.getReadCount() == 0) {
|
||||
return ExitStatus.FAILED;
|
||||
}
|
||||
return null;
|
||||
}
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
The above `StepExecutionListener` inspects the
|
||||
readCount property of the `StepExecution` during the
|
||||
'afterStep' phase to determine if no items were read. If that is the case,
|
||||
an exit code of FAILED is returned, indicating that the
|
||||
`Step` should fail. Otherwise, `null` is returned,
|
||||
which will not affect the status of the
|
||||
`Step`.
|
||||
|
||||
|
||||
[[passingDataToFutureSteps]]
|
||||
=== Passing Data to Future Steps
|
||||
|
||||
It is often useful to pass information from one step to another.
|
||||
This can be done using the `ExecutionContext`. The
|
||||
catch is that there are two `ExecutionContexts`: one
|
||||
at the `Step` level and one at the
|
||||
`Job` level. The `Step`
|
||||
`ExecutionContext` lives only as long as the step
|
||||
while the `Job`
|
||||
`ExecutionContext` lives through the whole
|
||||
`Job`. On the other hand, the
|
||||
`Step` `ExecutionContext` is
|
||||
updated every time the `Step` commits a chunk while
|
||||
the `Job` `ExecutionContext` is
|
||||
updated only at the end of each `Step`.
|
||||
|
||||
The consequence of this separation is that all data must be placed
|
||||
in the `Step` `ExecutionContext`
|
||||
while the `Step` is executing. This will ensure that
|
||||
the data will be stored properly while the `Step` is
|
||||
on-going. If data is stored to the `Job`
|
||||
`ExecutionContext`, then it will not be persisted
|
||||
during `Step` execution and if the
|
||||
`Step` fails, that data will be lost.
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class SavingItemWriter implements ItemWriter<Object> {
|
||||
private StepExecution stepExecution;
|
||||
|
||||
public void write(List<? extends Object> items) throws Exception {
|
||||
// ...
|
||||
|
||||
ExecutionContext stepContext = this.stepExecution.getExecutionContext();
|
||||
stepContext.put("someKey", someObject);
|
||||
}
|
||||
|
||||
@BeforeStep
|
||||
public void saveStepExecution(StepExecution stepExecution) {
|
||||
this.stepExecution = stepExecution;
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
To make the data available to future `Steps`,
|
||||
it will have to be "promoted" to the `Job`
|
||||
`ExecutionContext` after the step has finished.
|
||||
Spring Batch provides the
|
||||
`ExecutionContextPromotionListener` for this purpose.
|
||||
The listener must be configured with the keys related to the data in the
|
||||
`ExecutionContext` that must be promoted. It can
|
||||
also, optionally, be configured with a list of exit code patterns for
|
||||
which the promotion should occur ("COMPLETED" is the default). As with all
|
||||
listeners, it must be registered on the
|
||||
`Step`.
|
||||
[source, xml]
|
||||
----
|
||||
<job id="job1">
|
||||
<step id="step1">
|
||||
<tasklet>
|
||||
<chunk reader="reader" writer="savingWriter" commit-interval="10"/>
|
||||
</tasklet>
|
||||
<listeners>
|
||||
<listener ref="promotionListener"/>
|
||||
</listeners>
|
||||
</step>
|
||||
|
||||
<step id="step2">
|
||||
...
|
||||
</step>
|
||||
</job>
|
||||
|
||||
<beans:bean id="promotionListener" class="org.spr....ExecutionContextPromotionListener">
|
||||
<beans:property name="keys" value="someKey"/>
|
||||
</beans:bean>
|
||||
----
|
||||
|
||||
Finally, the saved values must be retrieved from the
|
||||
`Job` `ExecutionContext`:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class RetrievingItemWriter implements ItemWriter<Object> {
|
||||
private Object someObject;
|
||||
|
||||
public void write(List<? extends Object> items) throws Exception {
|
||||
// ...
|
||||
}
|
||||
|
||||
@BeforeStep
|
||||
public void retrieveInterstepData(StepExecution stepExecution) {
|
||||
JobExecution jobExecution = stepExecution.getJobExecution();
|
||||
ExecutionContext jobContext = jobExecution.getExecutionContext();
|
||||
this.someObject = jobContext.get("someKey");
|
||||
}
|
||||
}
|
||||
----
|
||||
702
spring-batch-docs/asciidoc/domain.adoc
Normal file
@@ -0,0 +1,702 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[domainLanguageOfBatch]]
|
||||
|
||||
== The Domain Language of Batch
|
||||
|
||||
To any experienced batch architect, the overall concepts of batch
|
||||
processing used in Spring Batch should be familiar and comfortable. There
|
||||
are "Jobs" and "Steps" and developer supplied processing units called
|
||||
`ItemReaders` and `ItemWriters`. However, because of the Spring patterns,
|
||||
operations, templates, callbacks, and idioms, there are opportunities for
|
||||
the following:
|
||||
|
||||
* significant improvement in adherence to a clear separation of concerns
|
||||
* clearly delineated architectural layers and services provided as interfaces
|
||||
* simple and default implementations that allow for quick adoption and ease of use out-of-the-box
|
||||
* significantly enhanced extensibility
|
||||
|
||||
The diagram below is a simplified version of the batch reference
|
||||
architecture that has been used for decades. It provides an overview of the
|
||||
components that make up the domain language of batch processing. This
|
||||
architecture framework is a blueprint that has been proven through decades
|
||||
of implementations on the last several generations of platforms
|
||||
(COBOL/Mainframe, C++/Unix, and now Java/anywhere). JCL and COBOL developers
|
||||
are likely to be as comfortable with the concepts as C++, C# and Java
|
||||
developers. Spring Batch provides a physical implementation of the layers,
|
||||
components and technical services commonly found in robust, maintainable
|
||||
systems used to address the creation of simple to complex batch
|
||||
applications, with the infrastructure and extensions to address very complex
|
||||
processing needs.
|
||||
|
||||
.Batch Stereotypes
|
||||
image::{batch-asciidoc}images/spring-batch-reference-model.png[Figure 2.1: Batch Stereotypes, scaledwidth="60%"]
|
||||
|
||||
The diagram above highlights the key concepts that make up the domain
|
||||
language of batch. A Job has one to many steps, each of which has exactly one
|
||||
`ItemReader`, `ItemProcessor`, and `ItemWriter`. A job needs to be launched
|
||||
(JobLauncher), and meta data about the currently running process needs to be
|
||||
stored (JobRepository).
|
||||
|
||||
|
||||
=== Job
|
||||
|
||||
This section describes stereotypes relating to the concept of a
|
||||
batch job. A `Job` is an entity that encapsulates an
|
||||
entire batch process. As is common with other Spring projects, a
|
||||
`Job` will be wired together via an XML configuration
|
||||
file or Java based configuration. This configuration may be referred to as
|
||||
the "job configuration". However, `Job` is just the
|
||||
top of an overall hierarchy:
|
||||
|
||||
.Job Hierarchy
|
||||
image::{batch-asciidoc}images/job-heirarchy.png[Job Hierarchy, scaledwidth="60%"]
|
||||
|
||||
In Spring Batch, a Job is simply a container for Steps. It combines
|
||||
multiple steps that belong logically together in a flow and allows for
|
||||
configuration of properties global to all steps, such as restartability.
|
||||
The job configuration contains:
|
||||
|
||||
* The simple name of the job
|
||||
* Definition and ordering of Steps
|
||||
* Whether or not the job is restartable
|
||||
|
||||
A default simple implementation of the Job
|
||||
interface is provided by Spring Batch in the form of the
|
||||
`SimpleJob` class which creates some standard
|
||||
functionality on top of `Job`; however, the batch
|
||||
namespace abstracts away the need to instantiate it directly. Instead, the
|
||||
`<job>` tag can be used:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<job id="footballJob">
|
||||
<step id="playerload" next="gameLoad"/>
|
||||
<step id="gameLoad" next="playerSummarization"/>
|
||||
<step id="playerSummarization"/>
|
||||
</job>
|
||||
----
|
||||
|
||||
==== JobInstance
|
||||
|
||||
A `JobInstance` refers to the concept of a
|
||||
logical job run. Let's consider a batch job that should be run once at
|
||||
the end of the day, such as the 'EndOfDay' `Job` from the diagram above.
|
||||
There is one 'EndOfDay' job, but each individual
|
||||
run of the `Job` must be tracked separately. In the
|
||||
case of this job, there will be one logical
|
||||
`JobInstance` per day. For example, there will be a
|
||||
January 1st run, and a January 2nd run. If the January 1st run fails the
|
||||
first time and is run again the next day, it is still the January 1st
|
||||
run. (Usually this corresponds with the data it is processing as well,
|
||||
meaning the January 1st run processes data for January 1st, etc).
|
||||
Therefore, each `JobInstance` can have multiple
|
||||
executions (`JobExecution` is discussed in more
|
||||
detail below) and only one `JobInstance`
|
||||
corresponding to a particular `Job` and
|
||||
identifying `JobParameters` can be running at a given
|
||||
time.
|
||||
|
||||
The definition of a `JobInstance` has
|
||||
absolutely no bearing on the data that will be loaded. It is entirely up
|
||||
to the `ItemReader` implementation used to
|
||||
determine how data will be loaded. For example, in the EndOfDay
|
||||
scenario, there may be a column on the data that indicates the
|
||||
'effective date' or 'schedule date' to which the data belongs. So, the
|
||||
January 1st run would only load data from the 1st, and the January 2nd
|
||||
run would only use data from the 2nd. Because this determination will
|
||||
likely be a business decision, it is left up to the
|
||||
`ItemReader` to decide. What using the same
|
||||
`JobInstance` will determine, however, is whether
|
||||
or not the 'state' (i.e. the `ExecutionContext`,
|
||||
which is discussed below) from previous executions will be used. Using a
|
||||
new `JobInstance` will mean 'start from the
|
||||
beginning' and using an existing instance will generally mean 'start
|
||||
from where you left off'.
|
||||
|
||||
==== JobParameters
|
||||
|
||||
Having discussed `JobInstance` and how it
|
||||
differs from Job, the natural question to ask is:
|
||||
"how is one `JobInstance` distinguished from
|
||||
another?" The answer is: `JobParameters`.
|
||||
`JobParameters` is a set of parameters used to
|
||||
start a batch job. They can be used for identification or even as
|
||||
reference data during the run:
|
||||
|
||||
.Job Parameters
|
||||
image::{batch-asciidoc}images/job-stereotypes-parameters.png[Job Parameters, scaledwidth="60%"]
|
||||
|
||||
In the example above, where there are two instances, one for
|
||||
January 1st, and another for January 2nd, there is really only one Job,
|
||||
but it has two `JobParameters` objects: one that was started with a job parameter of 01-01-2017 and another that
|
||||
was started with a parameter of 01-02-2017. Thus, the contract can be
|
||||
defined as: `JobInstance` =
|
||||
`Job` + identifying `JobParameters`. This
|
||||
allows a developer to effectively control how a
|
||||
`JobInstance` is defined, since they control what
|
||||
parameters are passed in.
|
||||
|
||||
NOTE: Not all job parameters are required to contribute to the identification
|
||||
of a `JobInstance`. By default, they do; however, the framework
|
||||
allows the submission of a `Job` with parameters that do
|
||||
not contribute to the identity of a `JobInstance` as well.
|
||||
|
||||
==== JobExecution
|
||||
|
||||
A `JobExecution` refers to the technical
|
||||
concept of a single attempt to run a Job. An
|
||||
execution may end in failure or success, but the
|
||||
`JobInstance` corresponding to a given execution
|
||||
will not be considered complete unless the execution completes
|
||||
successfully. Using the EndOfDay `Job` described
|
||||
above as an example, consider a `JobInstance` for
|
||||
01-01-2017 that failed the first time it was run. If it is run again
|
||||
with the same identifying job parameters as the first run (01-01-2017), a new
|
||||
`JobExecution` will be created. However, there will
|
||||
still be only one `JobInstance`.
|
||||
|
||||
A `Job` defines what a job is and how it is
|
||||
to be executed, and `JobInstance` is a purely
|
||||
organizational object to group executions together, primarily to enable
|
||||
correct restart semantics. A `JobExecution`,
|
||||
however, is the primary storage mechanism for what actually happened
|
||||
during a run, and as such contains many more properties that must be
|
||||
controlled and persisted:
|
||||
|
||||
|
||||
.JobExecution Properties
|
||||
|
||||
|===
|
||||
|Property |Definition
|
||||
|status
|
||||
|A `BatchStatus` object that
|
||||
indicates the status of the execution. While running, it's
|
||||
BatchStatus.STARTED, if it fails, it's BatchStatus.FAILED, and
|
||||
if it finishes successfully, it's BatchStatus.COMPLETED
|
||||
|
||||
|startTime
|
||||
|A `java.util.Date` representing the
|
||||
current system time when the execution was started.
|
||||
|
||||
|endTime
|
||||
|A `java.util.Date` representing the
|
||||
current system time when the execution finished, regardless of
|
||||
whether or not it was successful.
|
||||
|
||||
|exitStatus
|
||||
|The `ExitStatus` indicating the
|
||||
result of the run. It is most important because it contains an
|
||||
exit code that will be returned to the caller. See chapter 5 for
|
||||
more details.
|
||||
|
||||
|createTime
|
||||
|A `java.util.Date` representing the
|
||||
current system time when the `JobExecution`
|
||||
was first persisted. The job may not have been started yet (and
|
||||
thus has no start time), but it will always have a createTime,
|
||||
which is required by the framework for managing job level
|
||||
`ExecutionContexts`.
|
||||
|
||||
|lastUpdated
|
||||
|A `java.util.Date` representing the
|
||||
last time a `JobExecution` was
|
||||
persisted.
|
||||
|
||||
|executionContext
|
||||
|The 'property bag' containing any user data that needs to
|
||||
be persisted between executions.
|
||||
|
||||
|failureExceptions
|
||||
|The list of exceptions encountered during the execution
|
||||
of a Job. These can be useful if more
|
||||
than one exception is encountered during the failure of a
|
||||
Job.
|
||||
|===
|
||||
|
||||
These properties are important because they will be persisted and
|
||||
can be used to completely determine the status of an execution. For
|
||||
example, if the EndOfDay job for 01-01 is executed at 9:00 PM, and fails
|
||||
at 9:30, the following entries will be made in the batch meta data
|
||||
tables:
|
||||
|
||||
.BATCH_JOB_INSTANCE
|
||||
|
||||
|===
|
||||
|JOB_INST_ID |JOB_NAME
|
||||
|1
|
||||
|EndOfDayJob
|
||||
|===
|
||||
|
||||
.BATCH_JOB_EXECUTION_PARAMS
|
||||
|===
|
||||
|JOB_EXECUTION_ID|TYPE_CD|KEY_NAME|DATE_VAL|IDENTIFYING
|
||||
|1
|
||||
|DATE
|
||||
|schedule.Date
|
||||
|2017-01-01
|
||||
|TRUE
|
||||
|===
|
||||
|
||||
.BATCH_JOB_EXECUTION
|
||||
|===
|
||||
|JOB_EXEC_ID|JOB_INST_ID|START_TIME|END_TIME|STATUS
|
||||
|1
|
||||
|1
|
||||
|2017-01-01 21:00
|
||||
|2017-01-01 21:30
|
||||
|FAILED
|
||||
|===
|
||||
|
||||
NOTE: column names may have been abbreviated or removed for clarity
|
||||
and formatting
|
||||
|
||||
|
||||
Now that the job has failed, let's assume that it took the entire
|
||||
course of the night for the problem to be determined, so that the 'batch
|
||||
window' is now closed. Assuming the window starts at 9:00 PM, the job
|
||||
will be kicked off again for 01-01, starting where it left off and
|
||||
completing successfully at 9:30. Because it's now the next day, the
|
||||
01-02 job must be run as well, which is kicked off just afterwards at
|
||||
9:31, and completes in its normal one hour time at 10:30. There is no
|
||||
requirement that one `JobInstance` be kicked off
|
||||
after another, unless there is potential for the two jobs to attempt to
|
||||
access the same data, causing issues with locking at the database level.
|
||||
It is entirely up to the scheduler to determine when a
|
||||
Job should be run. Since they're separate
|
||||
`JobInstances`, Spring Batch will make no attempt
|
||||
to stop them from being run concurrently. (Attempting to run the same
|
||||
`JobInstance` while another is already running will
|
||||
result in a `JobExecutionAlreadyRunningException`
|
||||
being thrown). There should now be an extra entry in the
|
||||
`JobInstance` table, and two extra entries in both the
|
||||
`JobParameters` and
|
||||
`JobExecution` tables:
|
||||
|
||||
.BATCH_JOB_INSTANCE
|
||||
|===
|
||||
|JOB_INST_ID |JOB_NAME
|
||||
|1
|
||||
|EndOfDayJob
|
||||
|
||||
|2
|
||||
|EndOfDayJob
|
||||
|===
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
.BATCH_JOB_EXECUTION_PARAMS
|
||||
|===
|
||||
|JOB_EXECUTION_ID|TYPE_CD|KEY_NAME|DATE_VAL|IDENTIFYING
|
||||
|1
|
||||
|DATE
|
||||
|schedule.Date
|
||||
|2017-01-01 00:00:00
|
||||
|TRUE
|
||||
|
||||
|2
|
||||
|DATE
|
||||
|schedule.Date
|
||||
|2017-01-01 00:00:00
|
||||
|TRUE
|
||||
|
||||
|3
|
||||
|DATE
|
||||
|schedule.Date
|
||||
|2017-01-02 00:00:00
|
||||
|TRUE
|
||||
|===
|
||||
|
||||
.BATCH_JOB_EXECUTION
|
||||
|===
|
||||
|JOB_EXEC_ID|JOB_INST_ID|START_TIME|END_TIME|STATUS
|
||||
|1
|
||||
|1
|
||||
|2017-01-01 21:00
|
||||
|2017-01-01 21:30
|
||||
|FAILED
|
||||
|
||||
|2
|
||||
|1
|
||||
|2017-01-02 21:00
|
||||
|2017-01-02 21:30
|
||||
|COMPLETED
|
||||
|
||||
|3
|
||||
|2
|
||||
|2017-01-02 21:31
|
||||
|2017-01-02 22:29
|
||||
|COMPLETED
|
||||
|===
|
||||
|
||||
NOTE: column names may have been abbreviated or removed for clarity
|
||||
and formatting
|
||||
|
||||
=== Step
|
||||
|
||||
A `Step` is a domain object that encapsulates
|
||||
an independent, sequential phase of a batch job. Therefore, every
|
||||
Job is composed entirely of one or more steps. A
|
||||
`Step` contains all of the information necessary to
|
||||
define and control the actual batch processing. This is a necessarily
|
||||
vague description because the contents of any given
|
||||
`Step` are at the discretion of the developer writing
|
||||
a `Job`. A `Step` can be as simple or complex as the
|
||||
developer desires. A simple `Step` might load data
|
||||
from a file into the database, requiring little or no code (depending
|
||||
upon the implementations used). A more complex `Step`
|
||||
may have complicated business rules that are applied as part of the
|
||||
processing. As with `Job`, a
|
||||
`Step` has an individual
|
||||
`StepExecution` that corresponds with a unique
|
||||
`JobExecution`:
|
||||
|
||||
.Job Hierarchy With Steps
|
||||
image::{batch-asciidoc}images/jobHeirarchyWithSteps.png[Figure 2.1: Job Hierarchy With Steps, scaledwidth="60%"]
|
||||
|
||||
|
||||
|
||||
==== StepExecution
|
||||
|
||||
A `StepExecution` represents a single attempt
|
||||
to execute a `Step`. A new
|
||||
`StepExecution` will be created each time a
|
||||
`Step` is run, similar to
|
||||
`JobExecution`. However, if a step fails to execute
|
||||
because the step before it fails, there will be no execution persisted
|
||||
for it. A `StepExecution` will only be created when
|
||||
its `Step` is actually started.
|
||||
|
||||
`Step` executions are represented by objects of the
|
||||
`StepExecution` class. Each execution contains a
|
||||
reference to its corresponding step and
|
||||
`JobExecution`, and transaction related data such
|
||||
as commit and rollback count and start and end times. Additionally, each
|
||||
step execution will contain an `ExecutionContext`,
|
||||
which contains any data a developer needs persisted across batch runs,
|
||||
such as statistics or state information needed to restart. The following
|
||||
is a listing of the properties for
|
||||
StepExecution:
|
||||
|
||||
.StepExecution Properties
|
||||
|===
|
||||
|Property|Definition
|
||||
|status
|
||||
|A `BatchStatus` object that
|
||||
indicates the status of the execution. While it's running, the
|
||||
status is BatchStatus.STARTED, if it fails, the status is
|
||||
BatchStatus.FAILED, and if it finishes successfully, the status
|
||||
is BatchStatus.COMPLETED
|
||||
|
||||
|startTime
|
||||
|A `java.util.Date` representing the
|
||||
current system time when the execution was started.
|
||||
|
||||
|endTime
|
||||
|
||||
|A `java.util.Date` representing the
|
||||
current system time when the execution finished, regardless of
|
||||
whether or not it was successful.
|
||||
|
||||
|exitStatus
|
||||
|The `ExitStatus` indicating the
|
||||
result of the execution. It is most important because it
|
||||
contains an exit code that will be returned to the caller. See
|
||||
chapter 5 for more details.
|
||||
|
||||
|executionContext
|
||||
|The 'property bag' containing any user data that needs to
|
||||
be persisted between executions.
|
||||
|
||||
|readCount
|
||||
|The number of items that have been successfully
|
||||
read
|
||||
|
||||
|writeCount
|
||||
|The number of items that have been successfully
|
||||
written
|
||||
|
||||
|commitCount
|
||||
|The number of transactions that have been committed for this
|
||||
execution
|
||||
|
||||
|rollbackCount
|
||||
|The number of times the business transaction controlled
|
||||
by the `Step` has been rolled back.
|
||||
|
||||
|readSkipCount
|
||||
|The number of times `read` has
|
||||
failed, resulting in a skipped item.
|
||||
|
||||
|processSkipCount
|
||||
|The number of times `process` has
|
||||
failed, resulting in a skipped item.
|
||||
|
||||
|filterCount
|
||||
|The number of items that have been 'filtered' by the
|
||||
`ItemProcessor`.
|
||||
|
||||
|writeSkipCount
|
||||
|The number of times `write` has
|
||||
failed, resulting in a skipped item.
|
||||
|===
|
||||
|
||||
=== ExecutionContext
|
||||
|
||||
An `ExecutionContext` represents a collection
|
||||
of key/value pairs that are persisted and controlled by the framework in
|
||||
order to allow developers a place to store persistent state that is scoped
|
||||
to a `StepExecution` or
|
||||
`JobExecution`. For those familiar with Quartz, it is
|
||||
very similar to JobDataMap. The best usage example
|
||||
is to facilitate restart. Using flat file input as an example, while
|
||||
processing individual lines, the framework periodically persists the
|
||||
`ExecutionContext` at commit points. This allows the
|
||||
`ItemReader` to store its state in case a fatal error
|
||||
occurs during the run, or even if the power goes out. All that is needed
|
||||
is to put the current number of lines read into the context, and the
|
||||
framework will do the rest:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
executionContext.putLong(getKey(LINES_READ_COUNT), reader.getPosition());
|
||||
----
|
||||
|
||||
Using the EndOfDay example from the `Job` Stereotypes section as an
|
||||
example, assume there's one step: 'loadData', that loads a file into the
|
||||
database. After the first failed run, the meta data tables would look like
|
||||
the following:
|
||||
|
||||
.BATCH_JOB_INSTANCE
|
||||
|===
|
||||
|JOB_INST_ID|JOB_NAME
|
||||
|1
|
||||
|EndOfDayJob
|
||||
|===
|
||||
|
||||
.BATCH_JOB_PARAMS
|
||||
|===
|
||||
|JOB_INST_ID|TYPE_CD|KEY_NAME|DATE_VAL
|
||||
|1
|
||||
|DATE
|
||||
|schedule.Date
|
||||
|2017-01-01
|
||||
|===
|
||||
|
||||
.BATCH_JOB_EXECUTION
|
||||
|===
|
||||
|JOB_EXEC_ID|JOB_INST_ID|START_TIME|END_TIME|STATUS
|
||||
|1
|
||||
|1
|
||||
|2017-01-01 21:00
|
||||
|2017-01-01 21:30
|
||||
|FAILED
|
||||
|===
|
||||
|
||||
.BATCH_STEP_EXECUTION
|
||||
|===
|
||||
|STEP_EXEC_ID|JOB_EXEC_ID|STEP_NAME|START_TIME|END_TIME|STATUS
|
||||
|1
|
||||
|1
|
||||
|loadData
|
||||
|2017-01-01 21:00
|
||||
|2017-01-01 21:30
|
||||
|FAILED
|
||||
|===
|
||||
|
||||
.BATCH_STEP_EXECUTION_CONTEXT
|
||||
|===
|
||||
|STEP_EXEC_ID|SHORT_CONTEXT
|
||||
|1
|
||||
|{piece.count=40321}
|
||||
|===
|
||||
|
||||
|
||||
In this case, the `Step` ran for 30 minutes
|
||||
and processed 40,321 'pieces', which would represent lines in a file in
|
||||
this scenario. This value will be updated just before each commit by the
|
||||
framework, and can contain multiple rows corresponding to entries within
|
||||
the `ExecutionContext`. Being notified before a
|
||||
commit requires one of the various StepListeners,
|
||||
or an ItemStream, which are discussed in more
|
||||
detail later in this guide. As with the previous example, it is assumed
|
||||
that the `Job` is restarted the next day. When it is
|
||||
restarted, the values from the `ExecutionContext` of
|
||||
the last run are reconstituted from the database, and when the
|
||||
`ItemReader` is opened, it can check to see if it has
|
||||
any stored state in the context, and initialize itself from there:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
if (executionContext.containsKey(getKey(LINES_READ_COUNT))) {
|
||||
log.debug("Initializing for restart. Restart data is: " + executionContext);
|
||||
|
||||
long lineCount = executionContext.getLong(getKey(LINES_READ_COUNT));
|
||||
|
||||
LineReader reader = getReader();
|
||||
|
||||
Object record = "";
|
||||
while (reader.getPosition() < lineCount && record != null) {
|
||||
record = readLine();
|
||||
}
|
||||
}
|
||||
----
|
||||
In this case, after the above code is executed, the current line
|
||||
will be 40,322, allowing the `Step` to start again
|
||||
from where it left off. The `ExecutionContext` can
|
||||
also be used for statistics that need to be persisted about the run
|
||||
itself. For example, if a flat file contains orders for processing that
|
||||
exist across multiple lines, it may be necessary to store how many orders
|
||||
have been processed (which is much different from the number of lines
|
||||
read) so that an email can be sent at the end of the
|
||||
`Step` with the total orders processed in the body.
|
||||
The framework handles storing this for the developer, in order to
|
||||
correctly scope it with an individual `JobInstance`.
|
||||
It can be very difficult to know whether an existing
|
||||
`ExecutionContext` should be used or not. For
|
||||
example, using the 'EndOfDay' example from above, when the 01-01 run
|
||||
starts again for the second time, the framework recognizes that it is the
|
||||
same `JobInstance` and on an individual
|
||||
`Step` basis, pulls the
|
||||
`ExecutionContext` out of the database and hands it
|
||||
as part of the `StepExecution` to the
|
||||
`Step` itself. Conversely, for the 01-02 run the
|
||||
framework recognizes that it is a different instance, so an empty context
|
||||
must be handed to the `Step`. There are many of these
|
||||
types of determinations that the framework makes for the developer to
|
||||
ensure the state is given to them at the correct time. It is also
|
||||
important to note that exactly one `ExecutionContext`
|
||||
exists per `StepExecution` at any given time. Clients
|
||||
of the `ExecutionContext` should be careful because
|
||||
this creates a shared keyspace, so care should be taken when putting
|
||||
values in to ensure no data is overwritten. However, the
|
||||
`Step` stores absolutely no data in the context, so
|
||||
there is no way to adversely affect the framework.
|
||||
|
||||
It is also important to note that there is at least one
|
||||
`ExecutionContext` per
|
||||
`JobExecution`, and one for every
|
||||
StepExecution. For example, consider the following
|
||||
code snippet:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
ExecutionContext ecStep = stepExecution.getExecutionContext();
|
||||
ExecutionContext ecJob = jobExecution.getExecutionContext();
|
||||
//ecStep does not equal ecJob
|
||||
----
|
||||
|
||||
As noted in the comment, ecStep will not equal ecJob; they are two
|
||||
different `ExecutionContexts`. The one scoped to the
|
||||
`Step` will be saved at every commit point in the
|
||||
`Step`, whereas the one scoped to the
|
||||
Job will be saved in between every
|
||||
`Step` execution.
|
||||
|
||||
=== JobRepository
|
||||
|
||||
`JobRepository` is the persistence mechanism
|
||||
for all of the Stereotypes mentioned above. It provides CRUD operations
|
||||
for `JobLauncher`, `Job`, and
|
||||
`Step` implementations. When a
|
||||
Job is first launched, a
|
||||
`JobExecution` is obtained from the repository, and
|
||||
during the course of execution `StepExecution` and
|
||||
`JobExecution` implementations are persisted by
|
||||
passing them to the repository:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<job-repository id="jobRepository"/>
|
||||
----
|
||||
|
||||
=== JobLauncher
|
||||
|
||||
`JobLauncher` represents a simple interface for
|
||||
launching a `Job` with a given set of
|
||||
`JobParameters`:
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface JobLauncher {
|
||||
|
||||
public JobExecution run(Job job, JobParameters jobParameters)
|
||||
throws JobExecutionAlreadyRunningException, JobRestartException;
|
||||
}
|
||||
----
|
||||
It is expected that implementations will obtain a valid
|
||||
`JobExecution` from the
|
||||
`JobRepository` and execute the
|
||||
`Job`.
|
||||
|
||||
=== Item Reader
|
||||
|
||||
`ItemReader` is an abstraction that represents
|
||||
the retrieval of input for a `Step`, one item at a
|
||||
time. When the `ItemReader` has exhausted the items
|
||||
it can provide, it will indicate this by returning null. More details
|
||||
about the `ItemReader` interface and its various
|
||||
implementations can be found in <<readersAndWriters.adoc#readersAndWriters,Readers And Writers>>.
|
||||
|
||||
=== Item Writer
|
||||
|
||||
`ItemWriter` is an abstraction that
|
||||
represents the output of a `Step`, one batch
|
||||
or chunk of items at a time. Generally, an `ItemWriter` has no
|
||||
knowledge of the input it will receive next, only the item that
|
||||
was passed in its current invocation. More details about the
|
||||
`ItemWriter` interface and its various
|
||||
implementations can be found in <<readersAndWriters.adoc#readersAndWriters,Readers And Writers>>.
|
||||
|
||||
=== Item Processor
|
||||
|
||||
`ItemProcessor` is an abstraction that
|
||||
represents the business processing of an item. While the
|
||||
`ItemReader` reads one item, and the
|
||||
`ItemWriter` writes them, the
|
||||
`ItemProcessor` provides access to transform or apply
|
||||
other business processing. If, while processing the item, it is determined
|
||||
that the item is not valid, returning null indicates that the item should
|
||||
not be written out. More details about the `ItemProcessor` interface can be
|
||||
found in <<readersAndWriters.adoc#readersAndWriters,Readers And Writers>>.
|
||||
|
||||
|
||||
=== Batch Namespace
|
||||
|
||||
Many of the domain concepts listed above need to be configured in a
|
||||
Spring ApplicationContext. While there are
|
||||
implementations of the interfaces above that can be used in a standard
|
||||
bean definition, a namespace has been provided for ease of
|
||||
configuration:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<beans:beans xmlns="http://www.springframework.org/schema/batch"
|
||||
xmlns:beans="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="
|
||||
http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans.xsd
|
||||
http://www.springframework.org/schema/batch
|
||||
http://www.springframework.org/schema/batch/spring-batch-2.2.xsd">
|
||||
|
||||
<job id="ioSampleJob">
|
||||
<step id="step1">
|
||||
<tasklet>
|
||||
<chunk reader="itemReader" writer="itemWriter" commit-interval="2"/>
|
||||
</tasklet>
|
||||
</step>
|
||||
</job>
|
||||
|
||||
</beans:beans>
|
||||
----
|
||||
|
||||
As long as the batch namespace has been declared, any of its
|
||||
elements can be used. More information on configuring a
|
||||
Job can be found in <<job.adoc#configureJob,Configuring and Running a Job>>. More information on configuring a `Step` can be
|
||||
found in <<step.adoc#configureStep,Configuring a Step>>.
|
||||
|
||||
119
spring-batch-docs/asciidoc/glossary.adoc
Normal file
@@ -0,0 +1,119 @@
|
||||
[[glossary]]
|
||||
[appendix]
|
||||
== Glossary
|
||||
[glossary]
|
||||
=== Spring Batch Glossary
|
||||
|
||||
Batch::
|
||||
An accumulation of business transactions over time.
|
||||
|
||||
Batch Application Style::
|
||||
Term used to designate batch as an application style in its own
|
||||
right similar to online, Web or SOA. It has standard elements of
|
||||
input, validation, transformation of information to business model,
|
||||
business processing and output. In addition, it requires monitoring at
|
||||
a macro level.
|
||||
|
||||
Batch Processing::
|
||||
The handling of a batch of many business transactions that have
|
||||
accumulated over a period of time (e.g. an hour, day, week, month, or
|
||||
year). It is the application of a process, or set of processes, to
|
||||
many data entities or objects in a repetitive and predictable fashion
|
||||
with either no manual element, or a separate manual element for error
|
||||
processing.
|
||||
|
||||
Batch Window::
|
||||
The time frame within which a batch job must complete. This can
|
||||
be constrained by other systems coming online, other dependent jobs
|
||||
needing to execute or other factors specific to the batch
|
||||
environment.
|
||||
|
||||
Step::
|
||||
It is the main batch task or unit of work controller. It
|
||||
initializes the business logic, and controls the transaction
|
||||
environment based on commit interval setting, etc.
|
||||
|
||||
Tasklet::
|
||||
A component created by an application developer to process the
|
||||
business logic for a Step.
|
||||
|
||||
Batch Job Type::
|
||||
Job Types describe application of jobs for a particular type of
|
||||
processing. Common areas are interface processing (typically flat
|
||||
files), forms processing (either for online pdf generation or print
|
||||
formats), report processing.
|
||||
|
||||
Driving Query::
|
||||
A driving query identifies the set of work for a job to do; the
|
||||
job then breaks that work into individual units of work. For instance,
|
||||
identify all financial transactions that have a status of "pending
|
||||
transmission" and send them to our partner system. The driving query
|
||||
returns a set of record IDs to process; each record ID then becomes a
|
||||
unit of work. A driving query may involve a join (if the criteria for
|
||||
selection falls across two or more tables) or it may work with a
|
||||
single table.
|
||||
|
||||
Item::
|
||||
An item represents the smallest amount of complete data for
|
||||
processing. In the simplest terms, this might mean a line in a file, a
|
||||
row in a database table, or a particular element in an XML
|
||||
file.
|
||||
|
||||
Logical Unit of Work (LUW)::
|
||||
A batch job iterates through a driving query (or another input
|
||||
source such as a file) to perform the set of work that the job must
|
||||
accomplish. Each iteration of work performed is a unit of work.
|
||||
|
||||
Commit Interval::
|
||||
A set of LUWs processed within a single transaction.
|
||||
|
||||
Partitioning::
|
||||
Splitting a job into multiple threads where each thread is
|
||||
responsible for a subset of the overall data to be processed. The
|
||||
threads of execution may be within the same JVM or they may span JVMs
|
||||
in a clustered environment that supports workload balancing.
|
||||
|
||||
Staging Table::
|
||||
A table that holds temporary data while it is being
|
||||
processed.
|
||||
|
||||
Restartable::
|
||||
A job that can be executed again and will assume the same
|
||||
identity as when run initially. In other words, it has the same job
|
||||
instance id.
|
||||
|
||||
Rerunnable::
|
||||
A job that is restartable and manages its own state in terms of
|
||||
previous run's record processing. An example of a rerunnable step is
|
||||
one based on a driving query. If the driving query can be formed so
|
||||
that it will limit the processed rows when the job is restarted, then
|
||||
it is re-runnable. This is managed by the application logic. Often
|
||||
times a condition is added to the where statement to limit the rows
|
||||
returned by the driving query with something like "and processedFlag
|
||||
!= true".
|
||||
|
||||
Repeat::
|
||||
One of the most basic units of batch processing, that defines
|
||||
repeatedly calling a portion of code until it is finished, and
|
||||
while there is no error. Typically a batch process would be repeatable
|
||||
as long as there is input.
|
||||
|
||||
Retry::
|
||||
Simplifies the execution of operations with retry semantics most
|
||||
frequently associated with handling transactional output exceptions.
|
||||
Retry is slightly different from repeat, rather than continually
|
||||
calling a block of code, retry is stateful, and continually calls the
|
||||
same block of code with the same input, until it either succeeds, or
|
||||
some type of retry limit has been exceeded. It is only generally
|
||||
useful if a subsequent invocation of the operation might succeed
|
||||
because something in the environment has improved.
|
||||
|
||||
Recover::
|
||||
Recover operations handle an exception in such a way that a
|
||||
repeat process is able to continue.
|
||||
|
||||
Skip::
|
||||
Skip is a recovery strategy often used on file input sources as
|
||||
the strategy for ignoring bad input records that failed
|
||||
validation.
|
||||
|
||||
12
spring-batch-docs/asciidoc/header/index-header.adoc
Normal file
@@ -0,0 +1,12 @@
|
||||
= Spring Batch - Reference Documentation
|
||||
|
||||
Lucas Ward, Dave Syer, Thomas Risberg, Robert Kasanicky, Dan Garrette, Wayne Lund, Michael Minella, Chris Schaefer, Gunnar Hillert, Glenn Renfro
|
||||
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
|
||||
Copyright © 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017 Pivotal, Inc. All Rights Reserved.
|
||||
|
||||
Copies of this document may be made for your own use and for
|
||||
distribution to others, provided that you do not charge any fee for such
|
||||
copies and further provided that each copy contains this Copyright
|
||||
Notice, whether distributed in print or electronically.
|
||||
|
Before Width: | Height: | Size: 13 KiB After Width: | Height: | Size: 13 KiB |
|
Before Width: | Height: | Size: 1.2 MiB After Width: | Height: | Size: 1.2 MiB |
|
Before Width: | Height: | Size: 3.7 KiB After Width: | Height: | Size: 3.7 KiB |
|
Before Width: | Height: | Size: 60 KiB After Width: | Height: | Size: 60 KiB |
|
Before Width: | Height: | Size: 15 KiB After Width: | Height: | Size: 15 KiB |
|
Before Width: | Height: | Size: 40 KiB After Width: | Height: | Size: 40 KiB |
|
Before Width: | Height: | Size: 3.2 KiB After Width: | Height: | Size: 3.2 KiB |
|
Before Width: | Height: | Size: 8.1 KiB After Width: | Height: | Size: 8.1 KiB |
|
Before Width: | Height: | Size: 32 KiB After Width: | Height: | Size: 32 KiB |
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 251 KiB After Width: | Height: | Size: 251 KiB |
|
Before Width: | Height: | Size: 20 KiB After Width: | Height: | Size: 20 KiB |
|
Before Width: | Height: | Size: 5.8 KiB After Width: | Height: | Size: 5.8 KiB |
|
Before Width: | Height: | Size: 5.3 KiB After Width: | Height: | Size: 5.3 KiB |
|
Before Width: | Height: | Size: 5.6 KiB After Width: | Height: | Size: 5.6 KiB |
|
Before Width: | Height: | Size: 77 KiB After Width: | Height: | Size: 77 KiB |
|
Before Width: | Height: | Size: 46 KiB After Width: | Height: | Size: 46 KiB |
|
Before Width: | Height: | Size: 128 KiB After Width: | Height: | Size: 128 KiB |
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 28 KiB |
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 584 KiB After Width: | Height: | Size: 584 KiB |
|
Before Width: | Height: | Size: 23 KiB After Width: | Height: | Size: 23 KiB |
|
Before Width: | Height: | Size: 380 KiB After Width: | Height: | Size: 380 KiB |
|
Before Width: | Height: | Size: 143 KiB After Width: | Height: | Size: 143 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 43 KiB After Width: | Height: | Size: 43 KiB |
|
Before Width: | Height: | Size: 29 KiB After Width: | Height: | Size: 29 KiB |
|
Before Width: | Height: | Size: 14 KiB After Width: | Height: | Size: 14 KiB |
|
Before Width: | Height: | Size: 4.4 KiB After Width: | Height: | Size: 4.4 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
|
Before Width: | Height: | Size: 4.6 KiB After Width: | Height: | Size: 4.6 KiB |
|
Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 106 KiB After Width: | Height: | Size: 106 KiB |
|
Before Width: | Height: | Size: 26 KiB After Width: | Height: | Size: 26 KiB |
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 28 KiB After Width: | Height: | Size: 28 KiB |
|
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
|
Before Width: | Height: | Size: 5.0 KiB After Width: | Height: | Size: 5.0 KiB |
|
Before Width: | Height: | Size: 4.8 KiB After Width: | Height: | Size: 4.8 KiB |
|
Before Width: | Height: | Size: 36 KiB After Width: | Height: | Size: 36 KiB |
|
Before Width: | Height: | Size: 5.9 KiB After Width: | Height: | Size: 5.9 KiB |
|
Before Width: | Height: | Size: 39 KiB After Width: | Height: | Size: 39 KiB |
|
Before Width: | Height: | Size: 172 KiB After Width: | Height: | Size: 172 KiB |
|
Before Width: | Height: | Size: 11 KiB After Width: | Height: | Size: 11 KiB |
|
Before Width: | Height: | Size: 19 KiB After Width: | Height: | Size: 19 KiB |
|
Before Width: | Height: | Size: 12 KiB After Width: | Height: | Size: 12 KiB |
|
Before Width: | Height: | Size: 45 KiB After Width: | Height: | Size: 45 KiB |
|
Before Width: | Height: | Size: 37 KiB After Width: | Height: | Size: 37 KiB |
|
Before Width: | Height: | Size: 22 KiB After Width: | Height: | Size: 22 KiB |
|
Before Width: | Height: | Size: 24 KiB After Width: | Height: | Size: 24 KiB |
|
Before Width: | Height: | Size: 18 KiB After Width: | Height: | Size: 18 KiB |
|
Before Width: | Height: | Size: 25 KiB After Width: | Height: | Size: 25 KiB |
|
Before Width: | Height: | Size: 17 KiB After Width: | Height: | Size: 17 KiB |
|
Before Width: | Height: | Size: 52 KiB After Width: | Height: | Size: 52 KiB |
39
spring-batch-docs/asciidoc/index-pdf.adoc
Normal file
@@ -0,0 +1,39 @@
|
||||
:doctype: book
|
||||
:toc:
|
||||
:toclevels: 4
|
||||
|
||||
include::header/index-header.adoc[]
|
||||
|
||||
include::spring-batch-intro.adoc[]
|
||||
|
||||
include::whatsnew.adoc[]
|
||||
|
||||
include::domain.adoc[]
|
||||
|
||||
include::job.adoc[]
|
||||
|
||||
include::step.adoc[]
|
||||
|
||||
include::readersAndWriters.adoc[]
|
||||
|
||||
include::scalability.adoc[]
|
||||
|
||||
include::repeat.adoc[]
|
||||
|
||||
include::retry.adoc[]
|
||||
|
||||
include::testing.adoc[]
|
||||
|
||||
include::common-patterns.adoc[]
|
||||
|
||||
include::jsr-352.adoc[]
|
||||
|
||||
include::spring-batch-integration.adoc[]
|
||||
|
||||
include::appendix.adoc[]
|
||||
|
||||
include::schema-appendix.adoc[]
|
||||
|
||||
include::transaction-appendix.adoc[]
|
||||
|
||||
include::glossary.adoc[]
|
||||
39
spring-batch-docs/asciidoc/index-single.adoc
Normal file
@@ -0,0 +1,39 @@
|
||||
:doctype: book
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
include::header/index-header.adoc[]
|
||||
|
||||
include::spring-batch-intro.adoc[]
|
||||
|
||||
include::whatsnew.adoc[]
|
||||
|
||||
include::domain.adoc[]
|
||||
|
||||
include::job.adoc[]
|
||||
|
||||
include::step.adoc[]
|
||||
|
||||
include::readersAndWriters.adoc[]
|
||||
|
||||
include::scalability.adoc[]
|
||||
|
||||
include::repeat.adoc[]
|
||||
|
||||
include::retry.adoc[]
|
||||
|
||||
include::testing.adoc[]
|
||||
|
||||
include::common-patterns.adoc[]
|
||||
|
||||
include::jsr-352.adoc[]
|
||||
|
||||
include::spring-batch-integration.adoc[]
|
||||
|
||||
include::appendix.adoc[]
|
||||
|
||||
include::schema-appendix.adoc[]
|
||||
|
||||
include::transaction-appendix.adoc[]
|
||||
|
||||
include::glossary.adoc[]
|
||||
39
spring-batch-docs/asciidoc/index.adoc
Normal file
@@ -0,0 +1,39 @@
|
||||
include::header/index-header.adoc[]
|
||||
|
||||
// ======================================================================================
|
||||
|
||||
* <<spring-batch-intro.adoc#spring-batch-intro,Spring Batch Introduction>>
|
||||
|
||||
* <<whatsnew.adoc#whatsNew,What's New in Spring Batch 4.0>>
|
||||
|
||||
* <<domain.adoc#domainLanguageOfBatch,The Domain Language of Batch>>
|
||||
|
||||
* <<job.adoc#configureJob,Configuring and Running a Job>>
|
||||
|
||||
* <<step.adoc#configureStep,Configuring a Step>>
|
||||
|
||||
* <<readersAndWriters.adoc#readersAndWriters,ItemReaders and ItemWriters>>
|
||||
|
||||
* <<scalability.adoc#scalability,Scaling and Parallel Processing>>
|
||||
|
||||
* <<repeat.adoc#repeat,Repeat>>
|
||||
|
||||
* <<retry.adoc#retry,Retry>>
|
||||
|
||||
* <<testing.adoc#testing,Unit Testing>>
|
||||
|
||||
* <<common-patterns.adoc#commonPatterns, Common Patterns>>
|
||||
|
||||
* <<jsr-352.adoc#jsr-352,JSR-352 Support>>
|
||||
|
||||
* <<spring-batch-integration.adoc#springBatchIntegration,Spring Batch Integration>>
|
||||
|
||||
[big maroon]#Appendix#
|
||||
|
||||
* <<appendix.adoc#listOfReadersAndWriters,List of ItemReaders and ItemWriters>>
|
||||
|
||||
* <<schema-appendix.adoc#metaDataSchema,Meta-Data Schema>>
|
||||
|
||||
* <<transaction-appendix.adoc#transactions,Batch Processing and Transactions>>
|
||||
|
||||
* <<glossary.adoc#glossary,Glossary>>
|
||||
1210
spring-batch-docs/asciidoc/job.adoc
Normal file
626
spring-batch-docs/asciidoc/jsr-352.adoc
Normal file
@@ -0,0 +1,626 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[jsr-352]]
|
||||
|
||||
== JSR-352 Support
|
||||
|
||||
As of Spring Batch 3.0 support for JSR-352 has been fully implemented. This section is not a replacement for
|
||||
the spec itself and instead, intends to explain how the JSR-352 specific concepts apply to Spring Batch.
|
||||
Additional information on JSR-352 can be found via the
|
||||
JCP here: link:$$https://jcp.org/en/jsr/detail?id=352$$[https://jcp.org/en/jsr/detail?id=352]
|
||||
|
||||
[[jsrGeneralNotes]]
|
||||
|
||||
|
||||
=== General Notes about Spring Batch and JSR-352
|
||||
|
||||
Spring Batch and JSR-352 are structurally the same. They both have jobs that are made up of steps. They
|
||||
both have readers, processors, writers, and listeners. However, their interactions are subtly different.
|
||||
For example, the `org.springframework.batch.core.SkipListener#onSkipInWrite(S item, Throwable t)`
|
||||
within Spring Batch receives two parameters: the item that was skipped and the Exception that caused the
|
||||
skip. The JSR-352 version of the same method
|
||||
(`javax.batch.api.chunk.listener.SkipWriteListener#onSkipWriteItem(List<Object> items, Exception ex)`)
|
||||
also receives two parameters. However the first one is a `List` of all the items
|
||||
within the current chunk with the second being the `Exception` that caused the skip.
|
||||
Because of these differences, it is important to note that there are two paths to execute a job within
|
||||
Spring Batch: either a traditional Spring Batch job or a JSR-352 based job. While the use of Spring Batch
|
||||
artifacts (readers, writers, etc) will work within a job configured via JSR-352's JSL and executed via the
|
||||
`JsrJobOperator`, they will behave according to the rules of JSR-352. It is also
|
||||
important to note that batch artifacts that have been developed against the JSR-352 interfaces will not work
|
||||
within a traditional Spring Batch job.
|
||||
|
||||
[[jsrSetup]]
|
||||
|
||||
|
||||
=== Setup
|
||||
|
||||
[[jsrSetupContexts]]
|
||||
|
||||
|
||||
==== Application Contexts
|
||||
|
||||
All JSR-352 based jobs within Spring Batch consist of two application contexts. A parent context, that
|
||||
contains beans related to the infrastructure of Spring Batch such as the `JobRepository`,
|
||||
`PlatformTransactionManager`, etc and a child context that consists of the configuration
|
||||
of the job to be run. The parent context is defined via the `baseContext.xml` provided
|
||||
by the framework. This context may be overridden via the `JSR-352-BASE-CONTEXT` system
|
||||
property.
|
||||
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
The base context is not processed by the JSR-352 processors for things like property injection so
|
||||
no components requiring that additional processing should be configured there.
|
||||
|
||||
====
|
||||
|
||||
|
||||
[[jsrSetupLaunching]]
|
||||
|
||||
|
||||
==== Launching a JSR-352 based job
|
||||
|
||||
JSR-352 requires a very simple path to executing a batch job. The following code is all that is needed to
|
||||
execute your first batch job:
|
||||
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
JobOperator jobOperator = BatchRuntime.getJobOperator();
|
||||
jobOperator.start("myJob", new Properties());
|
||||
----
|
||||
|
||||
While that is convenient for developers, the devil is in the details. Spring Batch bootstraps a bit of
|
||||
infrastructure behind the scenes that a developer may want to override. The following is bootstrapped the
|
||||
first time `BatchRuntime.getJobOperator()` is called:
|
||||
|
||||
|===============
|
||||
|__Bean Name__|__Default Configuration__|__Notes__
|
||||
|
|
||||
dataSource
|
||||
|
|
||||
Apache DBCP BasicDataSource with configured values.
|
||||
|
|
||||
By default, HSQLDB is bootstrapped.
|
||||
|
||||
|`transactionManager`|`org.springframework.jdbc.datasource.DataSourceTransactionManager`|
|
||||
References the dataSource bean defined above.
|
||||
|
||||
|
|
||||
A Datasource initializer
|
||||
||
|
||||
This is configured to execute the scripts configured via the
|
||||
`batch.drop.script` and `batch.schema.script` properties. By
|
||||
default, the schema scripts for HSQLDB are executed. This behavior can be disabled via
|
||||
`batch.data.source.init` property.
|
||||
|
||||
|
|
||||
jobRepository
|
||||
|
|
||||
A JDBC based `SimpleJobRepository`.
|
||||
|
|
||||
This `JobRepository` uses the previously mentioned data source and transaction
|
||||
manager. The schema's table prefix is configurable (defaults to BATCH_) via the
|
||||
`batch.table.prefix` property.
|
||||
|
||||
|
|
||||
jobLauncher
|
||||
|`org.springframework.batch.core.launch.support.SimpleJobLauncher`|
|
||||
Used to launch jobs.
|
||||
|
||||
|
|
||||
batchJobOperator
|
||||
|`org.springframework.batch.core.launch.support.SimpleJobOperator`|
|
||||
The `JsrJobOperator` wraps this to provide most of its functionality.
|
||||
|
||||
|
|
||||
jobExplorer
|
||||
|`org.springframework.batch.core.explore.support.JobExplorerFactoryBean`|
|
||||
Used to address lookup functionality provided by the `JsrJobOperator`.
|
||||
|
||||
|
|
||||
jobParametersConverter
|
||||
|`org.springframework.batch.core.jsr.JsrJobParametersConverter`|
|
||||
JSR-352 specific implementation of the `JobParametersConverter`.
|
||||
|
||||
|
|
||||
jobRegistry
|
||||
|`org.springframework.batch.core.configuration.support.MapJobRegistry`|
|
||||
Used by the `SimpleJobOperator`.
|
||||
|
||||
|
|
||||
placeholderProperties
|
||||
|`org.springframework.beans.factory.config.PropertyPlaceholderConfigurer`|
|
||||
Loads the properties file `batch-${ENVIRONMENT:hsql}.properties` to configure
|
||||
the properties mentioned above. ENVIRONMENT is a System property (defaults to hsql)
|
||||
that can be used to specify any of the databases Spring Batch currently
|
||||
supports.
|
||||
|
||||
|
||||
|===============
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
None of the above beans are optional for executing JSR-352 based jobs. All may be overridden to
|
||||
provide customized functionality as needed.
|
||||
====
|
||||
|
||||
|
||||
[[dependencyInjection]]
|
||||
|
||||
|
||||
=== Dependency Injection
|
||||
|
||||
JSR-352 is based heavily on the Spring Batch programming model. As such, while not explicitly requiring a
|
||||
formal dependency injection implementation, DI of some kind is implied. Spring Batch supports all three
|
||||
methods for loading batch artifacts defined by JSR-352:
|
||||
|
||||
|
||||
* Implementation Specific Loader - Spring Batch is built upon Spring and so supports Spring
|
||||
dependency injection within JSR-352 batch jobs.
|
||||
|
||||
|
||||
* Archive Loader - JSR-352 defines the existence of a batch.xml file that provides mappings between a
|
||||
logical name and a class name. This file must be found within the /META-INF/ directory if it is
|
||||
used.
|
||||
|
||||
|
||||
* Thread Context Class Loader - JSR-352 allows configurations to specify batch artifact
|
||||
implementations in their JSL by providing the fully qualified class name inline. Spring Batch
|
||||
supports this as well in JSR-352 configured jobs.
|
||||
|
||||
Using Spring dependency injection within a JSR-352 based batch job consists of configuring batch
|
||||
artifacts using a Spring application context as beans. Once the beans have been defined, a job can refer to
|
||||
them as it would any bean defined within the batch.xml.
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xsi:schemaLocation="http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans.xsd
|
||||
http://xmlns.jcp.org/xml/ns/javaee
|
||||
http://xmlns.jcp.org/xml/ns/javaee/jobXML_1_0.xsd">
|
||||
|
||||
<!-- javax.batch.api.Batchlet implementation -->
|
||||
<bean id="fooBatchlet" class="io.spring.FooBatchlet">
|
||||
<property name="prop" value="bar"/>
|
||||
</bean>
|
||||
|
||||
<!-- Job is defined using the JSL schema provided in JSR-352 -->
|
||||
<job id="fooJob" xmlns="http://xmlns.jcp.org/xml/ns/javaee" version="1.0">
|
||||
<step id="step1">
|
||||
<batchlet ref="fooBatchlet"/>
|
||||
</step>
|
||||
</job>
|
||||
</beans>
|
||||
|
||||
----
|
||||
|
||||
The assembly of Spring contexts (imports, etc) works with JSR-352 jobs just as it would with any other
|
||||
Spring based application. The only difference with a JSR-352 based job is that the entry point for the
|
||||
context definition will be the job definition found in /META-INF/batch-jobs/.
|
||||
|
||||
To use the thread context class loader approach, all you need to do is provide the fully qualified class
|
||||
name as the ref. It is important to note that when using this approach or the batch.xml approach, the class
|
||||
referenced requires a no argument constructor which will be used to create the bean.
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<job id="fooJob" xmlns="http://xmlns.jcp.org/xml/ns/javaee" version="1.0">
|
||||
<step id="step1" >
|
||||
<batchlet ref="io.spring.FooBatchlet" />
|
||||
</step>
|
||||
</job>
|
||||
|
||||
----
|
||||
|
||||
[[jsrJobProperties]]
|
||||
|
||||
|
||||
=== Batch Properties
|
||||
|
||||
[[jsrPropertySupport]]
|
||||
|
||||
|
||||
==== Property Support
|
||||
|
||||
JSR-352 allows for properties to be defined at the Job, Step and batch artifact level by way of
|
||||
configuration in the JSL. Batch properties are configured at each level in the following way:
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<properties>
|
||||
<property name="propertyName1" value="propertyValue1"/>
|
||||
<property name="propertyName2" value="propertyValue2"/>
|
||||
</properties>
|
||||
----
|
||||
|
||||
|
||||
`Properties` may be configured on any batch artifact.
|
||||
|
||||
[[jsrBatchPropertyAnnotation]]
|
||||
|
||||
|
||||
==== @BatchProperty annotation
|
||||
|
||||
`Properties` are referenced in batch artifacts by annotating class fields with the
|
||||
`@BatchProperty` and `@Inject` annotations (both annotations
|
||||
are required by the spec). As defined by JSR-352, fields for properties must be String typed. Any type
|
||||
conversion is up to the implementing developer to perform.
|
||||
|
||||
A `javax.batch.api.chunk.ItemReader` artifact could be configured with a
|
||||
properties block such as the one described above and accessed as such:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class MyItemReader extends AbstractItemReader {
|
||||
@Inject
|
||||
@BatchProperty
|
||||
private String propertyName1;
|
||||
|
||||
...
|
||||
}
|
||||
----
|
||||
|
||||
|
||||
The value of the field "propertyName1" will be "propertyValue1".
|
||||
|
||||
[[jsrPropertySubstitution]]
|
||||
|
||||
|
||||
==== Property Substitution
|
||||
|
||||
Property substitution is provided by way of operators and simple conditional expressions. The general
|
||||
usage is `#{operator['key']}`.
|
||||
|
||||
Supported operators:
|
||||
|
||||
* jobParameters - access job parameter values that the job was started/restarted with.
|
||||
|
||||
|
||||
* jobProperties - access properties configured at the job level of the JSL.
|
||||
|
||||
|
||||
* systemProperties - access named system properties.
|
||||
|
||||
|
||||
* partitionPlan - access named property from the partition plan of a partitioned step.
|
||||
|
||||
----
|
||||
#{jobParameters['unresolving.prop']}?:#{systemProperties['file.separator']}
|
||||
----
|
||||
|
||||
The left hand side of the assignment is the expected value, the right hand side is the default value. In
|
||||
this example, the result will resolve to a value of the system property file.separator as
|
||||
#{jobParameters['unresolving.prop']} is assumed to not be resolvable. If neither expression can be
|
||||
resolved, an empty String will be returned. Multiple conditions can be used, which are separated by a
|
||||
';'.
|
||||
|
||||
|
||||
[[jsrProcessingModels]]
|
||||
|
||||
|
||||
=== Processing Models
|
||||
|
||||
JSR-352 provides the same two basic processing models that Spring Batch does:
|
||||
|
||||
|
||||
|
||||
* Item based processing - Using a `javax.batch.api.chunk.ItemReader`, an
|
||||
optional `javax.batch.api.chunk.ItemProcessor`, and an
|
||||
`javax.batch.api.chunk.ItemWriter`.
|
||||
|
||||
|
||||
* Task based processing - Using a `javax.batch.api.Batchlet`
|
||||
implementation. This processing model is the same as the
|
||||
`org.springframework.batch.core.step.tasklet.Tasklet` based processing
|
||||
currently available.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
==== Item based processing
|
||||
|
||||
Item based processing in this context is a chunk size being set by the number of items read by an
|
||||
`ItemReader`. To configure a step this way, specify the
|
||||
`item-count` (which defaults to 10) and optionally configure the
|
||||
`checkpoint-policy` as item (this is the default).
|
||||
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
...
|
||||
<step id="step1">
|
||||
<chunk checkpoint-policy="item" item-count="3">
|
||||
<reader ref="fooReader"/>
|
||||
<processor ref="fooProcessor"/>
|
||||
<writer ref="fooWriter"/>
|
||||
</chunk>
|
||||
</step>
|
||||
...
|
||||
----
|
||||
|
||||
|
||||
If item based checkpointing is chosen, an additional attribute `time-limit` is
|
||||
supported. This sets a time limit for how long the number of items specified has to be processed. If
|
||||
the timeout is reached, the chunk will complete with however many items have been read by then
|
||||
regardless of what the `item-count` is configured to be.
|
||||
|
||||
|
||||
|
||||
|
||||
==== Custom checkpointing
|
||||
|
||||
JSR-352 calls the process around the commit interval within a step "checkpointing". Item based
|
||||
checkpointing is one approach as mentioned above. However, this will not be robust enough in many
|
||||
cases. Because of this, the spec allows for the implementation of a custom checkpointing algorithm by
|
||||
implementing the `javax.batch.api.chunk.CheckpointAlgorithm` interface. This
|
||||
functionality is essentially the same as Spring Batch's custom completion policy. To use an
|
||||
implementation of `CheckpointAlgorithm`, configure your step with the custom
|
||||
`checkpoint-policy` as shown below where fooCheckpointer refers to an
|
||||
implementation of `CheckpointAlgorithm`.
|
||||
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
...
|
||||
<step id="step1">
|
||||
<chunk checkpoint-policy="custom">
|
||||
<checkpoint-algorithm ref="fooCheckpointer"/>
|
||||
<reader ref="fooReader"/>
|
||||
<processor ref="fooProcessor"/>
|
||||
<writer ref="fooWriter"/>
|
||||
</chunk>
|
||||
</step>
|
||||
...
|
||||
----
|
||||
|
||||
[[jsrRunningAJob]]
|
||||
|
||||
|
||||
=== Running a job
|
||||
|
||||
The entrance to executing a JSR-352 based job is through the
|
||||
`javax.batch.operations.JobOperator`. Spring Batch provides its own implementation of
|
||||
this interface (`org.springframework.batch.core.jsr.launch.JsrJobOperator`). This
|
||||
implementation is loaded via the `javax.batch.runtime.BatchRuntime`. Launching a
|
||||
JSR-352 based batch job is implemented as follows:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
|
||||
JobOperator jobOperator = BatchRuntime.getJobOperator();
|
||||
long jobExecutionId = jobOperator.start("fooJob", new Properties());
|
||||
|
||||
----
|
||||
|
||||
The above code does the following:
|
||||
|
||||
|
||||
|
||||
* Bootstraps a base `ApplicationContext` - In order to provide batch functionality, the framework
|
||||
needs some infrastructure bootstrapped. This occurs once per JVM. The components that are
|
||||
bootstrapped are similar to those provided by `@EnableBatchProcessing`.
|
||||
Specific details can be found in the javadoc for the `JsrJobOperator`.
|
||||
|
||||
|
||||
|
||||
* Loads an `ApplicationContext` for the job requested - In the example
|
||||
above, the framework will look in /META-INF/batch-jobs for a file named fooJob.xml and load a
|
||||
context that is a child of the shared context mentioned previously.
|
||||
|
||||
|
||||
* Launches the job - The job defined within the context will be executed asynchronously. The
|
||||
`JobExecution's` id will be returned.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
All JSR-352 based batch jobs are executed asynchronously.
|
||||
====
|
||||
|
||||
|
||||
When `JobOperator#start` is called using `SimpleJobOperator`,
|
||||
Spring Batch determines if the call is an initial run or a retry of a previously executed run. Using the
|
||||
JSR-352 based `JobOperator#start(String jobXMLName, Properties jobParameters)`, the
|
||||
framework will always create a new JobInstance (JSR-352 job parameters are
|
||||
non-identifying). In order to restart a job, a call to
|
||||
`JobOperator#restart(long executionId, Properties restartParameters)` is required.
|
||||
|
||||
|
||||
[[jsrContexts]]
|
||||
|
||||
|
||||
=== Contexts
|
||||
|
||||
JSR-352 defines two context objects that are used to interact with the meta-data of a job or step from
|
||||
within a batch artifact: `javax.batch.runtime.context.JobContext` and
|
||||
`javax.batch.runtime.context.StepContext`. Both of these are available in any step
|
||||
level artifact (`Batchlet`, `ItemReader`, etc) with the
|
||||
`JobContext` being available to job level artifacts as well
|
||||
(JobListener for example).
|
||||
|
||||
To obtain a reference to the `JobContext` or `StepContext`
|
||||
within the current scope, simply use the `@Inject` annotation:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
@Inject
|
||||
JobContext jobContext;
|
||||
|
||||
----
|
||||
|
||||
|
||||
[NOTE]
|
||||
.@Autowired for JSR-352 contexts
|
||||
====
|
||||
Using Spring's `@Autowired` is not supported for the injection of these contexts.
|
||||
====
|
||||
|
||||
|
||||
In Spring Batch, the `JobContext` and `StepContext` wrap their
|
||||
corresponding execution objects (`JobExecution` and
|
||||
`StepExecution` respectively). Data stored via
|
||||
`StepContext#setPersistentUserData(Serializable data)` is stored in the
|
||||
Spring Batch `StepExecution#executionContext`.
|
||||
|
||||
[[jsrStepFlow]]
|
||||
|
||||
|
||||
=== Step Flow
|
||||
|
||||
Within a JSR-352 based job, the flow of steps works similarly as it does within Spring Batch.
|
||||
However, there are a few subtle differences:
|
||||
|
||||
|
||||
|
||||
* Decisions are steps - In a regular Spring Batch job, a decision is a state that does not
|
||||
have an independent `StepExecution` or any of the rights and
|
||||
responsibilities that go along with being a full step. However, with JSR-352, a decision
|
||||
is a step just like any other and will behave just as any other steps (transactionality,
|
||||
it gets a `StepExecution`, etc). This means that they are treated the
|
||||
same as any other step on restarts as well.
|
||||
|
||||
|
||||
* `next` attribute and step transitions - In a regular job, these are
|
||||
allowed to appear together in the same step. JSR-352 allows them to both be used in the
|
||||
same step with the next attribute taking precedence in evaluation.
|
||||
|
||||
|
||||
* Transition element ordering - In a standard Spring Batch job, transition elements are
|
||||
sorted from most specific to least specific and evaluated in that order. JSR-352 jobs
|
||||
evaluate transition elements in the order they are specified in the XML.
|
||||
|
||||
|
||||
|
||||
|
||||
[[jsrScaling]]
|
||||
|
||||
|
||||
=== Scaling a JSR-352 batch job
|
||||
|
||||
Traditional Spring Batch jobs have four ways of scaling (the last two capable of being executed across
|
||||
multiple JVMs):
|
||||
|
||||
* Split - Running multiple steps in parallel.
|
||||
|
||||
|
||||
* Multiple threads - Executing a single step via multiple threads.
|
||||
|
||||
|
||||
* Partitioning - Dividing the data up for parallel processing (master/slave).
|
||||
|
||||
|
||||
* Remote Chunking - Executing the processor piece of logic remotely.
|
||||
|
||||
|
||||
|
||||
|
||||
JSR-352 provides two options for scaling batch jobs. Both options support only a single JVM:
|
||||
|
||||
* Split - Same as Spring Batch
|
||||
|
||||
|
||||
* Partitioning - Conceptually the same as Spring Batch, however implemented slightly differently.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
[[jsrPartitioning]]
|
||||
|
||||
|
||||
==== Partitioning
|
||||
|
||||
Conceptually, partitioning in JSR-352 is the same as it is in Spring Batch. Meta-data is provided
|
||||
to each slave to identify the input to be processed with the slaves reporting back to the master the
|
||||
results upon completion. However, there are some important differences:
|
||||
|
||||
* Partitioned `Batchlet` - This will run multiple instances of the
|
||||
configured `Batchlet` on multiple threads. Each instance will have
|
||||
its own set of properties as provided by the JSL or the
|
||||
`PartitionPlan`
|
||||
|
||||
|
||||
* `PartitionPlan` - With Spring Batch's partitioning, an
|
||||
`ExecutionContext` is provided for each partition. With JSR-352, a
|
||||
single `javax.batch.api.partition.PartitionPlan` is provided with an
|
||||
array of `Properties` providing the meta-data for each partition.
|
||||
|
||||
|
||||
|
||||
* `PartitionMapper` - JSR-352 provides two ways to generate partition
|
||||
meta-data. One is via the JSL (partition properties). The second is via an implementation
|
||||
of the `javax.batch.api.partition.PartitionMapper` interface.
|
||||
Functionally, this interface is similar to the
|
||||
`org.springframework.batch.core.partition.support.Partitioner`
|
||||
interface provided by Spring Batch in that it provides a way to programmatically generate
|
||||
meta-data for partitioning.
|
||||
|
||||
|
||||
* `StepExecutions` - In Spring Batch, partitioned steps are run as
|
||||
master/slave. Within JSR-352, the same configuration occurs. However, the slave steps do
|
||||
not get official `StepExecutions`. Because of that, calls to
|
||||
`JsrJobOperator#getStepExecutions(long jobExecutionId)` will only
|
||||
return the `StepExecution` for the master.
|
||||
[NOTE]
|
||||
====
|
||||
The child `StepExecutions` still exist in the job repository and are available
|
||||
via the `JobExplorer` and Spring Batch Admin.
|
||||
|
||||
====
|
||||
|
||||
|
||||
* Compensating logic - Since Spring Batch implements the master/slave logic of
|
||||
partitioning using steps, `StepExecutionListeners` can be used to
|
||||
handle compensating logic if something goes wrong. However, since JSR-352 slaves do not get official `StepExecutions`, JSR-352
|
||||
provides a collection of other components for the ability to provide compensating logic when
|
||||
errors occur and to dynamically set the exit status. These components include the following:
|
||||
|
||||
|===============
|
||||
|__Artifact Interface__|__Description__
|
||||
|`javax.batch.api.partition.PartitionCollector`|Provides a way for slave steps to send information back to the
|
||||
master. There is one instance per slave thread.
|
||||
|`javax.batch.api.partition.PartitionAnalyzer`|End point that receives the information collected by the
|
||||
`PartitionCollector` as well as the resulting
|
||||
statuses from a completed partition.
|
||||
|`javax.batch.api.partition.PartitionReducer`|Provides the ability to provide compensating logic for a partitioned
|
||||
step.
|
||||
|
||||
|===============
|
||||
|
||||
|
||||
[[jsrTesting]]
|
||||
|
||||
=== Testing
|
||||
|
||||
Since all JSR-352 based jobs are executed asynchronously, it can be difficult to determine when a job has
|
||||
completed. To help with testing, Spring Batch provides the
|
||||
`org.springframework.batch.core.jsr.JsrTestUtils`. This utility class provides the
|
||||
ability to start a job and restart a job and wait for it to complete. Once the job completes, the
|
||||
associated `JobExecution` is returned.
|
||||
|
||||
3008
spring-batch-docs/asciidoc/readersAndWriters.adoc
Normal file
308
spring-batch-docs/asciidoc/repeat.adoc
Normal file
@@ -0,0 +1,308 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[repeat]]
|
||||
|
||||
== Repeat
|
||||
|
||||
[[repeatTemplate]]
|
||||
|
||||
|
||||
=== RepeatTemplate
|
||||
|
||||
Batch processing is about repetitive actions - either as a simple
|
||||
optimization, or as part of a job. To strategize and generalize the
|
||||
repetition as well as to provide what amounts to an iterator framework,
|
||||
Spring Batch has the `RepeatOperations` interface.
|
||||
The `RepeatOperations` interface looks like
|
||||
this:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface RepeatOperations {
|
||||
|
||||
RepeatStatus iterate(RepeatCallback callback) throws RepeatException;
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
The callback is a simple interface that allows you to insert
|
||||
some business logic to be repeated:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface RepeatCallback {
|
||||
|
||||
RepeatStatus doInIteration(RepeatContext context) throws Exception;
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
The callback is executed repeatedly until the implementation
|
||||
decides that the iteration should end. The return value in these
|
||||
interfaces is an enumeration that can either be
|
||||
`RepeatStatus.CONTINUABLE` or
|
||||
`RepeatStatus.FINISHED`. A `RepeatStatus`
|
||||
conveys information to the caller of the repeat operations about whether
|
||||
there is any more work to do. Generally speaking, implementations of
|
||||
`RepeatOperations` should inspect the
|
||||
`RepeatStatus` and use it as part of the decision to
|
||||
end the iteration. Any callback that wishes to signal to the caller that
|
||||
there is no more work to do can return
|
||||
`RepeatStatus.FINISHED`.
|
||||
|
||||
The simplest general purpose implementation of
|
||||
`RepeatOperations` is
|
||||
`RepeatTemplate`. It could be used like this:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
RepeatTemplate template = new RepeatTemplate();
|
||||
|
||||
template.setCompletionPolicy(new FixedChunkSizeCompletionPolicy(2));
|
||||
|
||||
template.iterate(new RepeatCallback() {
|
||||
|
||||
public RepeatStatus doInIteration(RepeatContext context) {
|
||||
// Do stuff in batch...
|
||||
return RepeatStatus.CONTINUABLE;
|
||||
}
|
||||
|
||||
});
|
||||
----
|
||||
|
||||
In the example we return `RepeatStatus.CONTINUABLE` to
|
||||
show that there is more work to do. The callback can also return
|
||||
`RepeatStatus.FINISHED` if it wants to signal to the caller that
|
||||
there is no more work to do. Some iterations can be terminated by
|
||||
considerations intrinsic to the work being done in the callback, others
|
||||
are effectively infinite loops as far as the callback is concerned and the
|
||||
completion decision is delegated to an external policy as in the case
|
||||
above.
|
||||
|
||||
[[repeatContext]]
|
||||
|
||||
|
||||
==== RepeatContext
|
||||
|
||||
The method parameter for the `RepeatCallback`
|
||||
is a `RepeatContext`. Many callbacks will simply
|
||||
ignore the context, but if necessary it can be used as an attribute bag
|
||||
to store transient data for the duration of the iteration. After the
|
||||
`iterate` method returns, the context will no
|
||||
longer exist.
|
||||
|
||||
A `RepeatContext` will have a parent context
|
||||
if there is a nested iteration in progress. The parent context is
|
||||
occasionally useful for storing data that need to be shared between
|
||||
calls to `iterate`. This is the case for instance
|
||||
if you want to count the number of occurrences of an event in the
|
||||
iteration and remember it across subsequent calls.
|
||||
|
||||
[[repeatStatus]]
|
||||
|
||||
|
||||
==== RepeatStatus
|
||||
|
||||
`RepeatStatus` is an enumeration used by
|
||||
Spring Batch to indicate whether processing has finished. These are
|
||||
possible `RepeatStatus` values:
|
||||
|
||||
.RepeatStatus Properties
|
||||
|
||||
|===============
|
||||
|__Value__|__Description__
|
||||
|CONTINUABLE|There is more work to do.
|
||||
|FINISHED|No more repetitions should take place.
|
||||
|
||||
|===============
|
||||
|
||||
|
||||
`RepeatStatus` values can also be combined
|
||||
with a logical AND operation using the `and()`
|
||||
method in `RepeatStatus`. The effect of this is to
|
||||
do a logical AND on the continuable flag. In other words, if either
|
||||
status is `FINISHED`, then the result will be
|
||||
`FINISHED`.
|
||||
|
||||
[[completionPolicies]]
|
||||
|
||||
|
||||
=== Completion Policies
|
||||
|
||||
Inside a `RepeatTemplate` the termination of
|
||||
the loop in the `iterate` method is determined by a
|
||||
`CompletionPolicy` which is also a factory for the
|
||||
`RepeatContext`. The
|
||||
`RepeatTemplate` has the responsibility to use the
|
||||
current policy to create a `RepeatContext` and pass
|
||||
that in to the `RepeatCallback` at every stage in the
|
||||
iteration. After a callback completes its
|
||||
`doInIteration`, the
|
||||
`RepeatTemplate` has to make a call to the
|
||||
`CompletionPolicy` to ask it to update its state
|
||||
(which will be stored in the `RepeatContext`). Then
|
||||
it asks the policy if the iteration is complete.
|
||||
|
||||
Spring Batch provides some simple general purpose implementations of
|
||||
`CompletionPolicy`. The
|
||||
`SimpleCompletionPolicy` just allows an execution up
|
||||
to a fixed number of times (with `RepeatStatus.FINISHED`
|
||||
forcing early completion at any time).
|
||||
|
||||
Users might need to implement their own completion policies for more
|
||||
complicated decisions. For example, a batch processing window that
|
||||
prevents batch jobs from executing once the online systems are in use
|
||||
would require a custom policy.
|
||||
|
||||
[[repeatExceptionHandling]]
|
||||
|
||||
|
||||
=== Exception Handling
|
||||
|
||||
If there is an exception thrown inside a
|
||||
`RepeatCallback`, the
|
||||
`RepeatTemplate` consults an
|
||||
`ExceptionHandler` which can decide whether or not to
|
||||
re-throw the exception.
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface ExceptionHandler {
|
||||
|
||||
void handleException(RepeatContext context, Throwable throwable)
|
||||
throws RuntimeException;
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
A common use case is to count the number of exceptions of a
|
||||
given type, and fail when a limit is reached. For this purpose Spring
|
||||
Batch provides the `SimpleLimitExceptionHandler` and
|
||||
the slightly more flexible
|
||||
`RethrowOnThresholdExceptionHandler`. The
|
||||
`SimpleLimitExceptionHandler` has a limit property
|
||||
and an exception type that should be compared with the current exception -
|
||||
all subclasses of the provided type are also counted. Exceptions of the
|
||||
given type are ignored until the limit is reached, and then rethrown.
|
||||
Those of other types are always rethrown.
|
||||
|
||||
An important optional property of the
|
||||
`SimpleLimitExceptionHandler` is the boolean flag
|
||||
`useParent`. It is false by default, so the limit is only
|
||||
accounted for in the current `RepeatContext`. When
|
||||
set to true, the limit is kept across sibling contexts in a nested
|
||||
iteration (e.g. a set of chunks inside a step).
|
||||
|
||||
[[repeatListeners]]
|
||||
|
||||
|
||||
=== Listeners
|
||||
|
||||
Often it is useful to be able to receive additional callbacks for
|
||||
cross cutting concerns across a number of different iterations. For this
|
||||
purpose Spring Batch provides the `RepeatListener`
|
||||
interface. The `RepeatTemplate` allows users to
|
||||
register `RepeatListeners`, and they will be given
|
||||
callbacks with the `RepeatContext` and
|
||||
`RepeatStatus` where available during the
|
||||
iteration.
|
||||
|
||||
The interface looks like this:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface RepeatListener {
|
||||
void before(RepeatContext context);
|
||||
void after(RepeatContext context, RepeatStatus result);
|
||||
void open(RepeatContext context);
|
||||
void onError(RepeatContext context, Throwable e);
|
||||
void close(RepeatContext context);
|
||||
}
|
||||
----
|
||||
|
||||
The `open` and `close` callbacks come before and after the entire
|
||||
iteration. `before`, `after`
|
||||
and `onError` apply to the individual
|
||||
`RepeatCallback` calls.
|
||||
|
||||
Note that when there is more than one listener, they are in a list,
|
||||
so there is an order. In this case `open` and
|
||||
`before` are called in the same order while
|
||||
`after`, `onError` and
|
||||
`close` are called in reverse order.
|
||||
|
||||
[[repeatParallelProcessing]]
|
||||
|
||||
|
||||
=== Parallel Processing
|
||||
|
||||
Implementations of `RepeatOperations` are not
|
||||
restricted to executing the callback sequentially. It is quite important
|
||||
that some implementations are able to execute their callbacks in parallel.
|
||||
To this end, Spring Batch provides the
|
||||
`TaskExecutorRepeatTemplate`, which uses the Spring
|
||||
`TaskExecutor` strategy to run the
|
||||
`RepeatCallback`. The default is to use a
|
||||
`SynchronousTaskExecutor`, which has the effect of
|
||||
executing the whole iteration in the same thread (the same as a normal
|
||||
`RepeatTemplate`).
|
||||
|
||||
[[declarativeIteration]]
|
||||
|
||||
|
||||
=== Declarative Iteration
|
||||
|
||||
Sometimes there is some business processing that you know you want
|
||||
to repeat every time it happens. The classic example of this is the
|
||||
optimization of a message pipeline - it is more efficient to process a
|
||||
batch of messages, if they are arriving frequently, than to bear the cost
|
||||
of a separate transaction for every message. Spring Batch provides an AOP
|
||||
interceptor that wraps a method call in a
|
||||
`RepeatOperations` for just this purpose. The
|
||||
`RepeatOperationsInterceptor` executes the
|
||||
intercepted method and repeats according to the
|
||||
`CompletionPolicy` in the provided
|
||||
`RepeatTemplate`.
|
||||
|
||||
Here is an example of declarative iteration using the Spring AOP
|
||||
namespace to repeat a service call to a method called
|
||||
`processMessage` (for more detail on how to
|
||||
configure AOP interceptors see the Spring User Guide):
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<aop:config>
|
||||
<aop:pointcut id="transactional"
|
||||
expression="execution(* com..*Service.processMessage(..))" />
|
||||
<aop:advisor pointcut-ref="transactional"
|
||||
advice-ref="retryAdvice" order="-1"/>
|
||||
</aop:config>
|
||||
|
||||
<bean id="retryAdvice" class="org.spr...RepeatOperationsInterceptor"/>
|
||||
----
|
||||
|
||||
The example above uses a default
|
||||
`RepeatTemplate` inside the interceptor. To change
|
||||
the policies, listeners etc. you only need to inject an instance of
|
||||
`RepeatTemplate` into the interceptor.
|
||||
|
||||
If the intercepted method returns `void` then the
|
||||
interceptor always returns `RepeatStatus.CONTINUABLE` (so there is a danger of
|
||||
an infinite loop if the `CompletionPolicy` does not
|
||||
have a finite end point). Otherwise it returns
|
||||
`RepeatStatus.CONTINUABLE` until the return value from the
|
||||
intercepted method is null, at which point it returns
|
||||
`RepeatStatus.FINISHED`. So the business logic inside the target
|
||||
method can signal that there is no more work to do by returning
|
||||
`null`, or by throwing an exception that is re-thrown by the
|
||||
`ExceptionHandler` in the provided
|
||||
`RepeatTemplate`.
|
||||
|
||||
425
spring-batch-docs/asciidoc/retry.adoc
Normal file
@@ -0,0 +1,425 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[retry]]
|
||||
|
||||
== Retry
|
||||
|
||||
[[retryTemplate]]
|
||||
|
||||
|
||||
=== RetryTemplate
|
||||
|
||||
|
||||
[NOTE]
|
||||
====
|
||||
The retry functionality was pulled out of Spring Batch as of 2.2.0.
|
||||
It is now part of a new library, Spring Retry.
|
||||
====
|
||||
|
||||
|
||||
To make processing more robust and less prone to failure, sometimes
|
||||
it helps to automatically retry a failed operation in case it might
|
||||
succeed on a subsequent attempt. Errors that are susceptible to this kind
|
||||
of treatment are transient in nature. For example a remote call to a web
|
||||
service or RMI service that fails because of a network glitch or a
|
||||
`DeadlockLoserDataAccessException` in a database update may
|
||||
resolve themselves after a short wait. To automate the retry of such
|
||||
operations Spring Batch has the `RetryOperations`
|
||||
strategy. The `RetryOperations` interface looks like
|
||||
this:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface RetryOperations {
|
||||
|
||||
<T> T execute(RetryCallback<T> retryCallback) throws Exception;
|
||||
|
||||
<T> T execute(RetryCallback<T> retryCallback, RecoveryCallback<T> recoveryCallback)
|
||||
throws Exception;
|
||||
|
||||
<T> T execute(RetryCallback<T> retryCallback, RetryState retryState)
|
||||
throws Exception, ExhaustedRetryException;
|
||||
|
||||
<T> T execute(RetryCallback<T> retryCallback, RecoveryCallback<T> recoveryCallback,
|
||||
RetryState retryState) throws Exception;
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
The basic callback is a simple interface that allows you to
|
||||
insert some business logic to be retried:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface RetryCallback<T> {
|
||||
|
||||
T doWithRetry(RetryContext context) throws Throwable;
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
The callback is executed and if it fails (by throwing an
|
||||
`Exception`), it will be retried until either it is
|
||||
successful, or the implementation decides to abort. There are a number of
|
||||
overloaded `execute` methods in the
|
||||
`RetryOperations` interface dealing with various use
|
||||
cases for recovery when all retry attempts are exhausted, and also with
|
||||
retry state, which allows clients and implementations to store information
|
||||
between calls (more on this later).
|
||||
|
||||
The simplest general purpose implementation of
|
||||
`RetryOperations` is
|
||||
`RetryTemplate`. It could be used like this:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
RetryTemplate template = new RetryTemplate();
|
||||
|
||||
TimeoutRetryPolicy policy = new TimeoutRetryPolicy();
|
||||
policy.setTimeout(30000L);
|
||||
|
||||
template.setRetryPolicy(policy);
|
||||
|
||||
Foo result = template.execute(new RetryCallback<Foo>() {
|
||||
|
||||
public Foo doWithRetry(RetryContext context) {
|
||||
// Do stuff that might fail, e.g. webservice operation
|
||||
return result;
|
||||
}
|
||||
|
||||
});
|
||||
----
|
||||
|
||||
In the example we execute a web service call and return the result
|
||||
to the user. If that call fails then it is retried until a timeout is
|
||||
reached.
|
||||
|
||||
[[retryContext]]
|
||||
|
||||
|
||||
==== RetryContext
|
||||
|
||||
The method parameter for the `RetryCallback`
|
||||
is a `RetryContext`. Many callbacks will simply
|
||||
ignore the context, but if necessary it can be used as an attribute bag
|
||||
to store data for the duration of the iteration.
|
||||
|
||||
A `RetryContext` will have a parent context
|
||||
if there is a nested retry in progress in the same thread. The parent
|
||||
context is occasionally useful for storing data that need to be shared
|
||||
between calls to `execute`.
|
||||
|
||||
[[recoveryCallback]]
|
||||
|
||||
|
||||
==== RecoveryCallback
|
||||
|
||||
When a retry is exhausted the
|
||||
`RetryOperations` can pass control to a different
|
||||
callback, the `RecoveryCallback`. To use this
|
||||
feature clients just pass in the callbacks together to the same method,
|
||||
for example:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
Foo foo = template.execute(new RetryCallback<Foo>() {
|
||||
public Foo doWithRetry(RetryContext context) {
|
||||
// business logic here
|
||||
},
|
||||
new RecoveryCallback<Foo>() {
|
||||
Foo recover(RetryContext context) throws Exception {
|
||||
// recover logic here
|
||||
}
|
||||
});
|
||||
----
|
||||
|
||||
If the business logic does not succeed before the template
|
||||
decides to abort, then the client is given the chance to do some
|
||||
alternate processing through the recovery callback.
|
||||
|
||||
[[statelessRetry]]
|
||||
|
||||
|
||||
==== Stateless Retry
|
||||
|
||||
In the simplest case, a retry is just a while loop: the
|
||||
`RetryTemplate` can just keep trying until it
|
||||
either succeeds or fails. The `RetryContext`
|
||||
contains some state to determine whether to retry or abort, but this
|
||||
state is on the stack and there is no need to store it anywhere
|
||||
globally, so we call this stateless retry. The distinction between
|
||||
stateless and stateful retry is contained in the implementation of the
|
||||
`RetryPolicy` (the
|
||||
`RetryTemplate` can handle both). In a stateless
|
||||
retry, the callback is always executed in the same thread on retry as
|
||||
when it failed.
|
||||
|
||||
[[statefulRetry]]
|
||||
|
||||
|
||||
==== Stateful Retry
|
||||
|
||||
Where the failure has caused a transactional resource to become
|
||||
invalid, there are some special considerations. This does not apply to a
|
||||
simple remote call because there is no transactional resource (usually),
|
||||
but it does sometimes apply to a database update, especially when using
|
||||
Hibernate. In this case it only makes sense to rethrow the exception
|
||||
that caused the failure immediately so that the transaction can roll
|
||||
back and we can start a new valid one.
|
||||
|
||||
In these cases a stateless retry is not good enough because the
|
||||
re-throw and roll back necessarily involve leaving the
|
||||
`RetryOperations.execute()` method and potentially losing the
|
||||
context that was on the stack. To avoid losing it we have to introduce a
|
||||
storage strategy to lift it off the stack and put it (at a minimum) in
|
||||
heap storage. For this purpose Spring Batch provides a storage strategy
|
||||
`RetryContextCache` which can be injected into the
|
||||
`RetryTemplate`. The default implementation of the
|
||||
`RetryContextCache` is in memory, using a simple
|
||||
`Map`. Advanced usage with multiple processes in a
|
||||
clustered environment might also consider implementing the
|
||||
`RetryContextCache` with a cluster cache of some
|
||||
sort (though, even in a clustered environment this might be
|
||||
overkill).
|
||||
|
||||
Part of the responsibility of the
|
||||
`RetryOperations` is to recognize the failed
|
||||
operations when they come back in a new execution (and usually wrapped
|
||||
in a new transaction). To facilitate this, Spring Batch provides the
|
||||
`RetryState` abstraction. This works in conjunction
|
||||
with special `execute` methods in the
|
||||
`RetryOperations`.
|
||||
|
||||
The way the failed operations are recognized is by identifying the
|
||||
state across multiple invocations of the retry. To identify the state,
|
||||
the user can provide a `RetryState` object that is
|
||||
responsible for returning a unique key identifying the item. The
|
||||
identifier is used as a key in the
|
||||
`RetryContextCache`.
|
||||
|
||||
|
||||
[WARNING]
|
||||
====
|
||||
Be very careful with the implementation of
|
||||
`Object.equals()` and `Object.hashCode()` in the
|
||||
key returned by `RetryState`. The best advice is
|
||||
to use a business key to identify the items. In the case of a JMS
|
||||
message the message ID can be used.
|
||||
====
|
||||
|
||||
|
||||
When the retry is exhausted there is also the option to handle the
|
||||
failed item in a different way, instead of calling the
|
||||
`RetryCallback` (which is presumed now to be likely
|
||||
to fail). Just like in the stateless case, this option is provided by
|
||||
the `RecoveryCallback`, which can be provided by
|
||||
passing it in to the `execute` method of
|
||||
`RetryOperations`.
|
||||
|
||||
The decision to retry or not is actually delegated to a regular
|
||||
`RetryPolicy`, so the usual concerns about limits
|
||||
and timeouts can be injected there (see below).
|
||||
|
||||
[[retryPolicies]]
|
||||
|
||||
|
||||
=== Retry Policies
|
||||
|
||||
Inside a `RetryTemplate` the decision to retry
|
||||
or fail in the `execute` method is determined by a
|
||||
`RetryPolicy` which is also a factory for the
|
||||
`RetryContext`. The
|
||||
`RetryTemplate` has the responsibility to use the
|
||||
current policy to create a `RetryContext` and pass
|
||||
that in to the `RetryCallback` at every attempt.
|
||||
After a callback fails the `RetryTemplate` has to
|
||||
make a call to the `RetryPolicy` to ask it to update
|
||||
its state (which will be stored in the
|
||||
`RetryContext`), and then it asks the policy if
|
||||
another attempt can be made. If another attempt cannot be made (e.g. a
|
||||
limit is reached or a timeout is detected) then the policy is also
|
||||
responsible for handling the exhausted state. Simple implementations will
|
||||
just throw `ExhaustedRetryException` which will cause
|
||||
any enclosing transaction to be rolled back. More sophisticated
|
||||
implementations might attempt to take some recovery action, in which case
|
||||
the transaction can remain intact.
|
||||
|
||||
|
||||
[TIP]
|
||||
====
|
||||
Failures are inherently either retryable or not - if the same
|
||||
exception is always going to be thrown from the business logic, it
|
||||
doesn't help to retry it. So don't retry on all exception types - try to
|
||||
focus on only those exceptions that you expect to be retryable. It's not
|
||||
usually harmful to the business logic to retry more aggressively, but
|
||||
it's wasteful because if a failure is deterministic there will be time
|
||||
spent retrying something that you know in advance is fatal.
|
||||
====
|
||||
|
||||
|
||||
Spring Batch provides some simple general purpose implementations of
|
||||
stateless `RetryPolicy`, for example a
|
||||
`SimpleRetryPolicy`, and the
|
||||
`TimeoutRetryPolicy` used in the example
|
||||
above.
|
||||
|
||||
The `SimpleRetryPolicy` just allows a retry on
|
||||
any of a named list of exception types, up to a fixed number of times. It
|
||||
also has a list of "fatal" exceptions that should never be retried, and
|
||||
this list overrides the retryable list so that it can be used to give
|
||||
finer control over the retry behavior:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
SimpleRetryPolicy policy = new SimpleRetryPolicy();
|
||||
// Set the max retry attempts
|
||||
policy.setMaxAttempts(5);
|
||||
// Retry on all exceptions (this is the default)
|
||||
policy.setRetryableExceptions(new Class[] {Exception.class});
|
||||
// ... but never retry IllegalStateException
|
||||
policy.setFatalExceptions(new Class[] {IllegalStateException.class});
|
||||
|
||||
// Use the policy...
|
||||
RetryTemplate template = new RetryTemplate();
|
||||
template.setRetryPolicy(policy);
|
||||
template.execute(new RetryCallback<Foo>() {
|
||||
public Foo doWithRetry(RetryContext context) {
|
||||
// business logic here
|
||||
}
|
||||
});
|
||||
----
|
||||
|
||||
There is also a more flexible implementation called
|
||||
`ExceptionClassifierRetryPolicy`, which allows the
|
||||
user to configure different retry behavior for an arbitrary set of
|
||||
exception types through the `ExceptionClassifier`
|
||||
abstraction. The policy works by calling on the classifier to convert an
|
||||
exception into a delegate `RetryPolicy`, so for
|
||||
example, one exception type can be retried more times before failure than
|
||||
another by mapping it to a different policy.
|
||||
|
||||
Users might need to implement their own retry policies for more
|
||||
customized decisions. For instance, if there is a well-known,
|
||||
solution-specific, classification of exceptions into retryable and not
|
||||
retryable.
|
||||
|
||||
[[backoffPolicies]]
|
||||
|
||||
|
||||
=== Backoff Policies
|
||||
|
||||
When retrying after a transient failure it often helps to wait a bit
|
||||
before trying again, because usually the failure is caused by some problem
|
||||
that will only be resolved by waiting. If a
|
||||
`RetryCallback` fails, the
|
||||
`RetryTemplate` can pause execution according to the
|
||||
`BackoffPolicy` in place.
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface BackoffPolicy {
|
||||
|
||||
BackOffContext start(RetryContext context);
|
||||
|
||||
void backOff(BackOffContext backOffContext)
|
||||
throws BackOffInterruptedException;
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
A `BackoffPolicy` is free to implement
|
||||
the backOff in any way it chooses. The policies provided by Spring Batch
|
||||
out of the box all use `Object.wait()`. A common use case is to
|
||||
backoff with an exponentially increasing wait period, to avoid two retries
|
||||
getting into lock step and both failing - this is a lesson learned from
|
||||
the ethernet. For this purpose Spring Batch provides the
|
||||
`ExponentialBackoffPolicy`.
|
||||
|
||||
[[retryListeners]]
|
||||
|
||||
|
||||
=== Listeners
|
||||
|
||||
Often it is useful to be able to receive additional callbacks for
|
||||
cross cutting concerns across a number of different retries. For this
|
||||
purpose Spring Batch provides the `RetryListener`
|
||||
interface. The `RetryTemplate` allows users to
|
||||
register `RetryListeners`, and they will be given
|
||||
callbacks with the `RetryContext` and
|
||||
`Throwable` where available during the
|
||||
iteration.
|
||||
|
||||
The interface looks like this:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface RetryListener {
|
||||
|
||||
void open(RetryContext context, RetryCallback<T> callback);
|
||||
|
||||
void onError(RetryContext context, RetryCallback<T> callback, Throwable e);
|
||||
|
||||
void close(RetryContext context, RetryCallback<T> callback, Throwable e);
|
||||
}
|
||||
----
|
||||
|
||||
The `open` and
|
||||
`close` callbacks come before and after the entire
|
||||
retry in the simplest case and `onError` applies to
|
||||
the individual `RetryCallback` calls. The
|
||||
`close` method might also receive a
|
||||
`Throwable`; if there has been an error it is the
|
||||
last one thrown by the `RetryCallback`.
|
||||
|
||||
Note that when there is more than one listener, they are in a list,
|
||||
so there is an order. In this case `open` will be
|
||||
called in the same order while `onError` and
|
||||
`close` will be called in reverse order.
|
||||
|
||||
[[declarativeRetry]]
|
||||
|
||||
|
||||
=== Declarative Retry
|
||||
|
||||
Sometimes there is some business processing that you know you want
|
||||
to retry every time it happens. The classic example of this is the remote
|
||||
service call. Spring Batch provides an AOP interceptor that wraps a method
|
||||
call in a `RetryOperations` for just this purpose.
|
||||
The `RetryOperationsInterceptor` executes the
|
||||
intercepted method and retries on failure according to the
|
||||
`RetryPolicy` in the provided
|
||||
`RetryTemplate`.
|
||||
|
||||
Here is an example of declarative retry using the Spring AOP
|
||||
namespace to retry a service call to a method called
|
||||
`remoteCall` (for more detail on how to configure
|
||||
AOP interceptors see the Spring User Guide):
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<aop:config>
|
||||
<aop:pointcut id="transactional"
|
||||
expression="execution(* com..*Service.remoteCall(..))" />
|
||||
<aop:advisor pointcut-ref="transactional"
|
||||
advice-ref="retryAdvice" order="-1"/>
|
||||
</aop:config>
|
||||
|
||||
<bean id="retryAdvice"
|
||||
class="org.springframework.batch.retry.interceptor.RetryOperationsInterceptor"/>
|
||||
----
|
||||
|
||||
The example above uses a default
|
||||
`RetryTemplate` inside the interceptor. To change the
|
||||
policies or listeners, you only need to inject an instance of
|
||||
`RetryTemplate` into the interceptor.
|
||||
|
||||
401
spring-batch-docs/asciidoc/scalability.adoc
Normal file
@@ -0,0 +1,401 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[scalability]]
|
||||
|
||||
== Scaling and Parallel Processing
|
||||
|
||||
Many batch processing problems can be solved with single threaded,
|
||||
single process jobs, so it is always a good idea to properly check if that
|
||||
meets your needs before thinking about more complex implementations. Measure
|
||||
the performance of a realistic job and see if the simplest implementation
|
||||
meets your needs first: you can read and write a file of several hundred
|
||||
megabytes in well under a minute, even with standard hardware.
|
||||
|
||||
When you are ready to start implementing a job with some parallel
|
||||
processing, Spring Batch offers a range of options, which are described in
|
||||
this chapter, although some features are covered elsewhere. At a high level
|
||||
there are two modes of parallel processing: single process, multi-threaded;
|
||||
and multi-process. These break down into categories as well, as
|
||||
follows:
|
||||
|
||||
|
||||
* Multi-threaded Step (single process)
|
||||
|
||||
|
||||
* Parallel Steps (single process)
|
||||
|
||||
|
||||
* Remote Chunking of Step (multi process)
|
||||
|
||||
|
||||
* Partitioning a Step (single or multi process)
|
||||
|
||||
We review the single-process options first, and then the
|
||||
multi-process options.
|
||||
|
||||
[[multithreadedStep]]
|
||||
|
||||
|
||||
=== Multi-threaded Step
|
||||
|
||||
The simplest way to start parallel processing is to add a
|
||||
`TaskExecutor` to your Step configuration, e.g. as an
|
||||
attribute of the `tasklet`:
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<step id="loading">
|
||||
<tasklet task-executor="taskExecutor">...</tasklet>
|
||||
</step>
|
||||
----
|
||||
|
||||
In this example the taskExecutor is a reference to another bean
|
||||
definition, implementing the `TaskExecutor`
|
||||
interface. `TaskExecutor` is a standard Spring
|
||||
interface, so consult the Spring User Guide for details of available
|
||||
implementations. The simplest multi-threaded
|
||||
`TaskExecutor` is a
|
||||
`SimpleAsyncTaskExecutor`.
|
||||
|
||||
The result of the above configuration will be that the Step
|
||||
executes by reading, processing and writing each chunk of items
|
||||
(each commit interval) in a separate thread of execution. Note
|
||||
that this means there is no fixed order for the items to be
|
||||
processed, and a chunk might contain items that are
|
||||
non-consecutive compared to the single-threaded case. In addition
|
||||
to any limits placed by the task executor (e.g. if it is backed by
|
||||
a thread pool), there is a throttle limit in the tasklet
|
||||
configuration which defaults to 4. You may need to increase this
|
||||
to ensure that a thread pool is fully utilised, e.g.
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<step id="loading"> <tasklet
|
||||
task-executor="taskExecutor"
|
||||
throttle-limit="20">...</tasklet>
|
||||
</step>
|
||||
----
|
||||
|
||||
Note also that there may be limits placed on concurrency by
|
||||
any pooled resources used in your step, such as
|
||||
a `DataSource`. Be sure to make the pool in
|
||||
those resources at least as large as the desired number of
|
||||
concurrent threads in the step.
|
||||
|
||||
There are some practical limitations of using multi-threaded Steps
|
||||
for some common Batch use cases. Many participants in a Step (e.g. readers
|
||||
and writers) are stateful, and if the state is not segregated by thread,
|
||||
then those components are not usable in a multi-threaded Step. In
|
||||
particular most of the off-the-shelf readers and writers from Spring Batch
|
||||
are not designed for multi-threaded use. It is, however, possible to work
|
||||
with stateless or thread safe readers and writers, and there is a sample
|
||||
(parallelJob) in the Spring Batch Samples that shows the use of a process
|
||||
indicator (see <<readersAndWriters.adoc#process-indicator,Preventing State Persistence>>) to keep
|
||||
track of items that have been processed in a database input table.
|
||||
|
||||
Spring Batch provides some implementations of
|
||||
`ItemWriter` and
|
||||
`ItemReader`. Usually they say in the
|
||||
Javadocs if they are thread safe or not, or what you have to do to
|
||||
avoid problems in a concurrent environment. If there is no
|
||||
information in Javadocs, you can check the implementation to see
|
||||
if there is any state. If a reader is not thread safe, it may
|
||||
still be efficient to use it in your own synchronizing delegator.
|
||||
You can synchronize the call to `read()` and as
|
||||
long as the processing and writing is the most expensive part of
|
||||
the chunk your step may still complete much faster than in a
|
||||
single threaded configuration.
|
||||
|
||||
|
||||
[[scalabilityParallelSteps]]
|
||||
|
||||
|
||||
=== Parallel Steps
|
||||
|
||||
As long as the application logic that needs to be parallelized can
|
||||
be split into distinct responsibilities, and assigned to individual steps
|
||||
then it can be parallelized in a single process. Parallel Step execution
|
||||
is easy to configure and use, for example, to execute steps
|
||||
`(step1,step2)` in parallel with
|
||||
`step3`, you could configure a flow like this:
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<job id="job1">
|
||||
<split id="split1" task-executor="taskExecutor" next="step4">
|
||||
<flow>
|
||||
<step id="step1" parent="s1" next="step2"/>
|
||||
<step id="step2" parent="s2"/>
|
||||
</flow>
|
||||
<flow>
|
||||
<step id="step3" parent="s3"/>
|
||||
</flow>
|
||||
</split>
|
||||
<step id="step4" parent="s4"/>
|
||||
</job>
|
||||
|
||||
<beans:bean id="taskExecutor" class="org.spr...SimpleAsyncTaskExecutor"/>
|
||||
----
|
||||
|
||||
The configurable "task-executor" attribute is used to specify which
|
||||
`TaskExecutor` implementation should be used to execute the individual
|
||||
flows. The default is `SyncTaskExecutor`, but an
|
||||
asynchronous `TaskExecutor` is required to run the steps in parallel. Note
|
||||
that the job will ensure that every flow in the split completes before
|
||||
aggregating the exit statuses and transitioning.
|
||||
|
||||
See the section on <<step.adoc#split-flows,Split Flows>> for more
|
||||
detail.
|
||||
|
||||
[[remoteChunking]]
|
||||
|
||||
|
||||
=== Remote Chunking
|
||||
|
||||
In Remote Chunking the Step processing is split across multiple
|
||||
processes, communicating with each other through some middleware. Here is
|
||||
a picture of the pattern in action:
|
||||
|
||||
.Remote Chunking
|
||||
image::{batch-asciidoc}images/remote-chunking.png[Remote Chunking, scaledwidth="60%"]
|
||||
|
||||
The Master component is a single process, and the Slaves are
|
||||
multiple remote processes. Clearly this pattern works best if the Master
|
||||
is not a bottleneck, so the processing must be more expensive than the
|
||||
reading of items (this is often the case in practice).
|
||||
|
||||
The Master is just an implementation of a Spring Batch
|
||||
Step, with the `ItemWriter` replaced with a generic
|
||||
version that knows how to send chunks of items to the middleware as
|
||||
messages. The Slaves are standard listeners for whatever middleware is
|
||||
being used (e.g. with JMS they would be
|
||||
`MessageListeners`), and their role is to process
|
||||
the chunks of items using a standard `ItemWriter` or
|
||||
`ItemProcessor` plus
|
||||
`ItemWriter`, through the
|
||||
`ChunkProcessor` interface. One of the advantages of
|
||||
using this pattern is that the reader, processor and writer components are
|
||||
off-the-shelf (the same as would be used for a local execution of the
|
||||
step). The items are divided up dynamically and work is shared through the
|
||||
middleware, so if the listeners are all eager consumers, then load
|
||||
balancing is automatic.
|
||||
|
||||
The middleware has to be durable, with guaranteed delivery and
|
||||
single consumer for each message. JMS is the obvious candidate, but other
|
||||
options exist in the grid computing and shared memory product space (e.g.
|
||||
Java Spaces).
|
||||
|
||||
[[partitioning]]
|
||||
|
||||
|
||||
=== Partitioning
|
||||
|
||||
Spring Batch also provides an SPI for partitioning a `Step` execution
|
||||
and executing it remotely. In this case the remote participants are simply
|
||||
`Step` instances that could just as easily have been configured and used for
|
||||
local processing. Here is a picture of the pattern in action:
|
||||
|
||||
.Partitioning
|
||||
image::{batch-asciidoc}images/partitioning-overview.png[Partitioning Overview, scaledwidth="60%"]
|
||||
|
||||
The `Job` is executing on the left hand side as a sequence of `Steps`,
|
||||
and one of the `Steps` is labelled as a Master. The Slaves in this picture
|
||||
are all identical instances of a `Step`, which could in fact take the place
|
||||
of the Master resulting in the same outcome for the `Job`. The Slaves are
|
||||
typically going to be remote services, but could also be local threads of
|
||||
execution. The messages sent by the Master to the Slaves in this pattern
|
||||
do not need to be durable, or have guaranteed delivery: Spring Batch
|
||||
meta-data in the JobRepository will ensure that
|
||||
each Slave is executed once and only once for each `Job` execution.
|
||||
|
||||
The SPI in Spring Batch consists of a special implementation of `Step`
|
||||
(the `PartitionStep`), and two strategy interfaces
|
||||
that need to be implemented for the specific environment. The strategy
|
||||
interfaces are `PartitionHandler` and
|
||||
`StepExecutionSplitter`, and their role is shown in
|
||||
the sequence diagram below:
|
||||
|
||||
.Partitioning SPI
|
||||
image::{batch-asciidoc}images/partitioning-spi.png[Partitioning SPI, scaledwidth="60%"]
|
||||
|
||||
The `Step` on the right in this case is the "remote" Slave, so
|
||||
potentially there are many objects and/or processes playing this role, and
|
||||
the `PartitionStep` is shown driving the execution. The `PartitionStep`
|
||||
configuration looks like this:
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<step id="step1.master">
|
||||
<partition step="step1" partitioner="partitioner">
|
||||
<handler grid-size="10" task-executor="taskExecutor"/>
|
||||
</partition>
|
||||
</step>
|
||||
----
|
||||
|
||||
Similar to the multi-threaded step's throttle-limit
|
||||
attribute, the grid-size attribute prevents the task executor from
|
||||
being saturated with requests from a single step.
|
||||
|
||||
There is a simple example which can be copied and extended in the
|
||||
unit test suite for Spring Batch Samples (see
|
||||
`*PartitionJob.xml` configuration).
|
||||
|
||||
Spring Batch creates step executions for the partitions called
|
||||
"step1:partition0", etc., so many people prefer to call the master step
|
||||
"step1:master" for consistency. With Spring 3.0 you can do this using an
|
||||
alias for the step (specifying the `name` attribute
|
||||
instead of the `id`).
|
||||
|
||||
[[partitionHandler]]
|
||||
|
||||
|
||||
==== PartitionHandler
|
||||
|
||||
The `PartitionHandler` is the component that
|
||||
knows about the fabric of the remoting or grid environment. It is able
|
||||
to send `StepExecution` requests to the remote
|
||||
`Steps`, wrapped in some fabric-specific format, like a DTO. It does not
|
||||
have to know how to split up the input data, or how to aggregate the
|
||||
result of multiple `Step` executions. Generally speaking it probably also
|
||||
doesn't need to know about resilience or failover, since those are
|
||||
features of the fabric in many cases, and anyway Spring Batch always
|
||||
provides restartability independent of the fabric: a failed `Job` can
|
||||
always be restarted and only the failed `Steps` will be
|
||||
re-executed.
|
||||
|
||||
The `PartitionHandler` interface can have
|
||||
specialized implementations for a variety of fabric types: e.g. simple
|
||||
RMI remoting, EJB remoting, custom web service, JMS, Java Spaces, shared
|
||||
memory grids (like Terracotta or Coherence), grid execution fabrics
|
||||
(like GridGain). Spring Batch does not contain implementations for any
|
||||
proprietary grid or remoting fabrics.
|
||||
|
||||
Spring Batch does however provide a useful implementation of
|
||||
`PartitionHandler` that executes `Steps` locally in
|
||||
separate threads of execution, using the
|
||||
`TaskExecutor` strategy from Spring. The
|
||||
implementation is called
|
||||
`TaskExecutorPartitionHandler`, and it is the
|
||||
default for a step configured with the XML namespace as above. It can
|
||||
also be configured explicitly like this:
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<step id="step1.master">
|
||||
<partition step="step1" handler="handler"/>
|
||||
</step>
|
||||
|
||||
<bean class="org.spr...TaskExecutorPartitionHandler">
|
||||
<property name="taskExecutor" ref="taskExecutor"/>
|
||||
<property name="step" ref="step1" />
|
||||
<property name="gridSize" value="10" />
|
||||
</bean>
|
||||
----
|
||||
|
||||
The `gridSize` determines the number of separate
|
||||
step executions to create, so it can be matched to the size of the
|
||||
thread pool in the `TaskExecutor`, or else it can
|
||||
be set to be larger than the number of threads available, in which case
|
||||
the blocks of work are smaller.
|
||||
|
||||
The `TaskExecutorPartitionHandler` is quite
|
||||
useful for IO intensive `Steps`, like copying large numbers of files or
|
||||
replicating filesystems into content management systems. It can also be
|
||||
used for remote execution by providing a `Step` implementation that is a
|
||||
proxy for a remote invocation (e.g. using Spring Remoting).
|
||||
|
||||
[[stepExecutionSplitter]]
|
||||
|
||||
|
||||
==== Partitioner
|
||||
|
||||
The `Partitioner` has a simpler responsibility: to generate
|
||||
execution contexts as input parameters for new step executions only (no
|
||||
need to worry about restarts). It has a single method:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public interface Partitioner {
|
||||
Map<String, ExecutionContext> partition(int gridSize);
|
||||
}
|
||||
----
|
||||
|
||||
The return value from this method associates a unique name for
|
||||
each step execution (the `String`), with input
|
||||
parameters in the form of an `ExecutionContext`.
|
||||
The names show up later in the Batch meta data as the step name in the
|
||||
partitioned `StepExecutions`. The
|
||||
`ExecutionContext` is just a bag of name-value
|
||||
pairs, so it might contain a range of primary keys, or line numbers, or
|
||||
the location of an input file. The remote `Step`
|
||||
then normally binds to the context input using `#{...}`
|
||||
placeholders (late binding in step scope), as illustrated in the next
|
||||
section.
|
||||
|
||||
The names of the step executions (the keys in the
|
||||
`Map` returned by
|
||||
`Partitioner`) need to be unique amongst the step
|
||||
executions of a `Job`, but do not have any other specific requirements.
|
||||
The easiest way to do this, and to make the names meaningful for users,
|
||||
is to use a prefix+suffix naming convention, where the prefix is the
|
||||
name of the step that is being executed (which itself is unique in the
|
||||
`Job`), and the suffix is just a counter. There is
|
||||
a `SimplePartitioner` in the framework that uses
|
||||
this convention.
|
||||
|
||||
An optional interface
|
||||
`PartitionNameProvider` can be used to
|
||||
provide the partition names separately from the partitions
|
||||
themselves. If a `Partitioner` implements
|
||||
this interface then on a restart only the names will be queried.
|
||||
If partitioning is expensive this can be a useful optimisation.
|
||||
Obviously the names provided by the
|
||||
`PartitionNameProvider` must match those
|
||||
provided by the `Partitioner`.
|
||||
|
||||
[[bindingInputDataToSteps]]
|
||||
|
||||
|
||||
==== Binding Input Data to Steps
|
||||
|
||||
It is very efficient for the steps that are executed by the
|
||||
`PartitionHandler` to have identical configuration, and for their input
|
||||
parameters to be bound at runtime from the `ExecutionContext`. This is
|
||||
easy to do with the StepScope feature of Spring Batch (covered in more
|
||||
detail in the section on <<step.adoc#late-binding,Late Binding>>). For example
|
||||
if the `Partitioner` creates
|
||||
`ExecutionContext` instances with an attribute key
|
||||
`fileName`, pointing to a different file (or
|
||||
directory) for each step invocation, the
|
||||
`Partitioner` output might look like this:
|
||||
|
||||
.Example step execution name to execution context provided by `Partitioner` targeting directory processing
|
||||
|
||||
|===============
|
||||
|__Step Execution Name (key)__|__ExecutionContext (value)__
|
||||
|filecopy:partition0|fileName=/home/data/one
|
||||
|filecopy:partition1|fileName=/home/data/two
|
||||
|filecopy:partition2|fileName=/home/data/three
|
||||
|===============
|
||||
|
||||
|
||||
Then the file name can be bound to a step using late binding to
|
||||
the execution context:
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="itemReader" scope="step"
|
||||
class="org.spr...MultiResourceItemReader">
|
||||
<property name="resources" value="#{stepExecutionContext[fileName]}/*"/>
|
||||
</bean>
|
||||
----
|
||||
|
||||
578
spring-batch-docs/asciidoc/schema-appendix.adoc
Normal file
@@ -0,0 +1,578 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[metaDataSchema]]
|
||||
|
||||
[appendix]
|
||||
== Meta-Data Schema
|
||||
|
||||
[[metaDataSchemaOverview]]
|
||||
|
||||
|
||||
=== Overview
|
||||
|
||||
The Spring Batch Meta-Data tables very closely match the Domain
|
||||
objects that represent them in Java. For example,
|
||||
`JobInstance`, `JobExecution`,
|
||||
`JobParameters`, and
|
||||
`StepExecution` map to BATCH_JOB_INSTANCE,
|
||||
BATCH_JOB_EXECUTION, BATCH_JOB_EXECUTION_PARAMS, and BATCH_STEP_EXECUTION,
|
||||
respectively. `ExecutionContext` maps to both
|
||||
BATCH_JOB_EXECUTION_CONTEXT and BATCH_STEP_EXECUTION_CONTEXT. The
|
||||
`JobRepository` is responsible for saving and storing
|
||||
each Java object into its correct table. The following appendix describes
|
||||
the meta-data tables in detail, along with many of the design decisions
|
||||
that were made when creating them. When viewing the various table creation
|
||||
statements below, it is important to realize that the data types used are
|
||||
as generic as possible. Spring Batch provides many schemas as examples,
|
||||
which all have varying data types due to variations in individual database
|
||||
vendors' handling of data types. Below is an ERD model of all 6 tables and
|
||||
their relationships to one another:
|
||||
|
||||
.Spring Batch Meta-Data ERD
|
||||
image::{batch-asciidoc}images/meta-data-erd.png[Spring Batch Meta-Data ERD, scaledwidth="60%"]
|
||||
|
||||
|
||||
[[exampleDDLScripts]]
|
||||
|
||||
|
||||
==== Example DDL Scripts
|
||||
|
||||
The Spring Batch Core JAR file contains example
|
||||
scripts to create the relational tables for a number of database
|
||||
platforms (which are in turn auto-detected by the job repository factory
|
||||
bean or namespace equivalent). These scripts can be used as is, or
|
||||
modified with additional indexes and constraints as desired. The file
|
||||
names are in the form `schema-\*.sql`, where "*" is the
|
||||
short name of the target database platform. The scripts are in
|
||||
the package `org.springframework.batch.core`.
|
||||
|
||||
[[metaDataVersion]]
|
||||
|
||||
|
||||
==== Version
|
||||
|
||||
Many of the database tables discussed in this appendix contain a
|
||||
version column. This column is important because Spring Batch employs an
|
||||
optimistic locking strategy when dealing with updates to the database.
|
||||
This means that each time a record is 'touched' (updated) the value in
|
||||
the version column is incremented by one. When the repository goes back
|
||||
to try and save the value, if the version number has changed, it will
|
||||
throw `OptimisticLockingFailureException`,
|
||||
indicating there has been an error with concurrent access. This check is
|
||||
necessary since, even though different batch jobs may be running in
|
||||
different machines, they are all using the same database tables.
|
||||
|
||||
[[metaDataIdentity]]
|
||||
|
||||
|
||||
==== Identity
|
||||
|
||||
BATCH_JOB_INSTANCE, BATCH_JOB_EXECUTION, and BATCH_STEP_EXECUTION
|
||||
each contain columns ending in _ID. These fields act as primary keys for
|
||||
their respective tables. However, they are not database generated keys,
|
||||
but rather they are generated by separate sequences. This is necessary
|
||||
because after inserting one of the domain objects into the database, the
|
||||
key it is given needs to be set on the actual object so that they can be
|
||||
uniquely identified in Java. Newer database drivers (JDBC 3.0 and up)
|
||||
support this feature with database generated keys, but rather than
|
||||
requiring it, sequences were used. Each variation of the schema will
|
||||
contain some form of the following:
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE SEQUENCE BATCH_STEP_EXECUTION_SEQ;
|
||||
CREATE SEQUENCE BATCH_JOB_EXECUTION_SEQ;
|
||||
CREATE SEQUENCE BATCH_JOB_SEQ;
|
||||
----
|
||||
|
||||
Many database vendors don't support sequences. In these cases,
|
||||
work-arounds are used, such as the following for MySQL:
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE TABLE BATCH_STEP_EXECUTION_SEQ (ID BIGINT NOT NULL) type=InnoDB;
|
||||
INSERT INTO BATCH_STEP_EXECUTION_SEQ values(0);
|
||||
CREATE TABLE BATCH_JOB_EXECUTION_SEQ (ID BIGINT NOT NULL) type=InnoDB;
|
||||
INSERT INTO BATCH_JOB_EXECUTION_SEQ values(0);
|
||||
CREATE TABLE BATCH_JOB_SEQ (ID BIGINT NOT NULL) type=InnoDB;
|
||||
INSERT INTO BATCH_JOB_SEQ values(0);
|
||||
----
|
||||
|
||||
In the above case, a table is used in place of each sequence. The
|
||||
Spring core class `MySQLMaxValueIncrementer` will
|
||||
then increment the one column in this sequence in order to give similar
|
||||
functionality.
|
||||
|
||||
[[metaDataBatchJobInstance]]
|
||||
|
||||
|
||||
=== BATCH_JOB_INSTANCE
|
||||
|
||||
The BATCH_JOB_INSTANCE table holds all information relevant to a
|
||||
`JobInstance`, and serves as the top of the overall
|
||||
hierarchy. The following generic DDL statement is used to create
|
||||
it:
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE TABLE BATCH_JOB_INSTANCE (
|
||||
JOB_INSTANCE_ID BIGINT PRIMARY KEY ,
|
||||
VERSION BIGINT,
|
||||
JOB_NAME VARCHAR(100) NOT NULL ,
|
||||
JOB_KEY VARCHAR(2500)
|
||||
);
|
||||
----
|
||||
|
||||
Below are descriptions of each column in the table:
|
||||
|
||||
|
||||
* JOB_INSTANCE_ID: The unique id that will identify the instance,
|
||||
which is also the primary key. The value of this column should be
|
||||
obtainable by calling the `getId` method on
|
||||
`JobInstance`.
|
||||
|
||||
|
||||
* VERSION: See <<metaDataVersion,Version>>.
|
||||
|
||||
|
||||
* JOB_NAME: Name of the job obtained from the
|
||||
`Job` object. Because it is required to identify
|
||||
the instance, it must not be null.
|
||||
|
||||
|
||||
* JOB_KEY: A serialization of the
|
||||
`JobParameters` that uniquely identifies separate
|
||||
instances of the same job from one another.
|
||||
(`JobInstances` with the same job name must have
|
||||
different `JobParameters`, and thus, different
|
||||
JOB_KEY values).
|
||||
|
||||
[[metaDataBatchJobParams]]
|
||||
|
||||
|
||||
=== BATCH_JOB_EXECUTION_PARAMS
|
||||
|
||||
The BATCH_JOB_EXECUTION_PARAMS table holds all information relevant to the
|
||||
`JobParameters` object. It contains 0 or more
|
||||
key/value pairs passed to a `Job` and serves as a record of the parameters
|
||||
a job was run with. For each parameter that contributes to the generation of a job's identity,
|
||||
the IDENTIFYING flag is set to true. It should be noted that the table has been
|
||||
denormalized. Rather than creating a separate table for each type, there
|
||||
is one table with a column indicating the type:
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE TABLE BATCH_JOB_EXECUTION_PARAMS (
|
||||
JOB_EXECUTION_ID BIGINT NOT NULL ,
|
||||
TYPE_CD VARCHAR(6) NOT NULL ,
|
||||
KEY_NAME VARCHAR(100) NOT NULL ,
|
||||
STRING_VAL VARCHAR(250) ,
|
||||
DATE_VAL DATETIME DEFAULT NULL ,
|
||||
LONG_VAL BIGINT ,
|
||||
DOUBLE_VAL DOUBLE PRECISION ,
|
||||
IDENTIFYING CHAR(1) NOT NULL ,
|
||||
constraint JOB_EXEC_PARAMS_FK foreign key (JOB_EXECUTION_ID)
|
||||
references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID)
|
||||
);
|
||||
----
|
||||
|
||||
Below are descriptions for each column:
|
||||
|
||||
|
||||
* JOB_EXECUTION_ID: Foreign Key from the BATCH_JOB_EXECUTION table
|
||||
that indicates the job execution the parameter entry belongs to. It
|
||||
should be noted that multiple rows (i.e., key/value pairs) may exist for
|
||||
each execution.
|
||||
|
||||
|
||||
* TYPE_CD: String representation of the type of value stored,
|
||||
which can be either a string, date, long, or double. Because the type
|
||||
must be known, it cannot be null.
|
||||
|
||||
|
||||
* KEY_NAME: The parameter key.
|
||||
|
||||
|
||||
* STRING_VAL: Parameter value, if the type is string.
|
||||
|
||||
|
||||
* DATE_VAL: Parameter value, if the type is date.
|
||||
|
||||
|
||||
* LONG_VAL: Parameter value, if the type is a long.
|
||||
|
||||
|
||||
* DOUBLE_VAL: Parameter value, if the type is double.
|
||||
|
||||
|
||||
* IDENTIFYING: Flag indicating if the parameter contributed to the identity of the related `JobInstance`.
|
||||
|
||||
It is worth noting that there is no primary key for this table. This
|
||||
is simply because the framework has no use for one, and thus doesn't
|
||||
require it. If a user so chooses, one may be added with a database
|
||||
generated key, without causing any issues to the framework itself.
|
||||
|
||||
[[metaDataBatchJobExecution]]
|
||||
|
||||
|
||||
=== BATCH_JOB_EXECUTION
|
||||
|
||||
The BATCH_JOB_EXECUTION table holds all information relevant to the
|
||||
`JobExecution` object. Every time a
|
||||
`Job` is run there will always be a new
|
||||
`JobExecution`, and a new row in this table:
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE TABLE BATCH_JOB_EXECUTION (
|
||||
JOB_EXECUTION_ID BIGINT PRIMARY KEY ,
|
||||
VERSION BIGINT,
|
||||
JOB_INSTANCE_ID BIGINT NOT NULL,
|
||||
CREATE_TIME TIMESTAMP NOT NULL,
|
||||
START_TIME TIMESTAMP DEFAULT NULL,
|
||||
END_TIME TIMESTAMP DEFAULT NULL,
|
||||
STATUS VARCHAR(10),
|
||||
EXIT_CODE VARCHAR(20),
|
||||
EXIT_MESSAGE VARCHAR(2500),
|
||||
LAST_UPDATED TIMESTAMP,
|
||||
JOB_CONFIGURATION_LOCATION VARCHAR(2500) NULL,
|
||||
constraint JOB_INSTANCE_EXECUTION_FK foreign key (JOB_INSTANCE_ID)
|
||||
references BATCH_JOB_INSTANCE(JOB_INSTANCE_ID)
|
||||
) ;
|
||||
----
|
||||
|
||||
Below are descriptions for each column:
|
||||
|
||||
|
||||
* JOB_EXECUTION_ID: Primary key that uniquely identifies this
|
||||
execution. The value of this column is obtainable by calling the
|
||||
`getId` method of the
|
||||
`JobExecution` object.
|
||||
|
||||
|
||||
* VERSION: See <<metaDataVersion,Version>>.
|
||||
|
||||
|
||||
* JOB_INSTANCE_ID: Foreign key from the BATCH_JOB_INSTANCE table
|
||||
indicating the instance to which this execution belongs. There may be
|
||||
more than one execution per instance.
|
||||
|
||||
|
||||
* CREATE_TIME: Timestamp representing the time that the execution
|
||||
was created.
|
||||
|
||||
|
||||
* START_TIME: Timestamp representing the time the execution was
|
||||
started.
|
||||
|
||||
|
||||
* END_TIME: Timestamp representing the time the execution was
|
||||
finished, regardless of success or failure. An empty value in this
|
||||
column even though the job is not currently running indicates that
|
||||
there has been some type of error and the framework was unable to
|
||||
perform a last save before failing.
|
||||
|
||||
|
||||
* STATUS: Character string representing the status of the
|
||||
execution. This may be COMPLETED, STARTED, etc. The object
|
||||
representation of this column is the
|
||||
`BatchStatus` enumeration.
|
||||
|
||||
|
||||
* EXIT_CODE: Character string representing the exit code of the
|
||||
execution. In the case of a command line job, this may be converted
|
||||
into a number.
|
||||
|
||||
|
||||
* EXIT_MESSAGE: Character string representing a more detailed
|
||||
description of how the job exited. In the case of failure, this might
|
||||
include as much of the stack trace as is possible.
|
||||
|
||||
|
||||
* LAST_UPDATED: Timestamp representing the last time this
|
||||
execution was persisted.
|
||||
|
||||
[[metaDataBatchStepExecution]]
|
||||
|
||||
|
||||
=== BATCH_STEP_EXECUTION
|
||||
|
||||
The BATCH_STEP_EXECUTION table holds all information relevant to the
|
||||
`StepExecution` object. This table is very similar in
|
||||
many ways to the BATCH_JOB_EXECUTION table and there will always be at
|
||||
least one entry per `Step` for each
|
||||
`JobExecution` created:
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE TABLE BATCH_STEP_EXECUTION (
|
||||
STEP_EXECUTION_ID BIGINT PRIMARY KEY ,
|
||||
VERSION BIGINT NOT NULL,
|
||||
STEP_NAME VARCHAR(100) NOT NULL,
|
||||
JOB_EXECUTION_ID BIGINT NOT NULL,
|
||||
START_TIME TIMESTAMP NOT NULL ,
|
||||
END_TIME TIMESTAMP DEFAULT NULL,
|
||||
STATUS VARCHAR(10),
|
||||
COMMIT_COUNT BIGINT ,
|
||||
READ_COUNT BIGINT ,
|
||||
FILTER_COUNT BIGINT ,
|
||||
WRITE_COUNT BIGINT ,
|
||||
READ_SKIP_COUNT BIGINT ,
|
||||
WRITE_SKIP_COUNT BIGINT ,
|
||||
PROCESS_SKIP_COUNT BIGINT ,
|
||||
ROLLBACK_COUNT BIGINT ,
|
||||
EXIT_CODE VARCHAR(20) ,
|
||||
EXIT_MESSAGE VARCHAR(2500) ,
|
||||
LAST_UPDATED TIMESTAMP,
|
||||
constraint JOB_EXECUTION_STEP_FK foreign key (JOB_EXECUTION_ID)
|
||||
references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID)
|
||||
) ;
|
||||
----
|
||||
|
||||
Below are descriptions for each column:
|
||||
|
||||
|
||||
* STEP_EXECUTION_ID: Primary key that uniquely identifies this
|
||||
execution. The value of this column should be obtainable by calling
|
||||
the `getId` method of the
|
||||
`StepExecution` object.
|
||||
|
||||
|
||||
* VERSION: See above section.
|
||||
|
||||
|
||||
* STEP_NAME: The name of the step to which this execution
|
||||
belongs.
|
||||
|
||||
|
||||
* JOB_EXECUTION_ID: Foreign key from the BATCH_JOB_EXECUTION table
|
||||
indicating the `JobExecution` to which this `StepExecution` belongs. There
|
||||
may be only one `StepExecution` for a given
|
||||
`JobExecution` for a given
|
||||
`Step` name.
|
||||
|
||||
|
||||
* START_TIME: Timestamp representing the time the execution was
|
||||
started.
|
||||
|
||||
|
||||
* END_TIME: Timestamp representing the time the execution was
|
||||
finished, regardless of success or failure. An empty value in this
|
||||
column even though the job is not currently running indicates that
|
||||
there has been some type of error and the framework was unable to
|
||||
perform a last save before failing.
|
||||
|
||||
|
||||
* STATUS: Character string representing the status of the
|
||||
execution. This may be COMPLETED, STARTED, etc. The object
|
||||
representation of this column is the
|
||||
`BatchStatus` enumeration.
|
||||
|
||||
|
||||
* COMMIT_COUNT: The number of times in which the step has
|
||||
committed a transaction during this execution.
|
||||
|
||||
|
||||
* READ_COUNT: The number of items read during this
|
||||
execution.
|
||||
|
||||
|
||||
* FILTER_COUNT: The number of items filtered out of this
|
||||
execution.
|
||||
|
||||
|
||||
* WRITE_COUNT: The number of items written and committed during
|
||||
this execution.
|
||||
|
||||
|
||||
* READ_SKIP_COUNT: The number of items skipped on read during this
|
||||
execution.
|
||||
|
||||
|
||||
* WRITE_SKIP_COUNT: The number of items skipped on write during
|
||||
this execution.
|
||||
|
||||
|
||||
* PROCESS_SKIP_COUNT: The number of items skipped during
|
||||
processing during this execution.
|
||||
|
||||
|
||||
* ROLLBACK_COUNT: The number of rollbacks during this execution.
|
||||
Note that this count includes each time rollback occurs, including
|
||||
rollbacks for retry and those in the skip recovery procedure.
|
||||
|
||||
|
||||
* EXIT_CODE: Character string representing the exit code of the
|
||||
execution. In the case of a command line job, this may be converted
|
||||
into a number.
|
||||
|
||||
|
||||
* EXIT_MESSAGE: Character string representing a more detailed
|
||||
description of how the job exited. In the case of failure, this might
|
||||
include as much of the stack trace as is possible.
|
||||
|
||||
|
||||
* LAST_UPDATED: Timestamp representing the last time this
|
||||
execution was persisted.
|
||||
|
||||
[[metaDataBatchJobExecutionContext]]
|
||||
|
||||
|
||||
=== BATCH_JOB_EXECUTION_CONTEXT
|
||||
|
||||
The BATCH_JOB_EXECUTION_CONTEXT table holds all information relevant
|
||||
to a `Job's`
|
||||
`ExecutionContext`. There is exactly one
|
||||
`Job` `ExecutionContext` per
|
||||
`JobExecution`, and it contains all of the job-level
|
||||
data that is needed for a particular job execution. This data typically
|
||||
represents the state that must be retrieved after a failure so that a
|
||||
`JobInstance` can 'start from where it left
|
||||
off'.
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE TABLE BATCH_JOB_EXECUTION_CONTEXT (
|
||||
JOB_EXECUTION_ID BIGINT PRIMARY KEY,
|
||||
SHORT_CONTEXT VARCHAR(2500) NOT NULL,
|
||||
SERIALIZED_CONTEXT CLOB,
|
||||
constraint JOB_EXEC_CTX_FK foreign key (JOB_EXECUTION_ID)
|
||||
references BATCH_JOB_EXECUTION(JOB_EXECUTION_ID)
|
||||
) ;
|
||||
----
|
||||
|
||||
Below are descriptions for each column:
|
||||
|
||||
|
||||
* JOB_EXECUTION_ID: Foreign key representing the
|
||||
`JobExecution` to which the context belongs.
|
||||
There may be more than one row associated to a given execution.
|
||||
|
||||
|
||||
* SHORT_CONTEXT: A string version of the
|
||||
SERIALIZED_CONTEXT.
|
||||
|
||||
|
||||
* SERIALIZED_CONTEXT: The entire context, serialized.
|
||||
|
||||
[[metaDataBatchStepExecutionContext]]
|
||||
|
||||
|
||||
=== BATCH_STEP_EXECUTION_CONTEXT
|
||||
|
||||
The BATCH_STEP_EXECUTION_CONTEXT table holds all information
|
||||
relevant to a `Step's`
|
||||
`ExecutionContext`. There is exactly one
|
||||
`ExecutionContext` per
|
||||
`StepExecution`, and it contains all of the data that
|
||||
needs to be persisted for a particular step execution. This data typically
|
||||
represents the state that must be retrieved after a failure so that a
|
||||
`JobInstance` can 'start from where it left
|
||||
off'.
|
||||
|
||||
|
||||
[source, sql]
|
||||
----
|
||||
CREATE TABLE BATCH_STEP_EXECUTION_CONTEXT (
|
||||
STEP_EXECUTION_ID BIGINT PRIMARY KEY,
|
||||
SHORT_CONTEXT VARCHAR(2500) NOT NULL,
|
||||
SERIALIZED_CONTEXT CLOB,
|
||||
constraint STEP_EXEC_CTX_FK foreign key (STEP_EXECUTION_ID)
|
||||
references BATCH_STEP_EXECUTION(STEP_EXECUTION_ID)
|
||||
) ;
|
||||
----
|
||||
|
||||
Below are descriptions for each column:
|
||||
|
||||
|
||||
* STEP_EXECUTION_ID: Foreign key representing the
|
||||
`StepExecution` to which the context belongs.
|
||||
There may be more than one row associated to a given execution.
|
||||
|
||||
|
||||
* SHORT_CONTEXT: A string version of the
|
||||
SERIALIZED_CONTEXT.
|
||||
|
||||
|
||||
* SERIALIZED_CONTEXT: The entire context, serialized.
|
||||
|
||||
[[metaDataArchiving]]
|
||||
|
||||
|
||||
=== Archiving
|
||||
|
||||
Because there are entries in multiple tables every time a batch job
|
||||
is run, it is common to create an archive strategy for the meta-data
|
||||
tables. The tables themselves are designed to show a record of what
|
||||
happened in the past, and generally won't affect the run of any job, with
|
||||
a couple of notable exceptions pertaining to restart:
|
||||
|
||||
|
||||
* The framework will use the meta-data tables to determine if a
|
||||
particular `JobInstance` has been run before. If it has been run, and
|
||||
the job is not restartable, then an exception will be thrown.
|
||||
|
||||
|
||||
* If an entry for a `JobInstance` is removed without having
|
||||
completed successfully, the framework will think that the job is new,
|
||||
rather than a restart.
|
||||
|
||||
|
||||
* If a job is restarted, the framework will use any data that has
|
||||
been persisted to the `ExecutionContext` to restore the `Job's` state.
|
||||
Therefore, removing any entries from this table for jobs that haven't
|
||||
completed successfully will prevent them from starting at the correct
|
||||
point if run again.
|
||||
|
||||
[[multiByteCharacters]]
|
||||
|
||||
|
||||
=== International and Multi-byte Characters
|
||||
|
||||
If you are using multi-byte character sets (e.g. Chinese or Cyrillic)
|
||||
in your business processing, then those characters might need to be
|
||||
persisted in the Spring Batch schema. Many users find that
|
||||
simply changing the schema to double the length of the `VARCHAR`
|
||||
columns is enough. Others prefer to configure the <<job.adoc#configuringJobRepository,JobRepository>> with `max-varchar-length` set to half the value of the `VARCHAR` column length. Some users have also reported that
|
||||
they use `NVARCHAR` in place of `VARCHAR`
|
||||
in their schema definitions. The best result will depend on the database
|
||||
platform and the way the database server has been configured locally.
|
||||
|
||||
[[recommendationsForIndexingMetaDataTables]]
|
||||
|
||||
|
||||
=== Recommendations for Indexing Meta Data Tables
|
||||
|
||||
Spring Batch provides DDL samples for the meta-data tables in the
|
||||
Core jar file for several common database platforms. Index declarations
|
||||
are not included in that DDL because there are too many variations in how
|
||||
users may want to index depending on their precise platform, local
|
||||
conventions and also the business requirements of how the jobs will be
|
||||
operated. The table below provides some indication as to which columns are
|
||||
going to be used in a WHERE clause by the Dao implementations provided by
|
||||
Spring Batch, and how frequently they might be used, so that individual
|
||||
projects can make up their own minds about indexing.
|
||||
|
||||
.Where clauses in SQL statements (excluding primary keys) and their approximate frequency of use.
|
||||
|
||||
|===============
|
||||
|Default Table Name|Where Clause|Frequency
|
||||
|BATCH_JOB_INSTANCE|JOB_NAME = ? and JOB_KEY = ?|Every time a job is launched
|
||||
|BATCH_JOB_EXECUTION|JOB_INSTANCE_ID = ?|Every time a job is restarted
|
||||
|BATCH_EXECUTION_CONTEXT|EXECUTION_ID = ? and KEY_NAME = ?|On commit interval, a.k.a. chunk
|
||||
|BATCH_STEP_EXECUTION|VERSION = ?|On commit interval, a.k.a. chunk (and at start and end of
|
||||
step)
|
||||
|BATCH_STEP_EXECUTION|STEP_NAME = ? and JOB_EXECUTION_ID = ?|Before each step execution
|
||||
|
||||
|===============
|
||||
|
||||
|
||||
817
spring-batch-docs/asciidoc/spring-batch-integration.adoc
Normal file
@@ -0,0 +1,817 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[springBatchIntegration]]
|
||||
|
||||
== Spring Batch Integration
|
||||
|
||||
[[spring-batch-integration-introduction]]
|
||||
|
||||
=== Spring Batch Integration Introduction
|
||||
|
||||
Many users of Spring Batch may encounter requirements that are
|
||||
outside the scope of Spring Batch, yet may be efficiently and
|
||||
concisely implemented using Spring Integration. Conversely, Spring
|
||||
Batch users may encounter Spring Batch requirements and need a way
|
||||
to efficiently integrate both frameworks. In this context several
|
||||
patterns and use-cases emerge and Spring Batch Integration will
|
||||
address those requirements.
|
||||
|
||||
The line between Spring Batch and Spring Integration is not always
|
||||
clear, but there are guidelines that one can follow. Principally,
|
||||
these are: think about granularity, and apply common patterns. Some
|
||||
of those common patterns are described in this reference manual
|
||||
section.
|
||||
|
||||
Adding messaging to a batch process enables automation of
|
||||
operations, and also separation and strategizing of key concerns.
|
||||
For example a message might trigger a job to execute, and then the
|
||||
sending of the message can be exposed in a variety of ways. Or when
|
||||
a job completes or fails that might trigger a message to be sent,
|
||||
and the consumers of those messages might have operational concerns
|
||||
that have nothing to do with the application itself. Messaging can
|
||||
also be embedded in a job, for example reading or writing items for
|
||||
processing via channels. Remote partitioning and remote chunking
|
||||
provide methods to distribute workloads over a number of workers.
|
||||
|
||||
|
||||
Some key concepts that we will cover are:
|
||||
|
||||
* <<spring-batch-integration.adoc#namespace-support,Namespace Support>>
|
||||
|
||||
|
||||
|
||||
* <<spring-batch-integration.adoc#launching-batch-jobs-through-messages,Launching Batch Jobs through Messages>>
|
||||
|
||||
|
||||
|
||||
* <<spring-batch-integration.adoc#providing-feedback-with-informational-messages,Providing Feedback with Informational Messages>>
|
||||
|
||||
|
||||
|
||||
* <<spring-batch-integration.adoc#asynchronous-processors,Asynchronous Processors>>
|
||||
|
||||
|
||||
|
||||
* <<spring-batch-integration.adoc#externalizing-batch-process-execution,Externalizing
|
||||
Batch Process Execution>>
|
||||
|
||||
|
||||
[[namespace-support]]
|
||||
|
||||
==== Namespace Support
|
||||
|
||||
Since Spring Batch Integration 1.3, dedicated XML Namespace
|
||||
support was added, with the aim to provide an easier configuration
|
||||
experience. In order to activate the namespace, add the following
|
||||
namespace declarations to your Spring XML Application Context
|
||||
file:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:batch-int="http://www.springframework.org/schema/batch-integration"
|
||||
xsi:schemaLocation="
|
||||
http://www.springframework.org/schema/batch-integration
|
||||
http://www.springframework.org/schema/batch-integration/spring-batch-integration.xsd">
|
||||
|
||||
...
|
||||
|
||||
</beans>
|
||||
----
|
||||
|
||||
A fully configured Spring XML Application Context file for Spring
|
||||
Batch Integration may look like the following:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<beans xmlns="http://www.springframework.org/schema/beans"
|
||||
xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
|
||||
xmlns:int="http://www.springframework.org/schema/integration"
|
||||
xmlns:batch="http://www.springframework.org/schema/batch"
|
||||
xmlns:batch-int="http://www.springframework.org/schema/batch-integration"
|
||||
xsi:schemaLocation="
|
||||
http://www.springframework.org/schema/batch-integration
|
||||
http://www.springframework.org/schema/batch-integration/spring-batch-integration.xsd
|
||||
http://www.springframework.org/schema/batch
|
||||
http://www.springframework.org/schema/batch/spring-batch.xsd
|
||||
http://www.springframework.org/schema/beans
|
||||
http://www.springframework.org/schema/beans/spring-beans.xsd
|
||||
http://www.springframework.org/schema/integration
|
||||
http://www.springframework.org/schema/integration/spring-integration.xsd">
|
||||
|
||||
...
|
||||
|
||||
</beans>
|
||||
----
|
||||
|
||||
Appending version numbers to the referenced XSD file is also
|
||||
allowed but, as a version-less declaration will always use the
|
||||
latest schema, we generally don't recommend appending the version
|
||||
number to the XSD name. Adding a version number, for instance,
|
||||
would possibly create issues when updating the Spring Batch
|
||||
Integration dependencies as they may require more recent versions
|
||||
of the XML schema.
|
||||
|
||||
|
||||
[[launching-batch-jobs-through-messages]]
|
||||
|
||||
==== Launching Batch Jobs through Messages
|
||||
|
||||
|
||||
When starting batch jobs using the core Spring Batch API you
|
||||
basically have 2 options:
|
||||
|
||||
* Command line via the `CommandLineJobRunner`
|
||||
* Programmatically via either `JobOperator.start()` or `JobLauncher.run()`.
|
||||
|
||||
|
||||
|
||||
For example, you may want to use the
|
||||
`CommandLineJobRunner` when invoking Batch Jobs
|
||||
using a shell script. Alternatively, you may use the
|
||||
JobOperator directly, for example when using
|
||||
Spring Batch as part of a web application. However, what about
|
||||
more complex use-cases? Maybe you need to poll a remote (S)FTP
|
||||
server to retrieve the data for the Batch Job. Or your application
|
||||
has to support multiple different data sources simultaneously. For
|
||||
example, you may receive data files not only via the web, but also
|
||||
FTP etc. Maybe additional transformation of the input files is
|
||||
needed before invoking Spring Batch.
|
||||
|
||||
|
||||
|
||||
Therefore, it would be much more powerful to execute the batch job
|
||||
using Spring Integration and its numerous adapters. For example,
|
||||
you can use a __File Inbound Channel Adapter__ to
|
||||
monitor a directory in the file-system and start the Batch Job as
|
||||
soon as the input file arrives. Additionally you can create Spring
|
||||
Integration flows that use multiple different adapters to easily
|
||||
ingest data for your Batch Jobs from multiple sources
|
||||
simultaneously using configuration only. Implementing all these
|
||||
scenarios with Spring Integration is easy as it allows for a
|
||||
decoupled event-driven execution of the
|
||||
`JobLauncher`.
|
||||
|
||||
|
||||
|
||||
Spring Batch Integration provides the
|
||||
`JobLaunchingMessageHandler` class that you can
|
||||
use to launch batch jobs. The input for the
|
||||
`JobLaunchingMessageHandler` is provided by a
|
||||
Spring Integration message, whose payload is of type
|
||||
JobLaunchRequest. This class is a wrapper around the Job
|
||||
that needs to be launched as well as the JobParameters
|
||||
necessary to launch the Batch job.
|
||||
|
||||
|
||||
|
||||
The following image illustrates the typical Spring Integration
|
||||
message flow in order to start a Batch job. The
|
||||
link:$$http://www.eaipatterns.com/toc.html$$[EIP (Enterprise Integration Patterns) website]
|
||||
provides a full overview of messaging icons and their descriptions.
|
||||
|
||||
.Launch Batch Job
|
||||
image::{batch-asciidoc}images/launch-batch-job.png[Launch Batch Job, scaledwidth="60%"]
|
||||
|
||||
|
||||
[[transforming-a-file-into-a-joblaunchrequest]]
|
||||
|
||||
===== Transforming a file into a JobLaunchRequest
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
package io.spring.sbi;
|
||||
|
||||
import org.springframework.batch.core.Job;
|
||||
import org.springframework.batch.core.JobParametersBuilder;
|
||||
import org.springframework.batch.integration.launch.JobLaunchRequest;
|
||||
import org.springframework.integration.annotation.Transformer;
|
||||
import org.springframework.messaging.Message;
|
||||
|
||||
import java.io.File;
|
||||
|
||||
public class FileMessageToJobRequest {
|
||||
private Job job;
|
||||
private String fileParameterName;
|
||||
|
||||
public void setFileParameterName(String fileParameterName) {
|
||||
this.fileParameterName = fileParameterName;
|
||||
}
|
||||
|
||||
public void setJob(Job job) {
|
||||
this.job = job;
|
||||
}
|
||||
|
||||
@Transformer
|
||||
public JobLaunchRequest toRequest(Message<File> message) {
|
||||
JobParametersBuilder jobParametersBuilder =
|
||||
new JobParametersBuilder();
|
||||
|
||||
jobParametersBuilder.addString(fileParameterName,
|
||||
message.getPayload().getAbsolutePath());
|
||||
|
||||
return new JobLaunchRequest(job, jobParametersBuilder.toJobParameters());
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
[[the-jobexecution-response]]
|
||||
|
||||
===== The JobExecution Response
|
||||
|
||||
When a Batch Job is being executed, a
|
||||
`JobExecution` instance is returned. This
|
||||
instance can be used to determine the status of an execution. If
|
||||
a `JobExecution` was able to be created
|
||||
successfully, it will always be returned, regardless of whether
|
||||
or not the actual execution was successful.
|
||||
|
||||
|
||||
|
||||
The exact behavior on how the `JobExecution`
|
||||
instance is returned depends on the provided
|
||||
`TaskExecutor`. If a
|
||||
`synchronous` (single-threaded)
|
||||
`TaskExecutor` implementation is used, the
|
||||
`JobExecution` response is only returned
|
||||
`after` the job completes. When using an
|
||||
`asynchronous`
|
||||
`TaskExecutor`, the
|
||||
`JobExecution` instance is returned
|
||||
immediately. Users can then take the `id` of
|
||||
`JobExecution` instance
|
||||
(`JobExecution.getJobId()`) and query the
|
||||
`JobRepository` for the job's updated status
|
||||
using the `JobExplorer`. For more
|
||||
information, please refer to the Spring
|
||||
Batch reference documentation on
|
||||
link:$$http://docs.spring.io/spring-batch/reference/html/configureJob.html#queryingRepository$$[Querying the Repository].
|
||||
|
||||
|
||||
|
||||
The following configuration will create a file
|
||||
`inbound-channel-adapter` to listen for CSV
|
||||
files in the provided directory, hand them off to our
|
||||
transformer (`FileMessageToJobRequest`),
|
||||
launch the job via the __Job Launching Gateway__ then simply log the output of the
|
||||
`JobExecution` via the
|
||||
`logging-channel-adapter`.
|
||||
|
||||
|
||||
[[spring-batch-integration-configuration]]
|
||||
|
||||
===== Spring Batch Integration Configuration
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<int:channel id="inboundFileChannel"/>
|
||||
<int:channel id="outboundJobRequestChannel"/>
|
||||
<int:channel id="jobLaunchReplyChannel"/>
|
||||
|
||||
<int-file:inbound-channel-adapter id="filePoller"
|
||||
channel="inboundFileChannel"
|
||||
directory="file:/tmp/myfiles/"
|
||||
filename-pattern="*.csv">
|
||||
<int:poller fixed-rate="1000"/>
|
||||
</int-file:inbound-channel-adapter>
|
||||
|
||||
<int:transformer input-channel="inboundFileChannel"
|
||||
output-channel="outboundJobRequestChannel">
|
||||
<bean class="io.spring.sbi.FileMessageToJobRequest">
|
||||
<property name="job" ref="personJob"/>
|
||||
<property name="fileParameterName" value="input.file.name"/>
|
||||
</bean>
|
||||
</int:transformer>
|
||||
|
||||
<batch-int:job-launching-gateway request-channel="outboundJobRequestChannel"
|
||||
reply-channel="jobLaunchReplyChannel"/>
|
||||
|
||||
<int:logging-channel-adapter channel="jobLaunchReplyChannel"/>
|
||||
----
|
||||
|
||||
|
||||
Now that we are polling for files and launching jobs, we need to
|
||||
configure for example our Spring Batch
|
||||
`ItemReader` to utilize the found file
|
||||
represented by the job parameter "input.file.name":
|
||||
|
||||
[[example-itemreader-configuration]]
|
||||
|
||||
===== Example ItemReader Configuration
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="itemReader" class="org.springframework.batch.item.file.FlatFileItemReader"
|
||||
scope="step">
|
||||
<property name="resource" value="file://#{jobParameters['input.file.name']}"/>
|
||||
...
|
||||
</bean>
|
||||
----
|
||||
|
||||
|
||||
The main points of interest here are injecting the value of
|
||||
`#{jobParameters['input.file.name']}`
|
||||
as the Resource property value and setting the `ItemReader` bean
|
||||
to be of __Step scope__ to take advantage of
|
||||
the late binding support which allows access to the
|
||||
`jobParameters` variable.
|
||||
|
||||
|
||||
[[availableAttributesOfTheJobLaunchingGateway]]
|
||||
=== Available Attributes of the Job-Launching Gateway
|
||||
|
||||
* `id` Identifies the underlying Spring bean definition, which is an instance of either:
|
||||
** `EventDrivenConsumer`
|
||||
** `PollingConsumer`
|
||||
|
||||
The exact implementation depends on whether the component's input channel is a:
|
||||
`SubscribableChannel` or `PollableChannel`
|
||||
|
||||
* `auto-startup` Boolean flag to indicate that the endpoint should start automatically on
|
||||
startup. The default is __true__.
|
||||
* `request-channel` The input `MessageChannel` of this endpoint.
|
||||
* `reply-channel` `Message Channel` to which the resulting `JobExecution` payload will be sent.
|
||||
* `reply-timeout` Allows you to specify how long this gateway will wait for the reply message
|
||||
to be sent successfully to the reply channel before throwing
|
||||
an exception. This attribute only applies when the channel
|
||||
might block, for example when using a bounded queue channel
|
||||
that is currently full. Also, keep in mind that when sending to a
|
||||
`DirectChannel`, the invocation will occur
|
||||
in the sender's thread. Therefore, the failing of the send
|
||||
operation may be caused by other components further downstream.
|
||||
The `reply-timeout` attribute maps to the
|
||||
`sendTimeout` property of the underlying
|
||||
`MessagingTemplate` instance. The attribute
|
||||
will default, if not specified, to __-1__,
|
||||
meaning that by default, the `Gateway` will wait indefinitely.
|
||||
The value is specified in milliseconds.
|
||||
* `job-launcher` Pass in a
|
||||
custom
|
||||
`JobLauncher`
|
||||
bean reference. This
|
||||
attribute is optional. If not specified the adapter will
|
||||
re-use the instance that is registered under the id
|
||||
`jobLauncher`. If no default instance
|
||||
exists an exception is thrown.
|
||||
* `order` Specifies the order for invocation when this endpoint is connected as a subscriber
|
||||
to a `SubscribableChannel`.
|
||||
|
||||
=== Sub-Elements
|
||||
When this `Gateway` is receiving messages from a
|
||||
`PollableChannel`, you must either provide
|
||||
a global default Poller or provide a Poller sub-element to the
|
||||
`Job Launching Gateway`:
|
||||
[source, xml]
|
||||
----
|
||||
<batch-int:job-launching-gateway request-channel="queueChannel"
|
||||
reply-channel="replyChannel" job-launcher="jobLauncher">
|
||||
<int:poller fixed-rate="1000"/>
|
||||
</batch-int:job-launching-gateway>
|
||||
----
|
||||
|
||||
[[providing-feedback-with-informational-messages]]
|
||||
|
||||
==== Providing Feedback with Informational Messages
|
||||
|
||||
|
||||
As Spring Batch jobs can run for long times, providing progress
|
||||
information will be critical. For example, stake-holders may want
|
||||
to be notified if some or all parts of a Batch Job have failed.
|
||||
Spring Batch provides support for this information being gathered
|
||||
through:
|
||||
|
||||
|
||||
|
||||
* Active polling or
|
||||
|
||||
* Event-driven, using listeners.
|
||||
|
||||
|
||||
When starting a Spring Batch job asynchronously, e.g. by using the
|
||||
`Job Launching Gateway`, a
|
||||
`JobExecution` instance is returned. Thus,
|
||||
`JobExecution.getJobId()` can be used to
|
||||
continuously poll for status updates by retrieving updated
|
||||
instances of the `JobExecution` from the
|
||||
`JobRepository` using the
|
||||
`JobExplorer`. However, this is considered
|
||||
sub-optimal and an event-driven approach should be preferred.
|
||||
|
||||
|
||||
Therefore, Spring Batch provides listeners such as:
|
||||
|
||||
* StepListener
|
||||
* ChunkListener
|
||||
* JobExecutionListener
|
||||
|
||||
In the following example, a Spring Batch job was configured with a
|
||||
`StepExecutionListener`. Thus, Spring
|
||||
Integration will receive and process any step before/after step
|
||||
events. For example, the received
|
||||
`StepExecution` can be inspected using a
|
||||
`Router`. Based on the results of that
|
||||
inspection, various things can occur for example routing a message
|
||||
to a Mail Outbound Channel Adapter, so that an Email notification
|
||||
can be sent out based on some condition.
|
||||
|
||||
.Handling Informational Messages
|
||||
image::{batch-asciidoc}images/handling-informational-messages.png[Handling Informational Messages, scaledwidth="60%"]
|
||||
|
||||
|
||||
Below is an example of how a listener is configured to send a
|
||||
message to a `Gateway` for
|
||||
`StepExecution` events and log its output to a
|
||||
`logging-channel-adapter`:
|
||||
|
||||
First create the notifications integration beans:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<int:channel id="stepExecutionsChannel"/>
|
||||
|
||||
<int:gateway id="notificationExecutionsListener"
|
||||
service-interface="org.springframework.batch.core.StepExecutionListener"
|
||||
default-request-channel="stepExecutionsChannel"/>
|
||||
|
||||
<int:logging-channel-adapter channel="stepExecutionsChannel"/>
|
||||
----
|
||||
|
||||
|
||||
Then modify your job to add a step level listener:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<job id="importPayments">
|
||||
<step id="step1">
|
||||
<tasklet ../>
|
||||
<chunk ../>
|
||||
<listeners>
|
||||
<listener ref="notificationExecutionsListener"/>
|
||||
</listeners>
|
||||
</tasklet>
|
||||
...
|
||||
</step>
|
||||
</job>
|
||||
----
|
||||
|
||||
[[asynchronous-processors]]
|
||||
|
||||
==== Asynchronous Processors
|
||||
|
||||
|
||||
Asynchronous Processors help you to scale the processing of
|
||||
items. In the asynchronous processor use-case, an
|
||||
`AsyncItemProcessor` serves as a dispatcher,
|
||||
executing the `ItemProcessor's` logic for an
|
||||
item on a new thread. The `Future` is passed to
|
||||
the `AsyncItemWriter` to be written once the
|
||||
processor completes.
|
||||
|
||||
|
||||
|
||||
Therefore, you can increase performance by using asynchronous item
|
||||
processing, basically allowing you to implement
|
||||
__fork-join__ scenarios. The
|
||||
`AsyncItemWriter` will gather the results and
|
||||
write back the chunk as soon as all the results become available.
|
||||
|
||||
|
||||
|
||||
Configuration of both the `AsyncItemProcessor`
|
||||
and `AsyncItemWriter` is simple. First, the
|
||||
`AsyncItemProcessor`:
|
||||
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="processor"
|
||||
class="org.springframework.batch.integration.async.AsyncItemProcessor">
|
||||
<property name="delegate">
|
||||
<bean class="your.ItemProcessor"/>
|
||||
</property>
|
||||
<property name="taskExecutor">
|
||||
<bean class="org.springframework.core.task.SimpleAsyncTaskExecutor"/>
|
||||
</property>
|
||||
</bean>
|
||||
----
|
||||
|
||||
|
||||
The property "`delegate`" is actually
|
||||
a reference to your `ItemProcessor` bean and
|
||||
the "taskExecutor" property is a
|
||||
reference to the `TaskExecutor` of your choice.
|
||||
|
||||
Then we configure the `AsyncItemWriter`:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="itemWriter"
|
||||
class="org.springframework.batch.integration.async.AsyncItemWriter">
|
||||
<property name="delegate">
|
||||
<bean id="itemWriter" class="your.ItemWriter"/>
|
||||
</property>
|
||||
</bean>
|
||||
----
|
||||
|
||||
|
||||
Again, the property "`delegate`" is
|
||||
actually a reference to your `ItemWriter` bean.
|
||||
|
||||
|
||||
[[externalizing-batch-process-execution]]
|
||||
|
||||
==== Externalizing Batch Process Execution
|
||||
|
||||
|
||||
The integration approaches discussed so far suggest use-cases
|
||||
where Spring Integration wraps Spring Batch like an outer-shell.
|
||||
However, Spring Batch can also use Spring Integration internally.
|
||||
Using this approach, Spring Batch users can delegate the
|
||||
processing of items or even chunks to outside processes. This
|
||||
allows you to offload complex processing. Spring Batch Integration
|
||||
provides dedicated support for:
|
||||
|
||||
|
||||
|
||||
* Remote Chunking
|
||||
|
||||
|
||||
|
||||
* Remote Partitioning
|
||||
|
||||
|
||||
[[remote-chunking]]
|
||||
|
||||
===== Remote Chunking
|
||||
|
||||
.Remote Chunking
|
||||
image::{batch-asciidoc}images/remote-chunking-sbi.png[Remote Chunking, scaledwidth="60%"]
|
||||
|
||||
Taking things one step further, one can also externalize the
|
||||
chunk processing using the
|
||||
`ChunkMessageChannelItemWriter` which is
|
||||
provided by Spring Batch Integration which will send items out
|
||||
and collect the result. Once sent, Spring Batch will continue the
|
||||
process of reading and grouping items, without waiting for the results.
|
||||
Rather it is the responsibility of the `ChunkMessageChannelItemWriter`
|
||||
to gather the results and integrate them back into the Spring Batch process.
|
||||
|
||||
|
||||
Using Spring Integration you have full
|
||||
control over the concurrency of your processes, for instance by
|
||||
using a `QueueChannel` instead of a
|
||||
`DirectChannel`. Furthermore, by relying on
|
||||
Spring Integration's rich collection of Channel Adapters (e.g.,
|
||||
JMS or AMQP), you can distribute chunks of a Batch job to
|
||||
external systems for processing.
|
||||
|
||||
A simple job with a step to be remotely chunked would have a
|
||||
configuration similar to the following:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<job id="personJob">
|
||||
<step id="step1">
|
||||
<tasklet>
|
||||
<chunk reader="itemReader" writer="itemWriter" commit-interval="200"/>
|
||||
</tasklet>
|
||||
...
|
||||
</step>
|
||||
</job>
|
||||
----
|
||||
|
||||
The `ItemReader` reference would point to the bean you would like
|
||||
to use for reading data on the master. The `ItemWriter` reference
|
||||
points to a special `ItemWriter`
|
||||
"`ChunkMessageChannelItemWriter`"
|
||||
as described above. The processor (if any) is left off the
|
||||
master configuration as it is configured on the slave. The
|
||||
following configuration provides a basic master setup. It's
|
||||
advised to check any additional component properties such as
|
||||
throttle limits and so on when implementing your use case.
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="connectionFactory" class="org.apache.activemq.ActiveMQConnectionFactory">
|
||||
<property name="brokerURL" value="tcp://localhost:61616"/>
|
||||
</bean>
|
||||
|
||||
<int-jms:outbound-channel-adapter id="requests" destination-name="requests"/>
|
||||
|
||||
<bean id="messagingTemplate"
|
||||
class="org.springframework.integration.core.MessagingTemplate">
|
||||
<property name="defaultChannel" ref="requests"/>
|
||||
<property name="receiveTimeout" value="2000"/>
|
||||
</bean>
|
||||
|
||||
<bean id="itemWriter"
|
||||
class="org.springframework.batch.integration.chunk.ChunkMessageChannelItemWriter"
|
||||
scope="step">
|
||||
<property name="messagingOperations" ref="messagingTemplate"/>
|
||||
<property name="replyChannel" ref="replies"/>
|
||||
</bean>
|
||||
|
||||
<bean id="chunkHandler"
|
||||
class="org.springframework.batch.integration.chunk.RemoteChunkHandlerFactoryBean">
|
||||
<property name="chunkWriter" ref="itemWriter"/>
|
||||
<property name="step" ref="step1"/>
|
||||
</bean>
|
||||
|
||||
<int:channel id="replies">
|
||||
<int:queue/>
|
||||
</int:channel>
|
||||
|
||||
<int-jms:message-driven-channel-adapter id="jmsReplies"
|
||||
destination-name="replies"
|
||||
channel="replies"/>
|
||||
----
|
||||
|
||||
|
||||
This configuration provides us with a number of beans. We
|
||||
configure our messaging middleware using ActiveMQ and
|
||||
inbound/outbound JMS adapters provided by Spring Integration. As
|
||||
shown, our `itemWriter` bean which is
|
||||
referenced by our job step utilizes the
|
||||
`ChunkMessageChannelItemWriter` for writing chunks over the
|
||||
configured middleware.
|
||||
|
||||
Now let's move on to the slave configuration:
|
||||
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="connectionFactory" class="org.apache.activemq.ActiveMQConnectionFactory">
|
||||
<property name="brokerURL" value="tcp://localhost:61616"/>
|
||||
</bean>
|
||||
|
||||
<int:channel id="requests"/>
|
||||
<int:channel id="replies"/>
|
||||
|
||||
<int-jms:message-driven-channel-adapter id="jmsIn"
|
||||
destination-name="requests"
|
||||
channel="requests"/>
|
||||
|
||||
<int-jms:outbound-channel-adapter id="outgoingReplies"
|
||||
destination-name="replies"
|
||||
channel="replies">
|
||||
</int-jms:outbound-channel-adapter>
|
||||
|
||||
<int:service-activator id="serviceActivator"
|
||||
input-channel="requests"
|
||||
output-channel="replies"
|
||||
ref="chunkProcessorChunkHandler"
|
||||
method="handleChunk"/>
|
||||
|
||||
<bean id="chunkProcessorChunkHandler"
|
||||
class="org.springframework.batch.integration.chunk.ChunkProcessorChunkHandler">
|
||||
<property name="chunkProcessor">
|
||||
<bean class="org.springframework.batch.core.step.item.SimpleChunkProcessor">
|
||||
<property name="itemWriter">
|
||||
<bean class="io.spring.sbi.PersonItemWriter"/>
|
||||
</property>
|
||||
<property name="itemProcessor">
|
||||
<bean class="io.spring.sbi.PersonItemProcessor"/>
|
||||
</property>
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
----
|
||||
|
||||
|
||||
Most of these configuration items should look familiar from the
|
||||
master configuration. Slaves do not need access to things like
|
||||
the Spring Batch `JobRepository` nor access
|
||||
to the actual job configuration file. The main bean of interest
|
||||
is the "`chunkProcessorChunkHandler`". The
|
||||
`chunkProcessor` property of `ChunkProcessorChunkHandler` takes a
|
||||
configured `SimpleChunkProcessor` which is where you would provide a reference to your
|
||||
`ItemWriter` and optionally your
|
||||
`ItemProcessor` that will run on the slave
|
||||
when it receives chunks from the master.
|
||||
|
||||
For more information, please also consult the Spring Batch
|
||||
manual, specifically the chapter on
|
||||
link:$$http://docs.spring.io/spring-batch/reference/html/scalability.html#remoteChunking$$[Remote Chunking].
|
||||
|
||||
|
||||
[[remote-partitioning]]
|
||||
|
||||
===== Remote Partitioning
|
||||
|
||||
.Remote Partitioning
|
||||
image::{batch-asciidoc}images/remote-partitioning.png[Remote Partitioning, scaledwidth="60%"]
|
||||
|
||||
|
||||
Remote Partitioning, on the other hand, is useful when the
|
||||
problem is not the processing of items, but the associated I/O
|
||||
represents the bottleneck. Using Remote Partitioning, work can
|
||||
be farmed out to slaves that execute complete Spring Batch
|
||||
steps. Thus, each slave has its own `ItemReader`, `ItemProcessor` and
|
||||
`ItemWriter`. For this purpose, Spring Batch
|
||||
Integration provides the `MessageChannelPartitionHandler`.
|
||||
|
||||
|
||||
|
||||
This implementation of the `PartitionHandler`
|
||||
interface uses MessageChannel instances to
|
||||
send instructions to remote workers and receive their responses.
|
||||
This provides a nice abstraction from the transports (e.g. JMS
|
||||
or AMQP) being used to communicate with the remote workers.
|
||||
|
||||
|
||||
|
||||
The reference manual section
|
||||
link:$$http://docs.spring.io/spring-batch/reference/html/scalability.html#partitioning$$[Remote Partitioning] provides an overview of the concepts and
|
||||
components needed to configure Remote Partitioning and shows an
|
||||
example of using the default
|
||||
`TaskExecutorPartitionHandler` to partition
|
||||
in separate local threads of execution. For Remote Partitioning
|
||||
to multiple JVMs, two additional components are required:
|
||||
|
||||
* Remoting fabric or grid environment
|
||||
* A `PartitionHandler` implementation that supports the desired
|
||||
remoting fabric or grid environment
|
||||
|
||||
|
||||
|
||||
Similar to Remote Chunking, JMS can be used as the "remoting
|
||||
fabric" and the `PartitionHandler` implementation to be used
|
||||
as described above is the
|
||||
`MessageChannelPartitionHandler`. The example
|
||||
shown below assumes an existing partitioned job and focuses on
|
||||
the `MessageChannelPartitionHandler` and JMS
|
||||
configuration:
|
||||
|
||||
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<bean id="partitionHandler"
|
||||
class="org.springframework.batch.integration.partition.MessageChannelPartitionHandler">
|
||||
<property name="stepName" value="step1"/>
|
||||
<property name="gridSize" value="3"/>
|
||||
<property name="replyChannel" ref="outbound-replies"/>
|
||||
<property name="messagingOperations">
|
||||
<bean class="org.springframework.integration.core.MessagingTemplate">
|
||||
<property name="defaultChannel" ref="outbound-requests"/>
|
||||
<property name="receiveTimeout" value="100000"/>
|
||||
</bean>
|
||||
</property>
|
||||
</bean>
|
||||
|
||||
<int:channel id="outbound-requests"/>
|
||||
<int-jms:outbound-channel-adapter destination="requestsQueue"
|
||||
channel="outbound-requests"/>
|
||||
|
||||
<int:channel id="inbound-requests"/>
|
||||
<int-jms:message-driven-channel-adapter destination="requestsQueue"
|
||||
channel="inbound-requests"/>
|
||||
|
||||
<bean id="stepExecutionRequestHandler"
|
||||
class="org.springframework.batch.integration.partition.StepExecutionRequestHandler">
|
||||
<property name="jobExplorer" ref="jobExplorer"/>
|
||||
<property name="stepLocator" ref="stepLocator"/>
|
||||
</bean>
|
||||
|
||||
<int:service-activator ref="stepExecutionRequestHandler" input-channel="inbound-requests"
|
||||
output-channel="outbound-staging"/>
|
||||
|
||||
<int:channel id="outbound-staging"/>
|
||||
<int-jms:outbound-channel-adapter destination="stagingQueue"
|
||||
channel="outbound-staging"/>
|
||||
|
||||
<int:channel id="inbound-staging"/>
|
||||
<int-jms:message-driven-channel-adapter destination="stagingQueue"
|
||||
channel="inbound-staging"/>
|
||||
|
||||
<int:aggregator ref="partitionHandler" input-channel="inbound-staging"
|
||||
output-channel="outbound-replies"/>
|
||||
|
||||
<int:channel id="outbound-replies">
|
||||
<int:queue/>
|
||||
</int:channel>
|
||||
|
||||
<bean id="stepLocator"
|
||||
class="org.springframework.batch.integration.partition.BeanFactoryStepLocator" />
|
||||
----
|
||||
|
||||
Also ensure the partition `handler` attribute maps to the `partitionHandler` bean:
|
||||
|
||||
[source, xml]
|
||||
----
|
||||
<job id="personJob">
|
||||
<step id="step1.master">
|
||||
<partition partitioner="partitioner" handler="partitionHandler"/>
|
||||
...
|
||||
</step>
|
||||
</job>
|
||||
----
|
||||
|
||||
465
spring-batch-docs/asciidoc/spring-batch-intro.adoc
Normal file
@@ -0,0 +1,465 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[spring-batch-intro]]
|
||||
|
||||
== Spring Batch Introduction
|
||||
|
||||
Many applications within the enterprise domain require bulk processing
|
||||
to perform business operations in mission critical environments. These
|
||||
business operations include automated, complex processing of large volumes
|
||||
of information that is most efficiently processed without user interaction.
|
||||
These operations typically include time based events (e.g. month-end
|
||||
calculations, notices or correspondence), periodic application of complex
|
||||
business rules processed repetitively across very large data sets (e.g.
|
||||
Insurance benefit determination or rate adjustments), or the integration of
|
||||
information that is received from internal and external systems that
|
||||
typically requires formatting, validation and processing in a transactional
|
||||
manner into the system of record. Batch processing is used to process
|
||||
billions of transactions every day for enterprises.
|
||||
|
||||
Spring Batch is a lightweight, comprehensive batch framework designed
|
||||
to enable the development of robust batch applications vital for the daily
|
||||
operations of enterprise systems. Spring Batch builds upon the productivity,
|
||||
POJO-based development approach, and general ease of use capabilities people
|
||||
have come to know from the Spring Framework, while making it easy for
|
||||
developers to access and leverage more advanced enterprise services when
|
||||
necessary. Spring Batch is not a scheduling framework. There are many good
|
||||
enterprise schedulers available in both the commercial and open source
|
||||
spaces such as Quartz, Tivoli, Control-M, etc. It is intended to work in
|
||||
conjunction with a scheduler, not replace a scheduler.
|
||||
|
||||
Spring Batch provides reusable functions that are essential in
|
||||
processing large volumes of records, including logging/tracing, transaction
|
||||
management, job processing statistics, job restart, skip, and resource
|
||||
management. It also provides more advanced technical services and features
|
||||
that will enable extremely high-volume and high performance batch jobs
|
||||
through optimization and partitioning techniques. Simple as well as complex,
|
||||
high-volume batch jobs can leverage the framework in a highly scalable
|
||||
manner to process significant volumes of information.
|
||||
|
||||
[[springBatchBackground]]
|
||||
|
||||
|
||||
=== Background
|
||||
|
||||
While open source software projects and associated communities have
|
||||
focused greater attention on web-based and SOA messaging-based
|
||||
architecture frameworks, there has been a notable lack of focus on
|
||||
reusable architecture frameworks to accommodate Java-based batch
|
||||
processing needs, despite continued needs to handle such processing within
|
||||
enterprise IT environments. The lack of a standard, reusable batch
|
||||
architecture has resulted in the proliferation of many one-off, in-house
|
||||
solutions developed within client enterprise IT functions.
|
||||
|
||||
SpringSource and Accenture have collaborated to change this.
|
||||
Accenture's hands-on industry and technical experience in implementing
|
||||
batch architectures, SpringSource's depth of technical experience, and
|
||||
Spring's proven programming model together mark a natural and powerful
|
||||
partnership to create high-quality, market relevant software aimed at
|
||||
filling an important gap in enterprise Java. Both companies are also
|
||||
currently working with a number of clients solving similar problems
|
||||
developing Spring-based batch architecture solutions. This has provided
|
||||
some useful additional detail and real-life constraints helping to ensure
|
||||
the solution can be applied to the real-world problems posed by clients.
|
||||
For these reasons and many more, SpringSource and Accenture have teamed to
|
||||
collaborate on the development of Spring Batch.
|
||||
|
||||
Accenture has contributed previously proprietary batch processing
|
||||
architecture frameworks, based upon decades worth of experience in
|
||||
building batch architectures with the last several generations of
|
||||
platforms, (i.e., COBOL/Mainframe, C++/Unix, and now Java/anywhere) to the
|
||||
Spring Batch project along with committer resources to drive support,
|
||||
enhancements, and the future roadmap.
|
||||
|
||||
The collaborative effort between Accenture and SpringSource aims to
|
||||
promote the standardization of software processing approaches, frameworks,
|
||||
and tools that can be consistently leveraged by enterprise users when
|
||||
creating batch applications. Companies and government agencies desiring to
|
||||
deliver standard, proven solutions to their enterprise IT environments
|
||||
will benefit from Spring Batch.
|
||||
|
||||
[[springBatchUsageScenarios]]
|
||||
|
||||
|
||||
=== Usage Scenarios
|
||||
|
||||
A typical batch program generally reads a large number of records
|
||||
from a database, file, or queue, processes the data in some fashion, and
|
||||
then writes back data in a modified form. Spring Batch automates this
|
||||
basic batch iteration, providing the capability to process similar
|
||||
transactions as a set, typically in an offline environment without any
|
||||
user interaction. Batch jobs are part of most IT projects and Spring Batch
|
||||
is the only open source framework that provides a robust, enterprise-scale
|
||||
solution.
|
||||
|
||||
Business Scenarios
|
||||
|
||||
* Commit batch process periodically
|
||||
|
||||
|
||||
* Concurrent batch processing: parallel processing of a
|
||||
job
|
||||
|
||||
|
||||
* Staged, enterprise message-driven processing
|
||||
|
||||
|
||||
* Massively parallel batch processing
|
||||
|
||||
|
||||
* Manual or scheduled restart after failure
|
||||
|
||||
|
||||
* Sequential processing of dependent steps (with extensions to
|
||||
workflow-driven batches)
|
||||
|
||||
|
||||
* Partial processing: skip records (e.g. on rollback)
|
||||
|
||||
|
||||
* Whole-batch transaction: for cases with a small batch size or
|
||||
existing stored procedures/scripts
|
||||
|
||||
|
||||
|
||||
Technical Objectives
|
||||
|
||||
* Batch developers use the Spring programming model: concentrate
|
||||
on business logic; let the framework take care of
|
||||
infrastructure.
|
||||
|
||||
|
||||
* Clear separation of concerns between the infrastructure, the
|
||||
batch execution environment, and the batch application.
|
||||
|
||||
|
||||
* Provide common, core execution services as interfaces that all
|
||||
projects can implement.
|
||||
|
||||
|
||||
* Provide simple and default implementations of the core
|
||||
execution interfaces that can be used 'out of the box'.
|
||||
|
||||
|
||||
* Easy to configure, customize, and extend services, by
|
||||
leveraging the spring framework in all layers.
|
||||
|
||||
|
||||
* All existing core services should be easy to replace or
|
||||
extend, without any impact to the infrastructure layer.
|
||||
|
||||
|
||||
* Provide a simple deployment model, with the architecture JARs
|
||||
completely separate from the application, built using Maven.
|
||||
|
||||
|
||||
|
||||
[[springBatchArchitecture]]
|
||||
|
||||
|
||||
=== Spring Batch Architecture
|
||||
|
||||
|
||||
|
||||
Spring Batch is designed with extensibility and a diverse group of
|
||||
end users in mind. The figure below shows a sketch of the layered
|
||||
architecture that supports the extensibility and ease of use for end-user
|
||||
developers.
|
||||
|
||||
.Spring Batch Layered Architecture
|
||||
image::{batch-asciidoc}images/spring-batch-layers.png[Figure 1.1: Spring Batch Layered Architecture, scaledwidth="60%"]
|
||||
|
||||
|
||||
|
||||
This layered architecture highlights three major high level
|
||||
components: Application, Core, and Infrastructure. The application
|
||||
contains all batch jobs and custom code written by developers using Spring
|
||||
Batch. The Batch Core contains the core runtime classes necessary to
|
||||
launch and control a batch job. It includes things such as a
|
||||
JobLauncher, Job, and
|
||||
Step implementations. Both Application and Core are
|
||||
built on top of a common infrastructure. This infrastructure contains
|
||||
common readers and writers, and services such as the
|
||||
RetryTemplate, which are used both by application
|
||||
developers (`ItemReader` and
|
||||
`ItemWriter`) and the core framework itself.
|
||||
(retry)
|
||||
|
||||
[[batchArchitectureConsiderations]]
|
||||
|
||||
|
||||
=== General Batch Principles and Guidelines
|
||||
|
||||
The following are a number of key principles, guidelines, and general considerations to take into account when building a batch solution.
|
||||
|
||||
|
||||
* A batch architecture typically affects on-line architecture and vice versa. Design with both architectures and environments in mind using common building blocks when possible.
|
||||
|
||||
|
||||
* Simplify as much as possible and avoid building complex logical structures in single batch applications.
|
||||
|
||||
|
||||
* Process data as close to where the data physically resides as possible or vice versa (i.e., keep your data where your processing occurs).
|
||||
|
||||
|
||||
* Minimize system resource use, especially I/O. Perform as many operations as possible in internal memory.
|
||||
|
||||
|
||||
* Review application I/O (analyze SQL statements) to ensure that unnecessary physical I/O is avoided. In particular, the following four common flaws need to be looked for:
|
||||
|
||||
* Reading data for every transaction when the data could be read once and kept cached or in the working storage;
|
||||
|
||||
|
||||
* Rereading data for a transaction where the data was read earlier in the same transaction;
|
||||
|
||||
|
||||
* Causing unnecessary table or index scans;
|
||||
|
||||
|
||||
* Not specifying key values in the WHERE clause of an SQL statement.
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
* Do not do things twice in a batch run. For instance, if you need data summarization for reporting purposes, increment stored totals if possible when data is being initially processed, so your reporting application does not have to reprocess the same data.
|
||||
|
||||
|
||||
* Allocate enough memory at the beginning of a batch application to avoid time-consuming reallocation during the process.
|
||||
|
||||
|
||||
* Always assume the worst with regard to data integrity. Insert adequate checks and record validation to maintain data integrity.
|
||||
|
||||
|
||||
* Implement checksums for internal validation where possible. For example, flat files should have a trailer record telling the total number of records in the file and an aggregate of the key fields.
|
||||
|
||||
|
||||
* Plan and execute stress tests as early as possible in a production-like environment with realistic data volumes.
|
||||
|
||||
|
||||
* In large batch systems backups can be challenging, especially if the system is running concurrent with on-line on a 24-7 basis. Database backups are typically well taken care of in the on-line design, but file backups should be considered to be just as important. If the system depends on flat files, file backup procedures should not only be in place and documented, but regularly tested as well.
|
||||
|
||||
[[batchProcessingStrategy]]
|
||||
|
||||
|
||||
=== Batch Processing Strategies
|
||||
|
||||
To help design and implement batch systems, basic batch application building blocks and patterns should be provided to the designers and programmers in the form of sample structure charts and code shells. When starting to design a batch job, the business logic should be decomposed into a series of steps which can be implemented using the following standard building blocks:
|
||||
|
||||
|
||||
* __Conversion Applications:__ For each type of file supplied by or generated to an external system, a conversion application will need to be created to convert the transaction records supplied into a standard format required for processing. This type of batch application can partly or entirely consist of translation utility modules (see Basic Batch Services).
|
||||
|
||||
|
||||
* __Validation Applications:__ Validation applications ensure that all input/output records are correct and consistent. Validation is typically based on file headers and trailers, checksums and validation algorithms as well as record level cross-checks.
|
||||
|
||||
|
||||
* __Extract Applications:__ An application that reads a set of records from a database or input file, selects records based on predefined rules, and writes the records to an output file.
|
||||
|
||||
|
||||
* __Extract/Update Applications:__ An application that reads records from a database or an input file, and makes changes to a database or an output file driven by the data found in each input record.
|
||||
|
||||
|
||||
* __Processing and Updating Applications:__ An application that performs processing on input transactions from an extract or a validation application. The processing will usually involve reading a database to obtain data required for processing, potentially updating the database and creating records for output processing.
|
||||
|
||||
|
||||
* __Output/Format Applications:__ An application that reads an input file, restructures data from this record according to a standard format, and produces an output file for printing or transmission to another program or system.
|
||||
|
||||
Additionally a basic application shell should be provided for business logic that cannot be built using the previously mentioned building blocks.
|
||||
|
||||
In addition to the main building blocks, each application may use one or more standard utility steps, such as:
|
||||
|
||||
|
||||
* Sort - A program that reads an input file and produces an output file where records have been re-sequenced according to a sort key field in the records. Sorts are usually performed by standard system utilities.
|
||||
|
||||
|
||||
* Split - A program that reads a single input file, and writes each record to one of several output files based on a field value. Splits can be tailored or performed by parameter-driven standard system utilities.
|
||||
|
||||
|
||||
* Merge - A program that reads records from multiple input files and produces one output file with combined data from the input files. Merges can be tailored or performed by parameter-driven standard system utilities.
|
||||
|
||||
Batch applications can additionally be categorized by their input source:
|
||||
|
||||
|
||||
* Database-driven applications are driven by rows or values retrieved from the database.
|
||||
|
||||
|
||||
* File-driven applications are driven by records or values retrieved from a file.
|
||||
|
||||
|
||||
* Message-driven applications are driven by messages retrieved from a message queue.
|
||||
|
||||
The foundation of any batch system is the processing strategy. Factors affecting the selection of the strategy include: estimated batch system volume, concurrency with on-line systems or with other batch systems, and available batch windows (with more enterprises wanting to be up and running 24x7, there are often no obvious batch windows).
|
||||
|
||||
Typical processing options for batch are:
|
||||
|
||||
|
||||
* Normal processing in a batch window during off-line
|
||||
|
||||
|
||||
* Concurrent batch / on-line processing
|
||||
|
||||
|
||||
* Parallel processing of many different batch runs or jobs at the same time
|
||||
|
||||
|
||||
* Partitioning (i.e. processing of many instances of the same job at the same time)
|
||||
|
||||
|
||||
* A combination of these
|
||||
|
||||
The order in the list above reflects the implementation complexity, processing in a batch window being the easiest and partitioning the most complex to implement.
|
||||
|
||||
Some or all of these options may be supported by a commercial scheduler.
|
||||
|
||||
In the following section these processing options are discussed in more detail. It is important to notice that the commit and locking strategy adopted by batch processes will be dependent on the type of processing performed, and that, as a rule of thumb, the on-line locking strategy should also use the same principles. Therefore, the batch architecture cannot be simply an afterthought when designing an overall architecture.
|
||||
|
||||
The locking strategy can use only normal database locks, or an additional custom locking service can be implemented in the architecture. The locking service would track database locking (for example by storing the necessary information in a dedicated db-table) and give or deny permissions to the application programs requesting a db operation. Retry logic could also be implemented by this architecture to avoid aborting a batch job in case of a lock situation.
|
||||
|
||||
*1. Normal processing in a batch window*
|
||||
For simple batch processes running in a separate batch window, where the data being updated is not required by on-line users or other batch processes, concurrency is not an issue and a single commit can be done at the end of the batch run.
|
||||
|
||||
In most cases a more robust approach is more appropriate. A thing to keep in mind is that batch systems have a tendency to grow as time goes by, both in terms of complexity and the data volumes they will handle. If no locking strategy is in place and the system still relies on a single commit point, modifying the batch programs can be painful. Therefore, even with the simplest batch systems, consider the need for commit logic for restart-recovery options as well as the information concerning the more complex cases below.
|
||||
|
||||
*2. Concurrent batch / on-line processing*
|
||||
Batch applications processing data that can simultaneously be updated by on-line users should not lock any data (either in the database or in files) which could be required by on-line users for more than a few seconds. Also, updates should be committed to the database at the end of every few transactions. This minimizes the portion of data that is unavailable to other processes and the elapsed time the data is unavailable.
|
||||
|
||||
Another option to minimize physical locking is to have a logical row-level locking implemented using either an Optimistic Locking Pattern or a Pessimistic Locking Pattern.
|
||||
|
||||
|
||||
* Optimistic locking assumes a low likelihood of record contention. It typically means inserting a timestamp column in each database table used concurrently by both batch and on-line processing. When an application fetches a row for processing, it also fetches the timestamp. As the application then tries to update the processed row, the update uses the original timestamp in the WHERE clause. If the timestamp matches, the data and the timestamp will be updated successfully. If the timestamp does not match, this indicates that another application has updated the same row between the fetch and the update attempt and therefore the update cannot be performed.
|
||||
|
||||
|
||||
* Pessimistic locking is any locking strategy that assumes there is a high likelihood of record contention and therefore either a physical or logical lock needs to be obtained at retrieval time. One type of pessimistic logical locking uses a dedicated lock-column in the database table. When an application retrieves the row for update, it sets a flag in the lock column. With the flag in place, other applications attempting to retrieve the same row will logically fail. When the application that set the flag updates the row, it also clears the flag, enabling the row to be retrieved by other applications. Please note, that the integrity of data must be maintained also between the initial fetch and the setting of the flag, for example by using db locks (e.g., SELECT FOR UPDATE). Note also that this method suffers from the same downside as physical locking except that it is somewhat easier to manage building a time-out mechanism that will get the lock released if the user goes to lunch while the record is locked.
|
||||
|
||||
These patterns are not necessarily suitable for batch processing, but they might be used for concurrent batch and on-line processing (e.g. in cases where the database doesn't support row-level locking). As a general rule, optimistic locking is more suitable for on-line applications, while pessimistic locking is more suitable for batch applications. Whenever logical locking is used, the same scheme must be used for all applications accessing data entities protected by logical locks.
|
||||
|
||||
Note that both of these solutions only address locking a single record. Often we may need to lock a logically related group of records. With physical locks, you have to manage these very carefully in order to avoid potential deadlocks. With logical locks, it is usually best to build a logical lock manager that understands the logical record groups you want to protect and can ensure that locks are coherent and non-deadlocking. This logical lock manager usually uses its own tables for lock management, contention reporting, time-out mechanism, etc.
|
||||
|
||||
*3. Parallel Processing*
|
||||
Parallel processing allows multiple batch runs / jobs to run in parallel to minimize the total elapsed batch processing time. This is not a problem as long as the jobs are not sharing the same files, db-tables or index spaces. If they do, this service should be implemented using partitioned data. Another option is to build an architecture module for maintaining interdependencies using a control table. A control table should contain a row for each shared resource and whether it is in use by an application or not. The batch architecture or the application in a parallel job would then retrieve information from that table to determine if it can get access to the resource it needs or not.
|
||||
|
||||
If the data access is not a problem, parallel processing can be implemented through the use of additional threads to process in parallel. In the mainframe environment, parallel job classes have traditionally been used, in order to ensure adequate CPU time for all the processes. Regardless, the solution has to be robust enough to ensure time slices for all the running processes.
|
||||
|
||||
Other key issues in parallel processing include load balancing and the availability of general system resources such as files, database buffer pools etc. Also note that the control table itself can easily become a critical resource.
|
||||
|
||||
*4. Partitioning*
|
||||
Using partitioning allows multiple versions of large batch applications to run concurrently. The purpose of this is to reduce the elapsed time required to process long batch jobs. Processes which can be successfully partitioned are those where the input file can be split and/or the main database tables partitioned to allow the application to run against different sets of data.
|
||||
|
||||
In addition, processes which are partitioned must be designed to only process their assigned data set. A partitioning architecture has to be closely tied to the database design and the database partitioning strategy. Please note that database partitioning doesn't necessarily mean physical partitioning of the database, although in most cases this is advisable. The following picture illustrates the partitioning approach:
|
||||
|
||||
.Partitioned Process
|
||||
image::{batch-asciidoc}images/partitioned.png[Figure 1.2: Partitioned Process, scaledwidth="60%"]
|
||||
|
||||
|
||||
The architecture should be flexible enough to allow dynamic configuration of the number of partitions. Both automatic and user controlled configuration should be considered. Automatic configuration may be based on parameters such as the input file size and/or the number of input records.
|
||||
|
||||
*4.1 Partitioning Approaches*
|
||||
The following lists some of the possible partitioning approaches. Selecting a partitioning approach has to be done on a case-by-case basis.
|
||||
|
||||
_1. Fixed and Even Break-Up of Record Set_
|
||||
|
||||
This involves breaking the input record set into an even number of portions (e.g. 10, where each portion will have exactly 1/10th of the entire record set). Each portion is then processed by one instance of the batch/extract application.
|
||||
|
||||
In order to use this approach, preprocessing will be required to split the recordset up. The result of this split will be a lower and upper bound placement number which can be used as input to the batch/extract application in order to restrict its processing to its portion alone.
|
||||
|
||||
Preprocessing could be a large overhead as it has to calculate and determine the bounds of each portion of the record set.
|
||||
|
||||
_2. Breakup by a Key Column_
|
||||
|
||||
This involves breaking up the input record set by a key column such as a location code, and assigning data from each key to a batch instance. In order to achieve this, column values can either be:
|
||||
|
||||
_1. Assigned to a batch instance via a partitioning table (see below for details)._
|
||||
|
||||
_2. Assigned to a batch instance by a portion of the value (e.g. values 0000-0999, 1000 - 1999, etc.)_
|
||||
|
||||
Under option 1, addition of new values will mean a manual reconfiguration of the batch/extract to ensure that the new value is added to a particular instance.
|
||||
|
||||
Under option 2, this will ensure that all values are covered via an instance of the batch job. However, the number of values processed by one instance is dependent on the distribution of column values (i.e. there may be a large number of locations in the 0000-0999 range, and few in the 1000-1999 range). Under this option, the data range should be designed with partitioning in mind.
|
||||
|
||||
Under both options, the optimal even distribution of records to batch instances cannot be realized. There is no dynamic configuration of the number of batch instances used.
|
||||
|
||||
_5. Breakup by Views_
|
||||
|
||||
This approach is basically breakup by a key column, but on the database level. It involves breaking up the recordset into views. These views will be used by each instance of the batch application during its processing. The breakup will be done by grouping the data.
|
||||
|
||||
With this option, each instance of a batch application will have to be configured to hit a particular view (instead of the master table). Also, with the addition of new data values, this new group of data will have to be included into a view. There is no dynamic configuration capability, as a change in the number of instances will result in a change to the views.
|
||||
|
||||
_6. Addition of a Processing Indicator_
|
||||
|
||||
This involves the addition of a new column to the input table, which acts as an indicator. As a preprocessing step, all indicators would be marked to non-processed. During the record fetch stage of the batch application, records are read on the condition that the record is marked non-processed, and once a record is read (with lock), it is marked processing. When processing of that record is completed, the indicator is updated to either complete or error. Many instances of a batch application can be started without a change, as the additional column ensures that a record is only processed once.
|
||||
|
||||
With this option, I/O on the table increases dynamically. In the case of an updating batch application, this impact is reduced, as a write will have to occur anyway.
|
||||
|
||||
_7. Extract Table to a Flat File_
|
||||
|
||||
This involves the extraction of the table into a file. This file can then be split into multiple segments and used as input to the batch instances.
|
||||
|
||||
With this option, the additional overhead of extracting the table into a file, and splitting it, may cancel out the effect of multi-partitioning. Dynamic configuration can be achieved via changing the file splitting script.
|
||||
|
||||
_8. Use of a Hashing Column_
|
||||
|
||||
This scheme involves the addition of a hash column (key/index) to the database tables used to retrieve the driver record. This hash column will have an indicator to determine which instance of the batch application will process this particular row. For example, if there are three batch instances to be started, then an indicator of 'A' will mark that row for processing by instance 1, an indicator of 'B' will mark that row for processing by instance 2, etc.
|
||||
|
||||
The procedure used to retrieve the records would then have an additional WHERE clause to select all rows marked by a particular indicator. The inserts in this table would involve the addition of the marker field, which would be defaulted to one of the instances (e.g. 'A').
|
||||
|
||||
A simple batch application would be used to update the indicators so as to redistribute the load between the different instances. When a sufficiently large number of new rows have been added, this batch can be run (at any time except during the batch window) to redistribute the new rows to other instances.
|
||||
|
||||
Additional instances of the batch application only require the running of the batch application as above to redistribute the indicators to cater for a new number of instances.
|
||||
|
||||
*4.2 Database and Application Design Principles*
|
||||
|
||||
An architecture that supports multi-partitioned applications which run against partitioned database tables using the key column approach should include a central partition repository for storing partition parameters. This provides flexibility and ensures maintainability. The repository will generally consist of a single table known as the partition table.
|
||||
|
||||
Information stored in the partition table will be static and in general should be maintained by the DBA. The table should consist of one row of information for each partition of a multi-partitioned application. The table should have columns for: Program ID Code, Partition Number (Logical ID of the partition), Low Value of the db key column for this partition, High Value of the db key column for this partition.
|
||||
|
||||
On program start-up the program id and partition number should be passed to the application from the architecture (Control Processing Tasklet). These variables are used to read the partition table, to determine what range of data the application is to process (if a key column approach is used). In addition the partition number must be used throughout the processing to:
|
||||
|
||||
|
||||
* Add to the output files/database updates in order for the merge process to work properly
|
||||
|
||||
|
||||
* Report normal processing to the batch log and any errors that occur during execution to the architecture error handler
|
||||
|
||||
*4.3 Minimizing Deadlocks*
|
||||
|
||||
When applications run in parallel or partitioned, contention in database resources and deadlocks may occur. It is critical that the database design team eliminates potential contention situations as far as possible as part of the database design.
|
||||
|
||||
Also ensure that the database index tables are designed with deadlock prevention and performance in mind.
|
||||
|
||||
Deadlocks or hot spots often occur in administration or architecture tables such as log tables, control tables, and lock tables. The implications of these should be taken into account as well. A realistic stress test is crucial for identifying the possible bottlenecks in the architecture.
|
||||
|
||||
To minimize the impact of conflicts on data, the architecture should provide services such as wait-and-retry intervals when attaching to a database or when encountering a deadlock. This means a built-in mechanism that reacts to certain database return codes and, instead of raising an error immediately, waits a predetermined amount of time and retries the database operation.
|
||||
|
||||
*4.4 Parameter Passing and Validation*
|
||||
|
||||
The partition architecture should be relatively transparent to application developers. The architecture should perform all tasks associated with running the application in a partitioned mode including:
|
||||
|
||||
|
||||
* Retrieve partition parameters before application start-up
|
||||
|
||||
|
||||
* Validate partition parameters before application start-up
|
||||
|
||||
|
||||
* Pass parameters to application at start-up
|
||||
|
||||
The validation should include checks to ensure that:
|
||||
|
||||
|
||||
* the application has sufficient partitions to cover the whole data range
|
||||
|
||||
|
||||
* there are no gaps between partitions
|
||||
|
||||
If the database is partitioned, some additional validation may be necessary to ensure that a single partition does not span database partitions.
|
||||
|
||||
Also the architecture should take into consideration the consolidation of partitions. Key questions include:
|
||||
|
||||
|
||||
* Must all the partitions be finished before going into the next job step?
|
||||
|
||||
|
||||
* What happens if one of the partitions aborts?
|
||||
|
||||
1903
spring-batch-docs/asciidoc/step.adoc
Normal file
691
spring-batch-docs/asciidoc/stylesheets/spring.css
Normal file
@@ -0,0 +1,691 @@
|
||||
@import url(https://fonts.googleapis.com/css?family=Varela+Round|Montserrat:400,700);
|
||||
/*! normalize.css v2.1.2 | MIT License | git.io/normalize */
|
||||
/* ========================================================================== HTML5 display definitions ========================================================================== */
|
||||
/** Correct `block` display not defined in IE 8/9. */
|
||||
article, aside, details, figcaption, figure, footer, header, hgroup, main, nav, section, summary { display: block; }
|
||||
|
||||
/** Correct `inline-block` display not defined in IE 8/9. */
|
||||
audio, canvas, video { display: inline-block; }
|
||||
|
||||
/** Prevent modern browsers from displaying `audio` without controls. Remove excess height in iOS 5 devices. */
|
||||
audio:not([controls]) { display: none; height: 0; }
|
||||
|
||||
/** Address `[hidden]` styling not present in IE 8/9. Hide the `template` element in IE, Safari, and Firefox < 22. */
|
||||
[hidden], template { display: none; }
|
||||
|
||||
/** Never render `script` elements; `!important` makes this win over any competing `display` declaration. */
script { display: none !important; }
|
||||
|
||||
/* ========================================================================== Base ========================================================================== */
|
||||
/** 1. Set default font family to sans-serif. 2. Prevent iOS text size adjust after orientation change, without disabling user zoom. */
|
||||
html { font-family: sans-serif; /* 1 */ -ms-text-size-adjust: 100%; /* 2 */ -webkit-text-size-adjust: 100%; /* 2 */ }
|
||||
|
||||
/** Remove default margin. */
|
||||
body { margin: 0; }
|
||||
|
||||
/* ========================================================================== Links ========================================================================== */
|
||||
/** Remove the gray background color from active links in IE 10. */
|
||||
a { background: transparent; }
|
||||
|
||||
/** Address `outline` inconsistency between Chrome and other browsers. */
|
||||
a:focus { outline: thin dotted; }
|
||||
|
||||
/** Improve readability when focused and also mouse hovered in all browsers. */
|
||||
a:active, a:hover { outline: 0; }
|
||||
|
||||
/* ========================================================================== Typography ========================================================================== */
|
||||
/** Address variable `h1` font-size and margin within `section` and `article` contexts in Firefox 4+, Safari 5, and Chrome. */
|
||||
h1 { font-size: 2em; margin: 0.67em 0; }
|
||||
|
||||
/** Address styling not present in IE 8/9, Safari 5, and Chrome. */
|
||||
abbr[title] { border-bottom: 1px dotted; }
|
||||
|
||||
/** Address style set to `bolder` in Firefox 4+, Safari 5, and Chrome. */
|
||||
b, strong { font-weight: bold; }
|
||||
|
||||
/** Address styling not present in Safari 5 and Chrome. */
|
||||
dfn { font-style: italic; }
|
||||
|
||||
/** Address differences between Firefox and other browsers. */
|
||||
hr { -moz-box-sizing: content-box; box-sizing: content-box; height: 0; }
|
||||
|
||||
/** Address styling not present in IE 8/9. */
|
||||
mark { background: #ff0; color: #000; }
|
||||
|
||||
/** Correct font family set oddly in Safari 5 and Chrome. */
|
||||
code, kbd, pre, samp { font-family: monospace, serif; font-size: 1em; }
|
||||
|
||||
/** Improve readability of pre-formatted text in all browsers. */
|
||||
pre { white-space: pre-wrap; }
|
||||
|
||||
/** Set consistent quote types. */
|
||||
q { quotes: "\201C" "\201D" "\2018" "\2019"; }
|
||||
|
||||
/** Address inconsistent and variable font size in all browsers. */
|
||||
small { font-size: 80%; }
|
||||
|
||||
/** Prevent `sub` and `sup` affecting `line-height` in all browsers. */
|
||||
sub, sup { font-size: 75%; line-height: 0; position: relative; vertical-align: baseline; }
|
||||
|
||||
sup { top: -0.5em; }
|
||||
|
||||
sub { bottom: -0.25em; }
|
||||
|
||||
/* ========================================================================== Embedded content ========================================================================== */
|
||||
/** Remove border when inside `a` element in IE 8/9. */
|
||||
img { border: 0; }
|
||||
|
||||
/** Correct overflow displayed oddly in IE 9. */
|
||||
svg:not(:root) { overflow: hidden; }
|
||||
|
||||
/* ========================================================================== Figures ========================================================================== */
|
||||
/** Address margin not present in IE 8/9 and Safari 5. */
|
||||
figure { margin: 0; }
|
||||
|
||||
/* ========================================================================== Forms ========================================================================== */
|
||||
/** Define consistent border, margin, and padding. */
|
||||
fieldset { border: 1px solid #c0c0c0; margin: 0 2px; padding: 0.35em 0.625em 0.75em; }
|
||||
|
||||
/** 1. Correct `color` not being inherited in IE 8/9. 2. Remove padding so people aren't caught out if they zero out fieldsets. */
|
||||
legend { border: 0; /* 1 */ padding: 0; /* 2 */ }
|
||||
|
||||
/** 1. Correct font family not being inherited in all browsers. 2. Correct font size not being inherited in all browsers. 3. Address margins set differently in Firefox 4+, Safari 5, and Chrome. */
|
||||
button, input, select, textarea { font-family: inherit; /* 1 */ font-size: 100%; /* 2 */ margin: 0; /* 3 */ }
|
||||
|
||||
/** Address Firefox 4+ setting `line-height` on `input` using `!important` in the UA stylesheet. */
|
||||
button, input { line-height: normal; }
|
||||
|
||||
/** Address inconsistent `text-transform` inheritance for `button` and `select`. All other form control elements do not inherit `text-transform` values. Correct `button` style inheritance in Chrome, Safari 5+, and IE 8+. Correct `select` style inheritance in Firefox 4+ and Opera. */
|
||||
button, select { text-transform: none; }
|
||||
|
||||
/** 1. Avoid the WebKit bug in Android 4.0.* where (2) destroys native `audio` and `video` controls. 2. Correct inability to style clickable `input` types in iOS. 3. Improve usability and consistency of cursor style between image-type `input` and others. */
|
||||
button, html input[type="button"], input[type="reset"], input[type="submit"] { -webkit-appearance: button; /* 2 */ cursor: pointer; /* 3 */ }
|
||||
|
||||
/** Re-set default cursor for disabled elements. */
|
||||
button[disabled], html input[disabled] { cursor: default; }
|
||||
|
||||
/** 1. Address box sizing set to `content-box` in IE 8/9. 2. Remove excess padding in IE 8/9. */
|
||||
input[type="checkbox"], input[type="radio"] { box-sizing: border-box; /* 1 */ padding: 0; /* 2 */ }
|
||||
|
||||
/** 1. Address `appearance` set to `searchfield` in Safari 5 and Chrome. 2. Address `box-sizing` set to `border-box` in Safari 5 and Chrome (include `-moz` to future-proof). */
|
||||
input[type="search"] { -webkit-appearance: textfield; /* 1 */ -moz-box-sizing: content-box; -webkit-box-sizing: content-box; /* 2 */ box-sizing: content-box; }
|
||||
|
||||
/** Remove inner padding and search cancel button in Safari 5 and Chrome on OS X. */
|
||||
input[type="search"]::-webkit-search-cancel-button, input[type="search"]::-webkit-search-decoration { -webkit-appearance: none; }
|
||||
|
||||
/** Remove inner padding and border in Firefox 4+. */
|
||||
button::-moz-focus-inner, input::-moz-focus-inner { border: 0; padding: 0; }
|
||||
|
||||
/** 1. Remove default vertical scrollbar in IE 8/9. 2. Improve readability and alignment in all browsers. */
|
||||
textarea { overflow: auto; /* 1 */ vertical-align: top; /* 2 */ }
|
||||
|
||||
/* ========================================================================== Tables ========================================================================== */
|
||||
/** Remove most spacing between table cells. */
|
||||
table { border-collapse: collapse; border-spacing: 0; }
|
||||
|
||||
/* Carries the "small" breakpoint (min-width: 768px) as a media query string in `font-family`, plus a matching `width`, on a `meta` element. NOTE(review): presumably read back by Foundation's JavaScript to discover breakpoints - confirm before removing. */
meta.foundation-mq-small { font-family: "only screen and (min-width: 768px)"; width: 768px; }
|
||||
|
||||
meta.foundation-mq-medium { font-family: "only screen and (min-width:1280px)"; width: 1280px; }
|
||||
|
||||
meta.foundation-mq-large { font-family: "only screen and (min-width:1440px)"; width: 1440px; }
|
||||
|
||||
/* Use border-box sizing for every element and for its :before/:after pseudo-elements; the -moz- and -webkit- prefixed declarations precede the unprefixed one. */
*, *:before, *:after { -moz-box-sizing: border-box; -webkit-box-sizing: border-box; box-sizing: border-box; }
|
||||
|
||||
html, body { font-size: 100%; }
|
||||
|
||||
body { background: white; color: #34302d; padding: 0; margin: 0; font-family: "Varela Round", sans-serif; font-weight: normal; font-style: normal; line-height: 1; position: relative; cursor: auto; }
|
||||
|
||||
a:hover { cursor: pointer; }
|
||||
|
||||
img, object, embed { max-width: 100%; height: auto; }
|
||||
|
||||
object, embed { height: 100%; }
|
||||
|
||||
img { -ms-interpolation-mode: bicubic; }
|
||||
|
||||
#map_canvas img, #map_canvas embed, #map_canvas object, .map_canvas img, .map_canvas embed, .map_canvas object { max-width: none !important; }
|
||||
|
||||
.left { float: left !important; }
|
||||
|
||||
.right { float: right !important; }
|
||||
|
||||
.text-left { text-align: left !important; }
|
||||
|
||||
.text-right { text-align: right !important; }
|
||||
|
||||
.text-center { text-align: center !important; }
|
||||
|
||||
.text-justify { text-align: justify !important; }
|
||||
|
||||
.hide { display: none; }
|
||||
|
||||
.antialiased { -webkit-font-smoothing: antialiased; }
|
||||
|
||||
img { display: inline-block; vertical-align: middle; }
|
||||
|
||||
textarea { height: auto; min-height: 50px; }
|
||||
|
||||
select { width: 100%; }
|
||||
|
||||
object, svg { display: inline-block; vertical-align: middle; }
|
||||
|
||||
.center { margin-left: auto; margin-right: auto; }
|
||||
|
||||
.spread { width: 100%; }
|
||||
|
||||
p.lead, .paragraph.lead > p, #preamble > .sectionbody > .paragraph:first-of-type p { font-size: 1.21875em; line-height: 1.6; }
|
||||
|
||||
.subheader, .admonitionblock td.content > .title, .audioblock > .title, .exampleblock > .title, .imageblock > .title, .listingblock > .title, .literalblock > .title, .stemblock > .title, .openblock > .title, .paragraph > .title, .quoteblock > .title, table.tableblock > .title, .verseblock > .title, .videoblock > .title, .dlist > .title, .olist > .title, .ulist > .title, .qlist > .title, .hdlist > .title { line-height: 1.45; color: #0b0a0a; font-weight: normal; margin-top: 0; margin-bottom: 0.25em; }
|
||||
|
||||
/* Typography resets */
|
||||
div, dl, dt, dd, ul, ol, li, h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6, pre, form, p, blockquote, th, td { margin: 0; padding: 0; direction: ltr; }
|
||||
|
||||
/* Default Link Styles */
|
||||
a { color: #548e2e; text-decoration: underline; line-height: inherit; }
|
||||
a:hover, a:focus { color: #487a28; }
|
||||
a img { border: none; }
|
||||
|
||||
/* Default paragraph styles */
|
||||
p { font-family: inherit; font-weight: normal; font-size: 1em; line-height: 1.6; margin-bottom: 1.25em; text-rendering: optimizeLegibility; }
|
||||
p aside { font-size: 0.875em; line-height: 1.35; font-style: italic; }
|
||||
|
||||
/* Default header styles */
|
||||
h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { font-family: Montserrat, sans-serif; font-weight: 400; font-style: normal; color: #34302d; text-rendering: optimizeLegibility; margin-top: 1em; margin-bottom: 0.5em; line-height: 1.0125em; }
|
||||
h1 small, h2 small, h3 small, #toctitle small, .sidebarblock > .content > .title small, h4 small, h5 small, h6 small { font-size: 60%; color: #867c74; line-height: 0; }
|
||||
|
||||
h1 { font-size: 2.125em; }
|
||||
|
||||
h2 { font-size: 1.6875em; }
|
||||
|
||||
h3, #toctitle, .sidebarblock > .content > .title { font-size: 1.375em; }
|
||||
|
||||
h4 { font-size: 1.125em; }
|
||||
|
||||
h5 { font-size: 1.125em; }
|
||||
|
||||
h6 { font-size: 1em; }
|
||||
|
||||
hr { border: solid #ddddd8; border-width: 1px 0 0; clear: both; margin: 1.25em 0 1.1875em; height: 0; }
|
||||
|
||||
/* Helpful Typography Defaults */
|
||||
em, i { font-style: italic; line-height: inherit; }
|
||||
|
||||
strong, b { font-weight: bold; line-height: inherit; }
|
||||
|
||||
small { font-size: 60%; line-height: inherit; }
|
||||
|
||||
code { font-family: Monaco, Menlo, Consolas, "Courier New", monospace; font-weight: normal; color: rgba(0, 0, 0, 0.9); }
|
||||
|
||||
/* Lists */
|
||||
ul, ol, dl { font-size: 1em; line-height: 1.6; margin-bottom: 1.25em; list-style-position: outside; font-family: inherit; }
|
||||
|
||||
ul, ol { margin-left: 1.5em; }
|
||||
ul.no-bullet, ol.no-bullet { margin-left: 1.5em; }
|
||||
|
||||
/* Unordered Lists */
|
||||
ul li ul, ul li ol { margin-left: 1.25em; margin-bottom: 0; font-size: 1em; /* Override nested font-size change */ }
|
||||
ul.square li ul, ul.circle li ul, ul.disc li ul { list-style: inherit; }
|
||||
ul.square { list-style-type: square; }
|
||||
ul.circle { list-style-type: circle; }
|
||||
ul.disc { list-style-type: disc; }
|
||||
ul.no-bullet { list-style: none; }
|
||||
|
||||
/* Ordered Lists */
|
||||
ol li ul, ol li ol { margin-left: 1.25em; margin-bottom: 0; }
|
||||
|
||||
/* Definition Lists */
|
||||
dl dt { margin-bottom: 0.3125em; font-weight: bold; }
|
||||
dl dd { margin-bottom: 1.25em; }
|
||||
|
||||
/* Abbreviations */
|
||||
abbr, acronym { text-transform: uppercase; font-size: 90%; color: #34302d; border-bottom: 1px dotted #dddddd; cursor: help; }
|
||||
|
||||
abbr { text-transform: none; }
|
||||
|
||||
/* Blockquotes */
|
||||
blockquote { margin: 0 0 1.25em; padding: 0.5625em 1.25em 0 1.1875em; border-left: 1px solid #dddddd; }
|
||||
blockquote cite { display: block; font-size: 0.9375em; color: rgba(0, 0, 0, 0.6); }
|
||||
blockquote cite:before { content: "\2014 \0020"; }
|
||||
blockquote cite a, blockquote cite a:visited { color: rgba(0, 0, 0, 0.6); }
|
||||
|
||||
blockquote, blockquote p { line-height: 1.6; color: rgba(0, 0, 0, 0.85); }
|
||||
|
||||
/* Microformats */
|
||||
.vcard { display: inline-block; margin: 0 0 1.25em 0; border: 1px solid #dddddd; padding: 0.625em 0.75em; }
|
||||
.vcard li { margin: 0; display: block; }
|
||||
.vcard .fn { font-weight: bold; font-size: 0.9375em; }
|
||||
|
||||
.vevent .summary { font-weight: bold; }
|
||||
.vevent abbr { cursor: auto; text-decoration: none; font-weight: bold; border: none; padding: 0 0.0625em; }
|
||||
|
||||
@media only screen and (min-width: 768px) { h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { line-height: 1.2; }
|
||||
h1 { font-size: 2.75em; }
|
||||
h2 { font-size: 2.3125em; }
|
||||
h3, #toctitle, .sidebarblock > .content > .title { font-size: 1.6875em; }
|
||||
h4 { font-size: 1.4375em; } }
|
||||
/* Tables */
|
||||
table { background: white; margin-bottom: 1.25em; border: solid 1px #dedede; }
|
||||
table thead, table tfoot { background: #f7f8f7; font-weight: bold; }
|
||||
table thead tr th, table thead tr td, table tfoot tr th, table tfoot tr td { padding: 0.5em 0.625em 0.625em; font-size: inherit; color: #34302d; text-align: left; }
|
||||
table tr th, table tr td { padding: 0.5625em 0.625em; font-size: inherit; color: #34302d; }
|
||||
table tr.even, table tr.alt, table tr:nth-of-type(even) { background: #f8f8f7; }
|
||||
table thead tr th, table tfoot tr th, table tbody tr td, table tr td, table tfoot tr td { display: table-cell; line-height: 1.6; }
|
||||
|
||||
body { -moz-osx-font-smoothing: grayscale; -webkit-font-smoothing: antialiased; tab-size: 4; }
|
||||
|
||||
h1, h2, h3, #toctitle, .sidebarblock > .content > .title, h4, h5, h6 { line-height: 1.2; word-spacing: -0.05em; }
|
||||
|
||||
.clearfix:before, .clearfix:after, .float-group:before, .float-group:after { content: " "; display: table; }
|
||||
.clearfix:after, .float-group:after { clear: both; }
|
||||
|
||||
*:not(pre) > code { font-size: 0.9375em; font-style: normal !important; letter-spacing: 0; padding: 0.1em 0.5ex; word-spacing: -0.15em; background-color: #f7f7f8; -webkit-border-radius: 4px; border-radius: 4px; line-height: 1.45; text-rendering: optimizeSpeed; word-wrap: break-word; }
|
||||
*:not(pre) > code.nobreak { word-wrap: normal; }
|
||||
*:not(pre) > code.nowrap { white-space: nowrap; }
|
||||
|
||||
pre, pre > code { line-height: 1.45; color: rgba(0, 0, 0, 0.9); font-family: Monaco, Menlo, Consolas, "Courier New", monospace; font-weight: normal; text-rendering: optimizeSpeed; }
|
||||
|
||||
em em { font-style: normal; }
|
||||
|
||||
strong strong { font-weight: normal; }
|
||||
|
||||
.keyseq { color: #6b625c; }
|
||||
|
||||
kbd { font-family: Monaco, Menlo, Consolas, "Courier New", monospace; display: inline-block; color: #34302d; font-size: 0.65em; line-height: 1.45; background-color: #f7f7f7; border: 1px solid #ccc; -webkit-border-radius: 3px; border-radius: 3px; -webkit-box-shadow: 0 1px 0 rgba(0, 0, 0, 0.2), 0 0 0 0.1em white inset; box-shadow: 0 1px 0 rgba(0, 0, 0, 0.2), 0 0 0 0.1em white inset; margin: 0 0.15em; padding: 0.2em 0.5em; vertical-align: middle; position: relative; top: -0.1em; white-space: nowrap; }
|
||||
|
||||
.keyseq kbd:first-child { margin-left: 0; }
|
||||
|
||||
.keyseq kbd:last-child { margin-right: 0; }
|
||||
|
||||
.menuseq, .menu { color: #191715; }
|
||||
|
||||
b.button:before, b.button:after { position: relative; top: -1px; font-weight: normal; }
|
||||
|
||||
b.button:before { content: "["; padding: 0 3px 0 2px; }
|
||||
|
||||
b.button:after { content: "]"; padding: 0 2px 0 3px; }
|
||||
|
||||
p a > code:hover { color: rgba(0, 0, 0, 0.9); }
|
||||
|
||||
/* Shared column layout for the header, content, footnotes and footer: full width capped at 62.5em, centered via auto side margins, with 0.9375em horizontal padding. NOTE(review): `*zoom: 1` looks like a legacy-IE-only property hack - confirm before removing. */
#header, #content, #footnotes, #footer { width: 100%; margin-left: auto; margin-right: auto; margin-top: 0; margin-bottom: 0; max-width: 62.5em; *zoom: 1; position: relative; padding-left: 0.9375em; padding-right: 0.9375em; }
|
||||
#header:before, #header:after, #content:before, #content:after, #footnotes:before, #footnotes:after, #footer:before, #footer:after { content: " "; display: table; }
|
||||
#header:after, #content:after, #footnotes:after, #footer:after { clear: both; }
|
||||
|
||||
#content { margin-top: 1.25em; }
|
||||
|
||||
#content:before { content: none; }
|
||||
|
||||
#header > h1:first-child { color: rgba(0, 0, 0, 0.85); margin-top: 2.25rem; margin-bottom: 0; }
|
||||
#header > h1:first-child + #toc { margin-top: 8px; border-top: 1px solid #ddddd8; }
|
||||
#header > h1:only-child, body.toc2 #header > h1:nth-last-child(2) { border-bottom: 1px solid #ddddd8; padding-bottom: 8px; }
|
||||
#header .details { border-bottom: 1px solid #ddddd8; line-height: 1.45; padding-top: 0.25em; padding-bottom: 0.25em; padding-left: 0.25em; color: rgba(0, 0, 0, 0.6); display: -ms-flexbox; display: -webkit-flex; display: flex; -ms-flex-flow: row wrap; -webkit-flex-flow: row wrap; flex-flow: row wrap; }
|
||||
#header .details span:first-child { margin-left: -0.125em; }
|
||||
#header .details span.email a { color: rgba(0, 0, 0, 0.85); }
|
||||
#header .details br { display: none; }
|
||||
#header .details br + span:before { content: "\00a0\2013\00a0"; }
|
||||
#header .details br + span.author:before { content: "\00a0\22c5\00a0"; color: rgba(0, 0, 0, 0.85); }
|
||||
#header .details br + span#revremark:before { content: "\00a0|\00a0"; }
|
||||
#header #revnumber { text-transform: capitalize; }
|
||||
#header #revnumber:after { content: "\00a0"; }
|
||||
|
||||
#content > h1:first-child:not([class]) { color: rgba(0, 0, 0, 0.85); border-bottom: 1px solid #ddddd8; padding-bottom: 8px; margin-top: 0; padding-top: 1rem; margin-bottom: 1.25rem; }
|
||||
|
||||
#toc { border-bottom: 1px solid #efefed; padding-bottom: 0.5em; }
|
||||
#toc > ul { margin-left: 0.125em; }
|
||||
#toc ul.sectlevel0 > li > a { font-style: italic; }
|
||||
#toc ul.sectlevel0 ul.sectlevel1 { margin: 0.5em 0; }
|
||||
#toc ul { font-family: Montserrat, sans-serif; list-style-type: none; }
|
||||
#toc li { line-height: 1.3334; margin-top: 0.3334em; }
|
||||
#toc a { text-decoration: none; }
|
||||
#toc a:active { text-decoration: underline; }
|
||||
|
||||
#toctitle { color: #0b0a0a; font-size: 1.2em; }
|
||||
|
||||
@media only screen and (min-width: 768px) { #toctitle { font-size: 1.375em; }
|
||||
body.toc2 { padding-left: 15em; padding-right: 0; }
|
||||
#toc.toc2 { margin-top: 0 !important; background-color: #f1f1f1; position: fixed; width: 15em; left: 0; top: 0; border-right: 1px solid #efefed; border-top-width: 0 !important; border-bottom-width: 0 !important; z-index: 1000; padding: 1.25em 1em; height: 100%; overflow: auto; }
|
||||
#toc.toc2 #toctitle { margin-top: 0; margin-bottom: 0.8rem; font-size: 1.2em; }
|
||||
#toc.toc2 > ul { font-size: 0.9em; margin-bottom: 0; }
|
||||
#toc.toc2 ul ul { margin-left: 0; padding-left: 1em; }
|
||||
#toc.toc2 ul.sectlevel0 ul.sectlevel1 { padding-left: 0; margin-top: 0.5em; margin-bottom: 0.5em; }
|
||||
body.toc2.toc-right { padding-left: 0; padding-right: 15em; }
|
||||
body.toc2.toc-right #toc.toc2 { border-right-width: 0; border-left: 1px solid #efefed; left: auto; right: 0; } }
|
||||
@media only screen and (min-width: 1280px) { body.toc2 { padding-left: 20em; padding-right: 0; }
|
||||
#toc.toc2 { width: 20em; }
|
||||
#toc.toc2 #toctitle { font-size: 1.375em; }
|
||||
#toc.toc2 > ul { font-size: 0.95em; }
|
||||
#toc.toc2 ul ul { padding-left: 1.25em; }
|
||||
body.toc2.toc-right { padding-left: 0; padding-right: 20em; } }
|
||||
#content #toc { border-style: solid; border-width: 1px; border-color: #d7d7d7; margin-bottom: 1.25em; padding: 1.25em; background: #f1f1f1; -webkit-border-radius: 4px; border-radius: 4px; }
|
||||
#content #toc > :first-child { margin-top: 0; }
|
||||
#content #toc > :last-child { margin-bottom: 0; }
|
||||
|
||||
#footer { max-width: 100%; background-color: #34302d; padding: 1.25em; }
|
||||
|
||||
#footer-text { color: #cbcfd2; line-height: 1.44; }
|
||||
|
||||
.sect1 { padding-bottom: 0.625em; }
|
||||
|
||||
@media only screen and (min-width: 768px) { .sect1 { padding-bottom: 1.25em; } }
|
||||
.sect1 + .sect1 { border-top: 1px solid #efefed; }
|
||||
|
||||
#content h1 > a.anchor, h2 > a.anchor, h3 > a.anchor, #toctitle > a.anchor, .sidebarblock > .content > .title > a.anchor, h4 > a.anchor, h5 > a.anchor, h6 > a.anchor { position: absolute; z-index: 1001; width: 1.5ex; margin-left: -1.5ex; display: block; text-decoration: none !important; visibility: hidden; text-align: center; font-weight: normal; }
|
||||
#content h1 > a.anchor:before, h2 > a.anchor:before, h3 > a.anchor:before, #toctitle > a.anchor:before, .sidebarblock > .content > .title > a.anchor:before, h4 > a.anchor:before, h5 > a.anchor:before, h6 > a.anchor:before { content: "\00A7"; font-size: 0.85em; display: block; padding-top: 0.1em; }
|
||||
#content h1:hover > a.anchor, #content h1 > a.anchor:hover, h2:hover > a.anchor, h2 > a.anchor:hover, h3:hover > a.anchor, #toctitle:hover > a.anchor, .sidebarblock > .content > .title:hover > a.anchor, h3 > a.anchor:hover, #toctitle > a.anchor:hover, .sidebarblock > .content > .title > a.anchor:hover, h4:hover > a.anchor, h4 > a.anchor:hover, h5:hover > a.anchor, h5 > a.anchor:hover, h6:hover > a.anchor, h6 > a.anchor:hover { visibility: visible; }
|
||||
#content h1 > a.link, h2 > a.link, h3 > a.link, #toctitle > a.link, .sidebarblock > .content > .title > a.link, h4 > a.link, h5 > a.link, h6 > a.link { color: #34302d; text-decoration: none; }
|
||||
#content h1 > a.link:hover, h2 > a.link:hover, h3 > a.link:hover, #toctitle > a.link:hover, .sidebarblock > .content > .title > a.link:hover, h4 > a.link:hover, h5 > a.link:hover, h6 > a.link:hover { color: #262321; }
|
||||
|
||||
.audioblock, .imageblock, .literalblock, .listingblock, .stemblock, .videoblock { margin-bottom: 1.25em; }
|
||||
|
||||
.admonitionblock td.content > .title, .audioblock > .title, .exampleblock > .title, .imageblock > .title, .listingblock > .title, .literalblock > .title, .stemblock > .title, .openblock > .title, .paragraph > .title, .quoteblock > .title, table.tableblock > .title, .verseblock > .title, .videoblock > .title, .dlist > .title, .olist > .title, .ulist > .title, .qlist > .title, .hdlist > .title { text-rendering: optimizeLegibility; text-align: left; font-family: "Varela Round", sans-serif; font-size: 1rem; font-style: italic; }
|
||||
|
||||
table.tableblock > caption.title { white-space: nowrap; overflow: visible; max-width: 0; }
|
||||
|
||||
.paragraph.lead > p, #preamble > .sectionbody > .paragraph:first-of-type p { color: rgba(0, 0, 0, 0.85); }
|
||||
|
||||
table.tableblock #preamble > .sectionbody > .paragraph:first-of-type p { font-size: inherit; }
|
||||
|
||||
.admonitionblock > table { border-collapse: separate; border: 0; background: none; width: 100%; }
|
||||
.admonitionblock > table td.icon { text-align: center; width: 80px; }
|
||||
.admonitionblock > table td.icon img { max-width: initial; }
|
||||
.admonitionblock > table td.icon .title { font-weight: bold; font-family: Montserrat, sans-serif; text-transform: uppercase; }
|
||||
.admonitionblock > table td.content { padding-left: 1.125em; padding-right: 1.25em; border-left: 1px solid #ddddd8; color: rgba(0, 0, 0, 0.6); }
|
||||
.admonitionblock > table td.content > :last-child > :last-child { margin-bottom: 0; }
|
||||
|
||||
.exampleblock > .content { border-style: solid; border-width: 1px; border-color: #e6e6e6; margin-bottom: 1.25em; padding: 1.25em; background: white; -webkit-border-radius: 4px; border-radius: 4px; }
|
||||
.exampleblock > .content > :first-child { margin-top: 0; }
|
||||
.exampleblock > .content > :last-child { margin-bottom: 0; }
|
||||
|
||||
.sidebarblock { border-style: solid; border-width: 1px; border-color: #d7d7d7; margin-bottom: 1.25em; padding: 1.25em; background: #f1f1f1; -webkit-border-radius: 4px; border-radius: 4px; }
|
||||
.sidebarblock > :first-child { margin-top: 0; }
|
||||
.sidebarblock > :last-child { margin-bottom: 0; }
|
||||
.sidebarblock > .content > .title { color: #0b0a0a; margin-top: 0; text-align: center; }
|
||||
|
||||
.exampleblock > .content > :last-child > :last-child, .exampleblock > .content .olist > ol > li:last-child > :last-child, .exampleblock > .content .ulist > ul > li:last-child > :last-child, .exampleblock > .content .qlist > ol > li:last-child > :last-child, .sidebarblock > .content > :last-child > :last-child, .sidebarblock > .content .olist > ol > li:last-child > :last-child, .sidebarblock > .content .ulist > ul > li:last-child > :last-child, .sidebarblock > .content .qlist > ol > li:last-child > :last-child { margin-bottom: 0; }
|
||||
|
||||
.literalblock pre, .listingblock pre:not(.highlight), .listingblock pre[class="highlight"], .listingblock pre[class^="highlight "], .listingblock pre.CodeRay, .listingblock pre.prettyprint { background: whitesmoke; }
|
||||
.sidebarblock .literalblock pre, .sidebarblock .listingblock pre:not(.highlight), .sidebarblock .listingblock pre[class="highlight"], .sidebarblock .listingblock pre[class^="highlight "], .sidebarblock .listingblock pre.CodeRay, .sidebarblock .listingblock pre.prettyprint { background: #f2f1f1; }
|
||||
|
||||
.literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { border: 1px solid #dddddd; -webkit-border-radius: 4px; border-radius: 4px; word-wrap: break-word; padding: 1em; font-size: 0.8125em; }
|
||||
.literalblock pre.nowrap, .literalblock pre[class].nowrap, .listingblock pre.nowrap, .listingblock pre[class].nowrap { overflow-x: auto; white-space: pre; word-wrap: normal; }
|
||||
@media only screen and (min-width: 768px) { .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { font-size: 0.90625em; } }
|
||||
@media only screen and (min-width: 1280px) { .literalblock pre, .literalblock pre[class], .listingblock pre, .listingblock pre[class] { font-size: 1em; } }
|
||||
|
||||
.literalblock.output pre { color: whitesmoke; background-color: rgba(0, 0, 0, 0.9); }
|
||||
|
||||
.listingblock pre.highlightjs { padding: 0; }
|
||||
.listingblock pre.highlightjs > code { padding: 1em; -webkit-border-radius: 4px; border-radius: 4px; }
|
||||
|
||||
.listingblock > .content { position: relative; }
|
||||
|
||||
.listingblock code[data-lang]:before { display: none; content: attr(data-lang); position: absolute; font-size: 0.75em; top: 0.425rem; right: 0.5rem; line-height: 1; text-transform: uppercase; color: #999; }
|
||||
|
||||
.listingblock:hover code[data-lang]:before { display: block; }
|
||||
|
||||
.listingblock.terminal pre .command:before { content: attr(data-prompt); padding-right: 0.5em; color: #999; }
|
||||
|
||||
.listingblock.terminal pre .command:not([data-prompt]):before { content: "$"; }
|
||||
|
||||
table.pyhltable { border-collapse: separate; border: 0; margin-bottom: 0; background: none; }
|
||||
|
||||
table.pyhltable td { vertical-align: top; padding-top: 0; padding-bottom: 0; line-height: 1.45; }
|
||||
|
||||
table.pyhltable td.code { padding-left: .75em; padding-right: 0; }
|
||||
|
||||
pre.pygments .lineno, table.pyhltable td:not(.code) { color: #999; padding-left: 0; padding-right: .5em; border-right: 1px solid #ddddd8; }
|
||||
|
||||
pre.pygments .lineno { display: inline-block; margin-right: .25em; }
|
||||
|
||||
table.pyhltable .linenodiv { background: none !important; padding-right: 0 !important; }
|
||||
|
||||
.quoteblock { margin: 0 1em 1.25em 1.5em; display: table; }
|
||||
.quoteblock > .title { margin-left: -1.5em; margin-bottom: 0.75em; }
|
||||
.quoteblock blockquote, .quoteblock blockquote p { color: rgba(0, 0, 0, 0.85); font-size: 1.15rem; line-height: 1.75; word-spacing: 0.1em; letter-spacing: 0; font-style: italic; text-align: justify; }
|
||||
.quoteblock blockquote { margin: 0; padding: 0; border: 0; }
|
||||
.quoteblock blockquote:before { content: "\201c"; float: left; font-size: 2.75em; font-weight: bold; line-height: 0.6em; margin-left: -0.6em; color: #0b0a0a; text-shadow: 0 1px 2px rgba(0, 0, 0, 0.1); }
|
||||
.quoteblock blockquote > .paragraph:last-child p { margin-bottom: 0; }
|
||||
.quoteblock .attribution { margin-top: 0.5em; margin-right: 0.5ex; text-align: right; }
|
||||
.quoteblock .quoteblock { margin-left: 0; margin-right: 0; padding: 0.5em 0; border-left: 3px solid rgba(0, 0, 0, 0.6); }
|
||||
.quoteblock .quoteblock blockquote { padding: 0 0 0 0.75em; }
|
||||
.quoteblock .quoteblock blockquote:before { display: none; }
|
||||
|
||||
.verseblock { margin: 0 1em 1.25em 1em; }
|
||||
.verseblock pre { font-family: "Open Sans", "DejaVu Sans", sans; font-size: 1.15rem; color: rgba(0, 0, 0, 0.85); font-weight: 300; text-rendering: optimizeLegibility; }
|
||||
.verseblock pre strong { font-weight: 400; }
|
||||
.verseblock .attribution { margin-top: 1.25rem; margin-left: 0.5ex; }
|
||||
|
||||
.quoteblock .attribution, .verseblock .attribution { font-size: 0.9375em; line-height: 1.45; font-style: italic; }
|
||||
.quoteblock .attribution br, .verseblock .attribution br { display: none; }
|
||||
.quoteblock .attribution cite, .verseblock .attribution cite { display: block; letter-spacing: -0.025em; color: rgba(0, 0, 0, 0.6); }
|
||||
|
||||
.quoteblock.abstract { margin: 0 0 1.25em 0; display: block; }
|
||||
.quoteblock.abstract blockquote, .quoteblock.abstract blockquote p { text-align: left; word-spacing: 0; }
|
||||
.quoteblock.abstract blockquote:before, .quoteblock.abstract blockquote p:first-of-type:before { display: none; }
|
||||
|
||||
table.tableblock { max-width: 100%; border-collapse: separate; }
|
||||
table.tableblock td > .paragraph:last-child p > p:last-child, table.tableblock th > p:last-child, table.tableblock td > p:last-child { margin-bottom: 0; }
|
||||
|
||||
table.tableblock, th.tableblock, td.tableblock { border: 0 solid #dedede; }
|
||||
|
||||
table.grid-all th.tableblock, table.grid-all td.tableblock { border-width: 0 1px 1px 0; }
|
||||
|
||||
table.grid-all tfoot > tr > th.tableblock, table.grid-all tfoot > tr > td.tableblock { border-width: 1px 1px 0 0; }
|
||||
|
||||
table.grid-cols th.tableblock, table.grid-cols td.tableblock { border-width: 0 1px 0 0; }
|
||||
|
||||
table.grid-all * > tr > .tableblock:last-child, table.grid-cols * > tr > .tableblock:last-child { border-right-width: 0; }
|
||||
|
||||
table.grid-rows th.tableblock, table.grid-rows td.tableblock { border-width: 0 0 1px 0; }
|
||||
|
||||
table.grid-all tbody > tr:last-child > th.tableblock, table.grid-all tbody > tr:last-child > td.tableblock, table.grid-all thead:last-child > tr > th.tableblock, table.grid-rows tbody > tr:last-child > th.tableblock, table.grid-rows tbody > tr:last-child > td.tableblock, table.grid-rows thead:last-child > tr > th.tableblock { border-bottom-width: 0; }
|
||||
|
||||
table.grid-rows tfoot > tr > th.tableblock, table.grid-rows tfoot > tr > td.tableblock { border-width: 1px 0 0 0; }
|
||||
|
||||
table.frame-all { border-width: 1px; }
|
||||
|
||||
table.frame-sides { border-width: 0 1px; }
|
||||
|
||||
table.frame-topbot { border-width: 1px 0; }
|
||||
|
||||
th.halign-left, td.halign-left { text-align: left; }
|
||||
|
||||
th.halign-right, td.halign-right { text-align: right; }
|
||||
|
||||
th.halign-center, td.halign-center { text-align: center; }
|
||||
|
||||
th.valign-top, td.valign-top { vertical-align: top; }
|
||||
|
||||
th.valign-bottom, td.valign-bottom { vertical-align: bottom; }
|
||||
|
||||
th.valign-middle, td.valign-middle { vertical-align: middle; }
|
||||
|
||||
table thead th, table tfoot th { font-weight: bold; }
|
||||
|
||||
tbody tr th { display: table-cell; line-height: 1.6; background: #f7f8f7; }
|
||||
|
||||
tbody tr th, tbody tr th p, tfoot tr th, tfoot tr th p { color: #34302d; font-weight: bold; }
|
||||
|
||||
p.tableblock > code:only-child { background: none; padding: 0; }
|
||||
|
||||
p.tableblock { font-size: 1em; }
|
||||
|
||||
td > div.verse { white-space: pre; }
|
||||
|
||||
ol { margin-left: 1.75em; }
|
||||
|
||||
ul li ol { margin-left: 1.5em; }
|
||||
|
||||
dl dd { margin-left: 1.125em; }
|
||||
|
||||
dl dd:last-child, dl dd:last-child > :last-child { margin-bottom: 0; }
|
||||
|
||||
ol > li p, ul > li p, ul dd, ol dd, .olist .olist, .ulist .ulist, .ulist .olist, .olist .ulist { margin-bottom: 0.625em; }
|
||||
|
||||
ul.unstyled, ol.unnumbered, ul.checklist, ul.none { list-style-type: none; }
|
||||
|
||||
ul.unstyled, ol.unnumbered, ul.checklist { margin-left: 0.625em; }
|
||||
|
||||
ul.checklist li > p:first-child > .fa-square-o:first-child, ul.checklist li > p:first-child > .fa-check-square-o:first-child { width: 1em; font-size: 0.85em; }
|
||||
|
||||
ul.checklist li > p:first-child > input[type="checkbox"]:first-child { width: 1em; position: relative; top: 1px; }
|
||||
|
||||
ul.inline { margin: 0 auto 0.625em auto; margin-left: -1.375em; margin-right: 0; padding: 0; list-style: none; overflow: hidden; }
|
||||
ul.inline > li { list-style: none; float: left; margin-left: 1.375em; display: block; }
|
||||
ul.inline > li > * { display: block; }
|
||||
|
||||
.unstyled dl dt { font-weight: normal; font-style: normal; }
|
||||
|
||||
ol.arabic { list-style-type: decimal; }
|
||||
|
||||
ol.decimal { list-style-type: decimal-leading-zero; }
|
||||
|
||||
ol.loweralpha { list-style-type: lower-alpha; }
|
||||
|
||||
ol.upperalpha { list-style-type: upper-alpha; }
|
||||
|
||||
ol.lowerroman { list-style-type: lower-roman; }
|
||||
|
||||
ol.upperroman { list-style-type: upper-roman; }
|
||||
|
||||
ol.lowergreek { list-style-type: lower-greek; }
|
||||
|
||||
.hdlist > table, .colist > table { border: 0; background: none; }
|
||||
.hdlist > table > tbody > tr, .colist > table > tbody > tr { background: none; }
|
||||
|
||||
td.hdlist1, td.hdlist2 { vertical-align: top; padding: 0 0.625em; }
|
||||
|
||||
td.hdlist1 { font-weight: bold; padding-bottom: 1.25em; }
|
||||
|
||||
.literalblock + .colist, .listingblock + .colist { margin-top: -0.5em; }
|
||||
|
||||
.colist > table tr > td:first-of-type { padding: 0 0.75em; line-height: 1; }
|
||||
.colist > table tr > td:first-of-type img { max-width: initial; }
|
||||
.colist > table tr > td:last-of-type { padding: 0.25em 0; }
|
||||
|
||||
.thumb, .th { line-height: 0; display: inline-block; border: solid 4px white; -webkit-box-shadow: 0 0 0 1px #dddddd; box-shadow: 0 0 0 1px #dddddd; }
|
||||
|
||||
.imageblock.left, .imageblock[style*="float: left"] { margin: 0.25em 0.625em 1.25em 0; }
|
||||
.imageblock.right, .imageblock[style*="float: right"] { margin: 0.25em 0 1.25em 0.625em; }
|
||||
.imageblock > .title { margin-bottom: 0; }
|
||||
.imageblock.thumb, .imageblock.th { border-width: 6px; }
|
||||
.imageblock.thumb > .title, .imageblock.th > .title { padding: 0 0.125em; }
|
||||
|
||||
.image.left, .image.right { margin-top: 0.25em; margin-bottom: 0.25em; display: inline-block; line-height: 0; }
|
||||
.image.left { margin-right: 0.625em; }
|
||||
.image.right { margin-left: 0.625em; }
|
||||
|
||||
a.image { text-decoration: none; display: inline-block; }
|
||||
a.image object { pointer-events: none; }
|
||||
|
||||
sup.footnote, sup.footnoteref { font-size: 0.875em; position: static; vertical-align: super; }
|
||||
sup.footnote a, sup.footnoteref a { text-decoration: none; }
|
||||
sup.footnote a:active, sup.footnoteref a:active { text-decoration: underline; }
|
||||
|
||||
#footnotes { padding-top: 0.75em; padding-bottom: 0.75em; margin-bottom: 0.625em; }
|
||||
#footnotes hr { width: 20%; min-width: 6.25em; margin: -0.25em 0 0.75em 0; border-width: 1px 0 0 0; }
|
||||
#footnotes .footnote { padding: 0 0.375em 0 0.225em; line-height: 1.3334; font-size: 0.875em; margin-left: 1.2em; text-indent: -1.05em; margin-bottom: 0.2em; }
|
||||
#footnotes .footnote a:first-of-type { font-weight: bold; text-decoration: none; }
|
||||
#footnotes .footnote:last-of-type { margin-bottom: 0; }
|
||||
#content #footnotes { margin-top: -0.625em; margin-bottom: 0; padding: 0.75em 0; }
|
||||
|
||||
.gist .file-data > table { border: 0; background: #fff; width: 100%; margin-bottom: 0; }
|
||||
.gist .file-data > table td.line-data { width: 99%; }
|
||||
|
||||
div.unbreakable { page-break-inside: avoid; }
|
||||
|
||||
.big { font-size: larger; }
|
||||
|
||||
.small { font-size: smaller; }
|
||||
|
||||
.underline { text-decoration: underline; }
|
||||
|
||||
.overline { text-decoration: overline; }
|
||||
|
||||
.line-through { text-decoration: line-through; }
|
||||
|
||||
.aqua { color: #00bfbf; }
|
||||
|
||||
.aqua-background { background-color: #00fafa; }
|
||||
|
||||
.black { color: black; }
|
||||
|
||||
.black-background { background-color: black; }
|
||||
|
||||
.blue { color: #0000bf; }
|
||||
|
||||
.blue-background { background-color: #0000fa; }
|
||||
|
||||
.fuchsia { color: #bf00bf; }
|
||||
|
||||
.fuchsia-background { background-color: #fa00fa; }
|
||||
|
||||
.gray { color: #606060; }
|
||||
|
||||
.gray-background { background-color: #7d7d7d; }
|
||||
|
||||
.green { color: #006000; }
|
||||
|
||||
.green-background { background-color: #007d00; }
|
||||
|
||||
.lime { color: #00bf00; }
|
||||
|
||||
.lime-background { background-color: #00fa00; }
|
||||
|
||||
.maroon { color: #600000; }
|
||||
|
||||
.maroon-background { background-color: #7d0000; }
|
||||
|
||||
.navy { color: #000060; }
|
||||
|
||||
.navy-background { background-color: #00007d; }
|
||||
|
||||
.olive { color: #606000; }
|
||||
|
||||
.olive-background { background-color: #7d7d00; }
|
||||
|
||||
.purple { color: #600060; }
|
||||
|
||||
.purple-background { background-color: #7d007d; }
|
||||
|
||||
.red { color: #bf0000; }
|
||||
|
||||
.red-background { background-color: #fa0000; }
|
||||
|
||||
.silver { color: #909090; }
|
||||
|
||||
.silver-background { background-color: #bcbcbc; }
|
||||
|
||||
.teal { color: #006060; }
|
||||
|
||||
.teal-background { background-color: #007d7d; }
|
||||
|
||||
.white { color: #bfbfbf; }
|
||||
|
||||
.white-background { background-color: #fafafa; }
|
||||
|
||||
.yellow { color: #bfbf00; }
|
||||
|
||||
.yellow-background { background-color: #fafa00; }
|
||||
|
||||
span.icon > .fa { cursor: default; }
|
||||
|
||||
.admonitionblock td.icon [class^="fa icon-"] { font-size: 2.5em; text-shadow: 1px 1px 2px rgba(0, 0, 0, 0.5); cursor: default; }
|
||||
.admonitionblock td.icon .icon-note:before { content: "\f05a"; color: #3f6a22; }
|
||||
.admonitionblock td.icon .icon-tip:before { content: "\f0eb"; text-shadow: 1px 1px 2px rgba(155, 155, 0, 0.8); color: #111; }
|
||||
.admonitionblock td.icon .icon-warning:before { content: "\f071"; color: #bf6900; }
|
||||
.admonitionblock td.icon .icon-caution:before { content: "\f06d"; color: #bf3400; }
|
||||
.admonitionblock td.icon .icon-important:before { content: "\f06a"; color: #bf0000; }
|
||||
|
||||
.conum[data-value] { display: inline-block; color: #fff !important; background-color: #34302d; -webkit-border-radius: 100px; border-radius: 100px; text-align: center; font-size: 0.75em; width: 1.67em; height: 1.67em; line-height: 1.67em; font-family: "Open Sans", "DejaVu Sans", sans-serif; font-style: normal; font-weight: bold; }
|
||||
.conum[data-value] * { color: #fff !important; }
|
||||
.conum[data-value] + b { display: none; }
|
||||
.conum[data-value]:after { content: attr(data-value); }
|
||||
pre .conum[data-value] { position: relative; top: -0.125em; }
|
||||
|
||||
b.conum * { color: inherit !important; }
|
||||
|
||||
.conum:not([data-value]):empty { display: none; }
|
||||
|
||||
.admonitionblock { border-left: 4px solid #6db33f; background-color: #ebf1e7; padding: 1.2em 0; margin: 30px 0; width: auto; }
|
||||
|
||||
#toc a:hover { text-decoration: underline; }
|
||||
|
||||
.admonitionblock > table td.content { border-left: none; }
|
||||
316
spring-batch-docs/asciidoc/testing.adoc
Normal file
@@ -0,0 +1,316 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[testing]]
|
||||
|
||||
== Unit Testing
|
||||
|
||||
Just as with other application styles, it is extremely important to
|
||||
unit test any code written as part of a batch job as well. The Spring core
|
||||
documentation covers how to unit and integration test with Spring in great
|
||||
detail, so it won't be repeated here. It is important, however, to think
|
||||
about how to 'end to end' test a batch job, which is what this chapter will
|
||||
focus on. The spring-batch-test project includes classes that will help
|
||||
facilitate this end-to-end test approach.
|
||||
|
||||
[[creatingUnitTestClass]]
|
||||
|
||||
|
||||
=== Creating a Unit Test Class
|
||||
|
||||
In order for the unit test to run a batch job, the framework must
|
||||
load the job's ApplicationContext. Two annotations are used to trigger
|
||||
this:
|
||||
|
||||
|
||||
* `@RunWith(SpringJUnit4ClassRunner.class)`:
|
||||
Indicates that the class should use Spring's JUnit facilities
|
||||
|
||||
|
||||
* `@ContextConfiguration(locations = {...})`:
|
||||
Indicates which XML files contain the ApplicationContext.
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
@RunWith(SpringJUnit4ClassRunner.class)
|
||||
@ContextConfiguration(locations = { "/simple-job-launcher-context.xml",
|
||||
"/jobs/skipSampleJob.xml" })
|
||||
public class SkipSampleFunctionalTests { ... }
|
||||
----
|
||||
|
||||
[[endToEndTesting]]
|
||||
|
||||
|
||||
=== End-To-End Testing of Batch Jobs
|
||||
|
||||
'End To End' testing can be defined as testing the complete run of a
|
||||
batch job from beginning to end. This allows for a test that sets up a
|
||||
test condition, executes the job, and verifies the end result.
|
||||
|
||||
In the example below, the batch job reads from the database and
|
||||
writes to a flat file. The test method begins by setting up the database
|
||||
with test data. It clears the CUSTOMER table and then inserts 10 new
|
||||
records. The test then launches the `Job` using the
|
||||
`launchJob()` method. The
|
||||
`launchJob()` method is provided by the
|
||||
`JobLauncherTestUtils` class. Also provided by the
|
||||
utils class is `launchJob(JobParameters)`, which
|
||||
allows the test to give particular parameters. The
|
||||
`launchJob()` method returns the
|
||||
`JobExecution` object which is useful for asserting
|
||||
particular information about the `Job` run. In the
|
||||
case below, the test verifies that the `Job` ended
|
||||
with status "COMPLETED".
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
@RunWith(SpringJUnit4ClassRunner.class)
|
||||
@ContextConfiguration(locations = { "/simple-job-launcher-context.xml",
|
||||
"/jobs/skipSampleJob.xml" })
|
||||
public class SkipSampleFunctionalTests {
|
||||
|
||||
@Autowired
|
||||
private JobLauncherTestUtils jobLauncherTestUtils;
|
||||
|
||||
private SimpleJdbcTemplate simpleJdbcTemplate;
|
||||
|
||||
@Autowired
|
||||
public void setDataSource(DataSource dataSource) {
|
||||
this.simpleJdbcTemplate = new SimpleJdbcTemplate(dataSource);
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testJob() throws Exception {
|
||||
simpleJdbcTemplate.update("delete from CUSTOMER");
|
||||
for (int i = 1; i <= 10; i++) {
|
||||
simpleJdbcTemplate.update("insert into CUSTOMER values (?, 0, ?, 100000)",
|
||||
i, "customer" + i);
|
||||
}
|
||||
|
||||
JobExecution jobExecution = jobLauncherTestUtils.launchJob();
|
||||
|
||||
|
||||
Assert.assertEquals("COMPLETED", jobExecution.getExitStatus().getExitCode());
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
[[testingIndividualSteps]]
|
||||
|
||||
|
||||
=== Testing Individual Steps
|
||||
|
||||
For complex batch jobs, test cases in the end-to-end testing
|
||||
approach may become unmanageable. In these cases, it may be more useful to
|
||||
have test cases to test individual steps on their own. The
|
||||
`JobLauncherTestUtils` class contains a method
|
||||
`launchStep` that takes a step name and runs just
|
||||
that particular `Step`. This approach allows for more
|
||||
targeted tests by allowing the test to set up data for just that step and
|
||||
to validate its results directly.
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
JobExecution jobExecution = jobLauncherTestUtils.launchStep("loadFileStep");
|
||||
----
|
||||
|
||||
|
||||
|
||||
=== Testing Step-Scoped Components
|
||||
|
||||
Often the components that are configured for your steps at runtime
|
||||
use step scope and late binding to inject context from the step or job
|
||||
execution. These are tricky to test as standalone components unless you
|
||||
have a way to set the context as if they were in a step execution. That is
|
||||
the goal of two components in Spring Batch: the
|
||||
`StepScopeTestExecutionListener` and the
|
||||
`StepScopeTestUtils`.
|
||||
|
||||
The listener is declared at the class level, and its job is to
|
||||
create a step execution context for each test method. For example:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
@ContextConfiguration
|
||||
@TestExecutionListeners( { DependencyInjectionTestExecutionListener.class,
|
||||
StepScopeTestExecutionListener.class })
|
||||
@RunWith(SpringJUnit4ClassRunner.class)
|
||||
public class StepScopeTestExecutionListenerIntegrationTests {
|
||||
|
||||
// This component is defined step-scoped, so it cannot be injected unless
|
||||
// a step is active...
|
||||
@Autowired
|
||||
private ItemReader<String> reader;
|
||||
|
||||
public StepExecution getStepExecution() {
|
||||
StepExecution execution = MetaDataInstanceFactory.createStepExecution();
|
||||
execution.getExecutionContext().putString("input.data", "foo,bar,spam");
|
||||
return execution;
|
||||
}
|
||||
|
||||
@Test
|
||||
public void testReader() {
|
||||
// The reader is initialized and bound to the input data
|
||||
assertNotNull(reader.read());
|
||||
}
|
||||
|
||||
}
|
||||
----
|
||||
|
||||
There are two `TestExecutionListeners`. One is
|
||||
from the regular Spring Test framework and handles dependency injection
|
||||
from the configured application context, injecting the reader, and the
|
||||
other is the Spring Batch
|
||||
`StepScopeTestExecutionListener`. It works by looking
|
||||
for a factory method in the test case for a
|
||||
`StepExecution`, and using that as the context for
|
||||
the test method, as if that execution was active in a `Step` at runtime. The
|
||||
factory method is detected by its signature (it just has to return a
|
||||
`StepExecution`). If a factory method is not provided
|
||||
then a default `StepExecution` is created.
|
||||
|
||||
The listener approach is convenient if you want the duration of the
|
||||
step scope to be the execution of the test method. For a more flexible,
|
||||
but more invasive approach you can use the
|
||||
`StepScopeTestUtils`. For example, to count the
|
||||
number of items available in the reader above:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
int count = StepScopeTestUtils.doInStepScope(stepExecution,
|
||||
new Callable<Integer>() {
|
||||
public Integer call() throws Exception {
|
||||
|
||||
int count = 0;
|
||||
|
||||
while (reader.read() != null) {
|
||||
count++;
|
||||
}
|
||||
return count;
|
||||
}
|
||||
});
|
||||
----
|
||||
|
||||
[[validatingOutputFiles]]
|
||||
|
||||
|
||||
=== Validating Output Files
|
||||
|
||||
When a batch job writes to the database, it is easy to query the
|
||||
database to verify that the output is as expected. However, if the batch
|
||||
job writes to a file, it is equally important that the output be verified.
|
||||
Spring Batch provides a class `AssertFile` to
|
||||
facilitate the verification of output files. The method
|
||||
`assertFileEquals` takes two
|
||||
`File` objects (or two
|
||||
`Resource` objects) and asserts, line by line, that
|
||||
the two files have the same content. Therefore, it is possible to create a
|
||||
file with the expected output and to compare it to the actual
|
||||
result:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
private static final String EXPECTED_FILE = "src/main/resources/data/input.txt";
|
||||
private static final String OUTPUT_FILE = "target/test-outputs/output.txt";
|
||||
|
||||
AssertFile.assertFileEquals(new FileSystemResource(EXPECTED_FILE),
|
||||
new FileSystemResource(OUTPUT_FILE));
|
||||
----
|
||||
|
||||
[[mockingDomainObjects]]
|
||||
|
||||
|
||||
=== Mocking Domain Objects
|
||||
|
||||
Another common issue encountered while writing unit and integration
|
||||
tests for Spring Batch components is how to mock domain objects. A good
|
||||
example is a `StepExecutionListener`, as illustrated
|
||||
below:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
public class NoWorkFoundStepExecutionListener extends StepExecutionListenerSupport {
|
||||
|
||||
public ExitStatus afterStep(StepExecution stepExecution) {
|
||||
if (stepExecution.getReadCount() == 0) {
|
||||
throw new NoWorkFoundException("Step has not processed any items");
|
||||
}
|
||||
return stepExecution.getExitStatus();
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
The above listener is provided by the framework and checks a
|
||||
`StepExecution` for an empty read count, thus
|
||||
signifying that no work was done. While this example is fairly simple, it
|
||||
serves to illustrate the types of problems that may be encountered when
|
||||
attempting to unit test classes that implement interfaces requiring Spring
|
||||
Batch domain objects. Consider the above listener's unit test:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
private NoWorkFoundStepExecutionListener tested = new NoWorkFoundStepExecutionListener();
|
||||
|
||||
@Test
|
||||
public void testAfterStep() {
|
||||
StepExecution stepExecution = new StepExecution("NoProcessingStep",
|
||||
new JobExecution(new JobInstance(1L, new JobParameters(),
|
||||
"NoProcessingJob")));
|
||||
|
||||
stepExecution.setReadCount(0);
|
||||
|
||||
try {
|
||||
tested.afterStep(stepExecution);
|
||||
fail();
|
||||
} catch (NoWorkFoundException e) {
|
||||
assertEquals("Step has not processed any items", e.getMessage());
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
Because the Spring Batch domain model follows good object-oriented
|
||||
principles, the `StepExecution` requires a
|
||||
`JobExecution`, which requires a
|
||||
`JobInstance` and
|
||||
`JobParameters` in order to create a valid
|
||||
`StepExecution`. While this is good in a solid domain
|
||||
model, it does make creating stub objects for unit testing verbose. To
|
||||
address this issue, the Spring Batch test module includes a factory for
|
||||
creating domain objects: `MetaDataInstanceFactory`.
|
||||
Given this factory, the unit test can be updated to be more
|
||||
concise:
|
||||
|
||||
|
||||
[source, java]
|
||||
----
|
||||
private NoWorkFoundStepExecutionListener tested = new NoWorkFoundStepExecutionListener();
|
||||
|
||||
@Test
|
||||
public void testAfterStep() {
|
||||
StepExecution stepExecution = MetaDataInstanceFactory.createStepExecution();
|
||||
|
||||
stepExecution.setReadCount(0);
|
||||
|
||||
try {
|
||||
tested.afterStep(stepExecution);
|
||||
fail();
|
||||
} catch (NoWorkFoundException e) {
|
||||
assertEquals("Step has not processed any items", e.getMessage());
|
||||
}
|
||||
}
|
||||
----
|
||||
|
||||
The above method for creating a simple
|
||||
`StepExecution` is just one convenience method
|
||||
available within the factory. A full method listing can be found in its
|
||||
link:$$http://docs.spring.io/spring-batch/apidocs/org/springframework/batch/test/MetaDataInstanceFactory.html$$[Javadoc].
|
||||
|
||||
@@ -1,18 +1,25 @@
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE appendix PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
|
||||
<appendix id="transactions">
|
||||
<title>Batch Processing and Transactions</title>
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
<section id="transactionsNoRetry">
|
||||
<title>Simple Batching with No Retry</title>
|
||||
[[transactions]]
|
||||
|
||||
<para>Consider the following simple example of a nested batch with no
|
||||
[appendix]
|
||||
== Batch Processing and Transactions
|
||||
|
||||
[[transactionsNoRetry]]
|
||||
|
||||
|
||||
=== Simple Batching with No Retry
|
||||
|
||||
Consider the following simple example of a nested batch with no
|
||||
retries. This is a very common scenario for batch processing, where
|
||||
an input source is processed until exhausted, but we commit
|
||||
periodically at the end of a "chunk" of processing.</para>
|
||||
periodically at the end of a "chunk" of processing.
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
1 | REPEAT(until=exhausted) {
|
||||
|
|
||||
2 | TX {
|
||||
@@ -23,26 +30,30 @@
|
||||
| }
|
||||
|
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
----
|
||||
|
||||
<para>The input operation (3.1) could be a message-based receive
|
||||
The input operation (3.1) could be a message-based receive
|
||||
(e.g. JMS), or a file-based read, but to recover and continue
|
||||
processing with a chance of completing the whole job, it must be
|
||||
transactional. The same applies to the operation at (3.2) - it must
|
||||
be either transactional or idempotent.</para>
|
||||
be either transactional or idempotent.
|
||||
|
||||
<para>If the chunk at REPEAT(3) fails because of a database exception at
|
||||
(3.2), then TX(2) will roll back the whole chunk.</para>
|
||||
</section>
|
||||
If the chunk at REPEAT(3) fails because of a database exception at
|
||||
(3.2), then TX(2) will roll back the whole chunk.
|
||||
|
||||
<section id="transactionStatelessRetry">
|
||||
<title>Simple Stateless Retry</title>
|
||||
[[transactionStatelessRetry]]
|
||||
|
||||
<para>It is also useful to use a retry for an operation which is not
|
||||
|
||||
=== Simple Stateless Retry
|
||||
|
||||
It is also useful to use a retry for an operation which is not
|
||||
transactional, like a call to a web-service or other remote
|
||||
resource. For example:</para>
|
||||
resource. For example:
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
0 | TX {
|
||||
1 | input;
|
||||
1.1 | output;
|
||||
@@ -50,24 +61,28 @@
|
||||
2.1 | remote access;
|
||||
| }
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
----
|
||||
|
||||
<para>This is actually one of the most useful applications of a retry,
|
||||
This is actually one of the most useful applications of a retry,
|
||||
since a remote call is much more likely to fail and be retryable
|
||||
than a database update. As long as the remote access (2.1)
|
||||
eventually succeeds, the transaction TX(0) will commit. If the
|
||||
remote access (2.1) eventually fails, then the transaction TX(0) is
|
||||
guaranteed to roll back.</para>
|
||||
</section>
|
||||
guaranteed to roll back.
|
||||
|
||||
<section id="repeatRetry">
|
||||
<title>Typical Repeat-Retry Pattern</title>
|
||||
[[repeatRetry]]
|
||||
|
||||
<para>The most typical batch processing pattern is to add a retry to the
|
||||
|
||||
=== Typical Repeat-Retry Pattern
|
||||
|
||||
The most typical batch processing pattern is to add a retry to the
|
||||
inner block of the chunk in the Simple Batching example.
|
||||
Consider this:</para>
|
||||
Consider this:
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
1 | REPEAT(until=exhausted, exception=not critical) {
|
||||
|
|
||||
2 | TX {
|
||||
@@ -85,41 +100,39 @@
|
||||
| }
|
||||
|
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
----
|
||||
|
||||
<para>The inner RETRY(4) block is marked as "stateful" - see the
|
||||
The inner RETRY(4) block is marked as "stateful" - see the
|
||||
typical use case for a description of a stateful
|
||||
retry. This means that if the retry PROCESS(5) block fails, the
|
||||
behaviour of the RETRY(4) is as follows.</para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>Throw an exception, rolling back the transaction TX(2) at the
|
||||
behaviour of the RETRY(4) is as follows.
|
||||
|
||||
|
||||
* Throw an exception, rolling back the transaction TX(2) at the
|
||||
chunk level, and allowing the item to be re-presented to the input
|
||||
queue.</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>When the item re-appears, it might be retried depending on the
|
||||
queue.
|
||||
|
||||
|
||||
* When the item re-appears, it might be retried depending on the
|
||||
retry policy in place, executing PROCESS(5) again. The second and
|
||||
subsequent attempts might fail again and rethrow the exception.</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>Eventually the item re-appears for the final time: the retry
|
||||
subsequent attempts might fail again and rethrow the exception.
|
||||
|
||||
|
||||
* Eventually the item re-appears for the final time: the retry
|
||||
policy disallows another attempt, so PROCESS(5) is never
|
||||
executed. In this case we follow a RECOVER(6) path, effectively
|
||||
"skipping" the item that was received and is being processed.</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
"skipping" the item that was received and is being processed.
|
||||
|
||||
<para>Notice that the notation used for the RETRY(4) in the plan above
|
||||
Notice that the notation used for the RETRY(4) in the plan above
|
||||
shows explicitly that the input step (4.1) is part of the retry.
|
||||
It also makes clear that there are two alternate paths for
|
||||
processing: the normal case is denoted by PROCESS(5), and the
|
||||
recovery path is a separate block, RECOVER(6). The two alternate
|
||||
paths are completely distinct: only one is ever taken in normal
|
||||
circumstances.</para>
|
||||
circumstances.
|
||||
|
||||
<para>In special cases (e.g. a special <classname>TranscationValidException</classname>
|
||||
In special cases (e.g. a special `TransactionValidException`
|
||||
type), the retry policy might be able to determine that the
|
||||
RECOVER(6) path can be taken on the last attempt after PROCESS(5)
|
||||
has just failed, instead of waiting for the item to be re-presented.
|
||||
@@ -127,17 +140,17 @@
|
||||
knowledge of what has happened inside the PROCESS(5) block, which is
|
||||
not usually available - e.g. if the output included write
|
||||
access before the failure, then the exception should be rethrown to
|
||||
ensure transactional integrity.</para>
|
||||
ensure transactional integrity.
|
||||
|
||||
<para>The completion policy in the outer, REPEAT(1) is crucial to the
|
||||
The completion policy in the outer REPEAT(1) is crucial to the
|
||||
success of the above plan. If the output(5.1) fails it may throw an
|
||||
exception (it usually does, as described), in which case the
|
||||
transaction TX(2) fails and the exception could propagate up through
|
||||
the outer batch REPEAT(1). We do not want the whole batch to stop
|
||||
because the RETRY(4) might still be successful if we try again, so
|
||||
we add the exception=not critical to the outer REPEAT(1).</para>
|
||||
we add the exception=not critical to the outer REPEAT(1).
|
||||
|
||||
<para>Note, however, that if the TX(2) fails and we <emphasis>do</emphasis> try again, by
|
||||
Note, however, that if the TX(2) fails and we __do__ try again, by
|
||||
virtue of the outer completion policy, the item that is next
|
||||
processed in the inner REPEAT(3) is not guaranteed to be the one
|
||||
that just failed. It might well be, but it depends on the
|
||||
@@ -149,18 +162,21 @@
|
||||
after 10 consecutive attempts, but not necessarily at the same item.
|
||||
This is consistent with the overall retry strategy: it is the inner
|
||||
RETRY(4) that is aware of the history of each item, and can decide
|
||||
whether or not to have another attempt at it.</para>
|
||||
</section>
|
||||
whether or not to have another attempt at it.
|
||||
|
||||
<section id="asyncChunkProcessing">
|
||||
<title>Asynchronous Chunk Processing</title>
|
||||
[[asyncChunkProcessing]]
|
||||
|
||||
<para>The inner batches or chunks in the typical example
|
||||
|
||||
=== Asynchronous Chunk Processing
|
||||
|
||||
The inner batches or chunks in the typical example
|
||||
above can be executed concurrently by configuring the outer batch to
|
||||
use an <classname>AsyncTaskExecutor</classname>. The outer batch waits for all the
|
||||
chunks to complete before completing.</para>
|
||||
use an `AsyncTaskExecutor`. The outer batch waits for all the
|
||||
chunks to complete before completing.
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
1 | REPEAT(until=exhausted, concurrent, exception=not critical) {
|
||||
|
|
||||
2 | TX {
|
||||
@@ -178,19 +194,23 @@
|
||||
| }
|
||||
|
|
||||
| }
|
||||
</programlisting>
|
||||
</section>
|
||||
|
||||
----
|
||||
|
||||
<section id="asyncItemProcessing">
|
||||
<title>Asynchronous Item Processing</title>
|
||||
[[asyncItemProcessing]]
|
||||
|
||||
<para>The individual items in chunks in the typical
|
||||
|
||||
=== Asynchronous Item Processing
|
||||
|
||||
The individual items in chunks in the typical example
|
||||
can also in principle be processed concurrently. In this case the
|
||||
transaction boundary has to move to the level of the individual
|
||||
item, so that each transaction is on a single thread:
|
||||
</para>
|
||||
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
1 | REPEAT(until=exhausted, exception=not critical) {
|
||||
|
|
||||
2 | REPEAT(size=5, concurrent) {
|
||||
@@ -208,26 +228,30 @@
|
||||
| }
|
||||
|
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
----
|
||||
|
||||
<para>This plan sacrifices the optimisation benefit, that the simple plan
|
||||
This plan sacrifices the optimisation benefit, that the simple plan
|
||||
had, of having all the transactional resources chunked together. It
|
||||
is only useful if the cost of the processing (5) is much higher than
|
||||
the cost of transaction management (3).</para>
|
||||
</section>
|
||||
the cost of transaction management (3).
|
||||
|
||||
<section id="transactionPropagation">
|
||||
<title>Interactions Between Batching and Transaction Propagation</title>
|
||||
[[transactionPropagation]]
|
||||
|
||||
<para>There is a tighter coupling between batch-retry and TX management
|
||||
|
||||
=== Interactions Between Batching and Transaction Propagation
|
||||
|
||||
There is a tighter coupling between batch-retry and TX management
|
||||
than we would ideally like. In particular a stateless retry cannot
|
||||
be used to retry database operations with a transaction manager that
|
||||
doesn't support NESTED propagation.
|
||||
</para>
|
||||
|
||||
|
||||
<para>For a simple example using retry without repeat, consider this:</para>
|
||||
For a simple example using retry without repeat, consider this:
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
1 | TX {
|
||||
|
|
||||
1.1 | input;
|
||||
@@ -239,16 +263,19 @@
|
||||
| }
|
||||
|
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
----
|
||||
|
||||
<para>Again, and for the same reason, the inner transaction TX(3) can
|
||||
Again, and for the same reason, the inner transaction TX(3) can
|
||||
cause the outer transaction TX(1) to fail, even if the RETRY(2) is
|
||||
eventually successful.</para>
|
||||
eventually successful.
|
||||
|
||||
<para>Unfortunately the same effect percolates from the retry block up to
|
||||
the surrounding repeat batch if there is one:</para>
|
||||
Unfortunately the same effect percolates from the retry block up to
|
||||
the surrounding repeat batch if there is one:
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
1 | TX {
|
||||
|
|
||||
2 | REPEAT(size=5) {
|
||||
@@ -262,45 +289,46 @@
|
||||
| }
|
||||
|
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
<para>Now if TX(3) rolls back it can pollute the whole batch at TX(1) and
|
||||
force it to roll back at the end.</para>
|
||||
|
||||
<para>What about non-default propagation?</para>
|
||||
|
||||
<itemizedlist>
|
||||
<listitem>
|
||||
<para>In the last example PROPAGATION_REQUIRES_NEW at TX(3) will
|
||||
----
|
||||
|
||||
Now if TX(3) rolls back it can pollute the whole batch at TX(1) and
|
||||
force it to roll back at the end.
|
||||
|
||||
What about non-default propagation?
|
||||
|
||||
|
||||
* In the last example PROPAGATION_REQUIRES_NEW at TX(3) will
|
||||
prevent the outer TX(1) from being polluted if both transactions
|
||||
are eventually successful. But if TX(3) commits and TX(1) rolls
|
||||
back, then TX(3) stays committed, so we violate the transaction
|
||||
contract for TX(1). If TX(3) rolls back, TX(1) does not necessarily roll back (but it probably
|
||||
will in practice because the retry will throw a roll back
|
||||
exception).</para>
|
||||
</listitem>
|
||||
<listitem>
|
||||
<para>PROPAGATION_NESTED at TX(3) works as we require in the retry
|
||||
exception).
|
||||
|
||||
|
||||
* PROPAGATION_NESTED at TX(3) works as we require in the retry
|
||||
case (and for a batch with skips): TX(3) can commit, but
|
||||
subsequently be rolled back by the outer transaction TX(1). If
|
||||
TX(3) rolls back, again TX(1) will roll back in practice. This
|
||||
option is only available on some platforms, e.g. not Hibernate or
|
||||
JTA, but it is the only one that works consistently.</para>
|
||||
</listitem>
|
||||
</itemizedlist>
|
||||
JTA, but it is the only one that works consistently.
|
||||
|
||||
<para>So NESTED is best if the retry block contains any database access.</para>
|
||||
</section>
|
||||
So NESTED is best if the retry block contains any database access.
|
||||
|
||||
<section id="specialTransactionOrthonogonal">
|
||||
<title>Special Case: Transactions with Orthogonal Resources</title>
|
||||
[[specialTransactionOrthonogonal]]
|
||||
|
||||
<para>Default propagation is always OK for simple cases where there are no
|
||||
|
||||
=== Special Case: Transactions with Orthogonal Resources
|
||||
|
||||
Default propagation is always OK for simple cases where there are no
|
||||
nested database transactions. Consider this (where the SESSION and
|
||||
TX are not global XA resources, so their resources are orthogonal):
|
||||
</para>
|
||||
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
0 | SESSION {
|
||||
1 | input;
|
||||
2 | RETRY {
|
||||
@@ -309,36 +337,40 @@
|
||||
| }
|
||||
| }
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
----
|
||||
|
||||
<para>Here there is a transactional message SESSION(0), but it doesn't
|
||||
Here there is a transactional message SESSION(0), but it doesn't
|
||||
participate in other transactions with
|
||||
<classname>PlatformTransactionManager</classname>, so doesn't propagate when TX(3)
|
||||
`PlatformTransactionManager`, so doesn't propagate when TX(3)
|
||||
starts. There is no database access outside the RETRY(2) block. If
|
||||
TX(3) fails and then eventually succeeds on a retry, SESSION(0) can
|
||||
commit (it can do this independent of a TX block). This is similar
|
||||
to the vanilla "best-efforts-one-phase-commit" scenario - the worst
|
||||
that can happen is a duplicate message when the RETRY(2) succeeds
|
||||
and the SESSION(0) cannot commit, e.g. because the message system is
|
||||
unavailable.</para>
|
||||
</section>
|
||||
unavailable.
|
||||
|
||||
<section id="statelessRetryCannotRecover">
|
||||
<title>Stateless Retry Cannot Recover</title>
|
||||
[[statelessRetryCannotRecover]]
|
||||
|
||||
<para>The distinction between a stateless and a stateful retry in the
|
||||
|
||||
=== Stateless Retry Cannot Recover
|
||||
|
||||
The distinction between a stateless and a stateful retry in the
|
||||
typical example above is important. It is actually
|
||||
ultimately a transactional constraint that forces the distinction,
|
||||
and this constraint also makes it obvious why the distinction
|
||||
exists.
|
||||
</para>
|
||||
|
||||
|
||||
<para>We start with the observation that there is no way to skip an item
|
||||
We start with the observation that there is no way to skip an item
|
||||
that failed and successfully commit the rest of the chunk unless we
|
||||
wrap the item processing in a transaction. So we simplify the
|
||||
typical batch execution plan to look like this:</para>
|
||||
typical batch execution plan to look like this:
|
||||
|
||||
|
||||
----
|
||||
|
||||
<programlisting>
|
||||
0 | REPEAT(until=exhausted) {
|
||||
|
|
||||
1 | TX {
|
||||
@@ -357,23 +389,23 @@
|
||||
| }
|
||||
|
|
||||
| }
|
||||
</programlisting>
|
||||
|
||||
----
|
||||
|
||||
<para>Here we have a stateless RETRY(3) with a RECOVER(5) path that kicks
|
||||
Here we have a stateless RETRY(3) with a RECOVER(5) path that kicks
|
||||
in after the final attempt fails. The "stateless" label just means
|
||||
that the block will be repeated without rethrowing any exception up
|
||||
to some limit. This will only work if the transaction TX(4) has
|
||||
propagation NESTED.</para>
|
||||
propagation NESTED.
|
||||
|
||||
<para>If the TX(3) has default propagation properties and it rolls back,
|
||||
If the inner TX(4) has default propagation properties and it rolls back,
|
||||
it will pollute the outer TX(1). The inner transaction is assumed by
|
||||
the transaction manager to have corrupted the transactional
|
||||
resource, and so it cannot be used again.</para>
|
||||
resource, and so it cannot be used again.
|
||||
|
||||
<para>Support for NESTED propagation is sufficiently rare that we choose
|
||||
Support for NESTED propagation is sufficiently rare that we choose
|
||||
not to support recovery with stateless retries in current versions of
|
||||
Spring Batch. The same effect can always be achieved (at the
|
||||
expense of repeating more processing) using the
|
||||
typical pattern above.</para>
|
||||
</section>
|
||||
</appendix>
|
||||
typical pattern above.
|
||||
|
||||
51
spring-batch-docs/asciidoc/whatsnew.adoc
Normal file
@@ -0,0 +1,51 @@
|
||||
:batch-asciidoc: http://docs.spring.io/spring-batch/reference/html/
|
||||
:toc: left
|
||||
:toclevels: 4
|
||||
|
||||
[[whatsNew]]
|
||||
|
||||
== What's New in Spring Batch 4.0
|
||||
|
||||
The Spring Batch 4.0 release has three major themes:
|
||||
|
||||
|
||||
* Java 8 Requirement
|
||||
|
||||
|
||||
* Dependencies re-baseline
|
||||
|
||||
|
||||
* Builders for ItemReaders and ItemWriters
|
||||
|
||||
[[whatsNewJava]]
|
||||
|
||||
|
||||
=== Java 8 Requirement
|
||||
|
||||
Spring Batch has historically followed Spring Framework's baselines for both
|
||||
Java version as well as third party dependencies. With Spring Batch 4, the Spring
|
||||
Framework version is being upgraded to Spring Framework 5. As such, the Java
|
||||
version requirement for Spring Batch is also increasing to Java 8.
|
||||
|
||||
|
||||
[[whatsNewDependencies]]
|
||||
|
||||
|
||||
=== Dependencies re-baseline
|
||||
|
||||
In order to continue to integrate with supported versions of the third party
|
||||
libraries Spring Batch utilizes, Spring Batch 4 is updating the dependencies across
|
||||
the board. The new dependency versions are in alignment with Spring Framework 5.
|
||||
|
||||
|
||||
[[whatsNewBuilders]]
|
||||
|
||||
|
||||
=== Builders for ItemReaders and ItemWriters
|
||||
|
||||
Spring Batch 4 is providing a collection of builders for all of the ItemReaders
|
||||
and ItemWriters that come with the framework. As of this release, builders for the
|
||||
`FlatFileItemReader`, `FlatFileItemWriter`, `JdbcCursorItemReader`, and
|
||||
`JdbcBatchItemWriter` are available. More information can be found in the javadoc
|
||||
for Spring Batch.
|
||||
|
||||
@@ -36,11 +36,6 @@ import org.springframework.util.ClassUtils;
|
||||
* object which can be consumed and manipulated as necessary by {@link org.springframework.batch.item.ItemProcessor ItemProcessor} or any
|
||||
* output service.
|
||||
* <p>
|
||||
* {@link LdifReader LdifReader} usage is mimics that of the FlatFileItemReader for all intensive purposes. Adjustments have been made to
|
||||
* process records instead of lines, however. As such, the {@link #recordsToSkip recordsToSkip} attribute indicates the number of records
|
||||
* from the top of the file that should not be processed. Implementations of the {@link RecordCallbackHandler RecordCallbackHandler}
|
||||
* interface can be used to execute operations on those skipped records.
|
||||
* <p>
|
||||
* As with the {@link org.springframework.batch.item.file.FlatFileItemReader FlatFileItemReader}, the {@link #strict strict} option
|
||||
* differentiates between whether or not to require the resource to exist before processing. In the case of a value set to false, a warning
|
||||
* is logged instead of an exception being thrown.
|
||||
|
||||
@@ -1,22 +0,0 @@
|
||||
------
|
||||
Spring Batch in the Media
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
November 2007
|
||||
|
||||
Spring Batch In the Media
|
||||
|
||||
* http://www.theserverside.com/tt/articles/article.tss?l=SpringBatchOverview
|
||||
|
||||
* http://www.theserverside.com/news/thread.tss?thread_id=47506#242493
|
||||
|
||||
* http://blog.decaresystems.ie/index.php/2007/04/12/spring-batch/
|
||||
|
||||
* http://www.itweek.co.uk/itweek/news/2189502/accenture-launches-batch
|
||||
|
||||
* http://www.theserverside.com/tt/articles/article.tss?l=SpringBatchOverview
|
||||
|
||||
* http://www.infoq.com/interviews/johnson-spring-portfolio
|
||||
|
||||
* http://www.infoq.com/news/2008/06/spring-batch
|
||||
@@ -1,377 +0,0 @@
|
||||
------
|
||||
Building Spring Batch
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
April 2007
|
||||
|
||||
Building Spring Batch
|
||||
|
||||
Spring Batch is organised as a reactor build in Maven (m2). To
|
||||
build from the command line use
|
||||
|
||||
+---
|
||||
$ mvn install
|
||||
+---
|
||||
|
||||
or the goal of your choice (compile, test, etc.). This builds the
|
||||
artifact (e.g. jar file) from the project in the current directory,
|
||||
and deploys it to you local m2 repo at
|
||||
<<<${user.home}/.m2/repository>>>. If there are any dependency resolution
|
||||
problems try
|
||||
|
||||
+---
|
||||
$ mvn install -P bootstrap
|
||||
+---
|
||||
|
||||
which enables some additional, non-standard repositories (which
|
||||
should not be present in ther deployed artifacts). You should only
|
||||
need to do this once, because then all the dependencies will be
|
||||
installed in your local repository. To get the source code (where
|
||||
available) for all dependencies, you can use
|
||||
|
||||
+---
|
||||
$ mvn dependency:sources -P bootstrap
|
||||
+---
|
||||
|
||||
See below for instructions on how
|
||||
to build the documentation and web site.
|
||||
|
||||
By default the whole project (including subprojects) will be built
|
||||
using Maven's "reactor" plugin. This can be expensive. To build
|
||||
only one module, cd to that directory first. Or at the top level
|
||||
use -N (for non-recursive) to exclude subprojects.
|
||||
|
||||
+---
|
||||
$ mvn -N install
|
||||
+---
|
||||
|
||||
* Skipping Tests
|
||||
|
||||
The profile <<fast>> skips all the tests, so
|
||||
|
||||
+---
|
||||
mvn -o install -P fast
|
||||
+---
|
||||
|
||||
is the quickest way to update your local repo (assuming the tests
|
||||
are OK). It is equivalent of setting <<<-Dmaven.test.skip=true>>>.
|
||||
|
||||
* Running Individual Tests
|
||||
|
||||
The standard way to do this with Maven is -Dtest= with the class name (not fully qualified), e.g.
|
||||
|
||||
+---
|
||||
$ mvn test -Dtest=FootballJobFunctionalTests
|
||||
+---
|
||||
|
||||
In the samples you can also add additional system properties, which will be used to override bean properties. This can be done with an argLine property, e.g.
|
||||
|
||||
+---
|
||||
$ mvn test -Dtest=FootballJobFunctionalTests -DargLine='-Dplayer.file.name=player.csv -Dgames.file.name=games.csv'
|
||||
+---
|
||||
|
||||
or by specifying forkMode=never (in which case the test is run in the same process as Maven):
|
||||
|
||||
+---
|
||||
$ mvn test -DforkMode=never -Dtest=FootballJobFunctionalTests -Dplayer.file.name=player.csv -Dgames.file.name=games.csv -Djob.commit.interval=50
|
||||
+---
|
||||
|
||||
* Eclipse IDE
|
||||
|
||||
Our policy is to commit Eclipse (and only Eclipse) meta data to
|
||||
source control. This will work out of the box for you if you use
|
||||
the (excellent) Q4E Eclipse-plugin
|
||||
(http://q4e.googlecode.com/svn/trunk/updatesite). With this plugin,
|
||||
each of the reactor modules at the top level builds independently
|
||||
and feeds changes into other projects in your workspace. It is not
|
||||
recommended to use the Maven Eclipse plugin (<<<mvn
|
||||
eclipse:eclipse>>>) because it cannot track dependencies across the
|
||||
Eclipse workspace. It will also create Eclipse meta-data every time
|
||||
you run it, conflicting with the version under source control.
|
||||
|
||||
* Dependencies
|
||||
|
||||
If you get multiple versions of the same jar across projects, or a
|
||||
jar is appearing in the classpath that you don't think is necessary,
|
||||
look into the dependency structure and try and exclude it from
|
||||
wherever it is being transitively included. To see the dependencies
|
||||
for a project look in the site for the dependency report.
|
||||
Alternatively (very useful for quickly locating a rogue jar) use
|
||||
|
||||
+---
|
||||
$ mvn -P snapshots dependency:tree
|
||||
+---
|
||||
|
||||
We use the "snapshots" profile here so that we get a snapshot of the
|
||||
dependency plugin (older versions did not have the tree goal, but
|
||||
newer versions are not stable enough to use in production).
|
||||
|
||||
* Subversion and Line Endings
|
||||
|
||||
Please use
|
||||
|
||||
+---
|
||||
*.xml = svn:eol-style=LF
|
||||
*.sql = svn:eol-style=LF
|
||||
*.txt = svn:eol-style=LF
|
||||
*.java = svn:eol-style=LF
|
||||
*.apt = svn:eol-style=LF
|
||||
*.properties = svn:eol-style=LF
|
||||
+---
|
||||
|
||||
in your <<<~/.subversion>>> (or <<<c:\Documents and Settings\<uid>\Application Data\Subversion/config>>>). If anyone forgets to do that then the property can be recursively set using Tortoise (type in the property key and value and use the recursive checkbox).
|
||||
|
||||
* Documentation
|
||||
|
||||
With the exception of reference docs, please put content in the
|
||||
project that it is most closely associated with. Here is a
|
||||
{{{./sitemap.html}site map}} to help you decide.
|
||||
|
||||
** Quotidian Web Content
|
||||
|
||||
Maven allows you to choose from a range of source format for
|
||||
building web content. For Spring Batch we prefer the "almost plain
|
||||
text" version. See files under <<<src/site/apt>>> in all the projects
|
||||
for examples, and also refer to the
|
||||
{{{http://maven.apache.org/guides/mini/guide-apt-format.html}Apt
|
||||
Format Guide}} on the Maven website.
|
||||
|
||||
N.B. you put .apt source files in a subdirectory called <<<apt>>>,
|
||||
but they are moved to the top level when the site is built. Thus
|
||||
<<<apt/index.apt>>> becomes <<<index.html>>>.
|
||||
|
||||
*** Using emacs to edit .apt files
|
||||
|
||||
Because the .apt format relies on indentation in plain text files,
|
||||
the emacs auto-fill feature in text mode makes editing very
|
||||
convenient. Put this in your .emacs
|
||||
|
||||
+---
|
||||
(setq auto-mode-alist (cons '("\\.apt\\'" . text-mode) auto-mode-alist))
|
||||
+---
|
||||
|
||||
Then use <<<M-q>>> to auto-fill the current paragraph. Emacs
|
||||
adjusts the indentation of all the lines to match the first one (or
|
||||
the first two if the second is different.
|
||||
|
||||
If anyone knows how to do this with Eclipse or other editors, let us
|
||||
know and we'll put a note here.
|
||||
|
||||
** Reference Guide
|
||||
|
||||
The <<<docs>>> project is reserved for reference guides in the
|
||||
normal Spring docbook format. Each chapter of the reference guide
|
||||
is in a separate xml file under <<<src/site/docbook/reference>>>.
|
||||
The easiest way to work with the reference guide is to cd to the
|
||||
<<<docs>>> module, and run Maven from there.
|
||||
|
||||
Use the DTD with a validating XML editor (e.g. Eclipse) to explore
|
||||
the docbook format. Also look at existing examples in Spring Batch
|
||||
and in the Core Spring Framework source code.
|
||||
|
||||
[Section numbers] There is no need to explicitly create section numbers in the
|
||||
XML - this is done for you by the build when everything is stitched
|
||||
together into a book.
|
||||
|
||||
[Source location] You put docbook .xml source files in a
|
||||
subdirectory called <<<docbook>>>, but they are moved to the top
|
||||
level when the site is built. Thus
|
||||
<<<docbook/reference/index.xml>>> becomes
|
||||
<<<reference/index.html>>>.
|
||||
|
||||
[XMLMind] If you use {{{http://www.xmlmind.com}XMLMind}} to edit the
|
||||
reference guide add the following line to
|
||||
<<<<XMLMind>/addon/config/docbook/common.incl>>>:
|
||||
|
||||
+---
|
||||
<cfg:saveOptions xmlns="" cdataSectionElements="screen programlisting" />
|
||||
+---
|
||||
|
||||
** Adding a new chapter to the Reference Guide
|
||||
|
||||
Here is a skeleton chapter including the DTD to get you started on a
|
||||
new chapter.
|
||||
|
||||
+---
|
||||
<?xml version="1.0" encoding="UTF-8"?>
|
||||
<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
|
||||
"http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd">
|
||||
<chapter id="chapter-id">
|
||||
<title>Chapter Title</title>
|
||||
<section>
|
||||
<title>Introduction</title>
|
||||
<para></para>
|
||||
</section>
|
||||
</chapter>
|
||||
+---
|
||||
|
||||
Create a file with the template above, and put it in
|
||||
<<<docbook/reference>>>. Use lower case, dash separated file names
|
||||
(XML style), e.g. <<<my-new-chapter.xml>>>.
|
||||
|
||||
Add the chapter to the master book in <<<index.xml>>> using
|
||||
|
||||
+---
|
||||
<xi:include href="my-new-chapter.xml"/>
|
||||
+---
|
||||
|
||||
* Adding graphics
|
||||
|
||||
Put (e.g.) PNG image content in <<<src/site/resources/images>>>, and
|
||||
then refer to the file using the <<<images/>>> directory prefix.
|
||||
|
||||
** In .apt
|
||||
|
||||
With no whitespace add the image name in square brackets (\[\]):
|
||||
|
||||
+---
|
||||
[images/MyFigure.png] Caption content here is not rendered by default
|
||||
in a browser (it's the ALT content)...
|
||||
+---
|
||||
|
||||
** In docbook
|
||||
|
||||
Use the \<mediaobject\> element:
|
||||
|
||||
+---
|
||||
<mediaobject>
|
||||
<imageobject role="fo">
|
||||
<imagedata fileref="src/site/resources/reference/images/mypic.png" format="PNG" align="center"/>
|
||||
</imageobject>
|
||||
<imageobject role="html">
|
||||
<imagedata fileref="images/mypic.png" format="PNG" align="center"/>
|
||||
</imageobject>
|
||||
<caption>
|
||||
<para>
|
||||
Figure 1: the figure caption...
|
||||
</para>
|
||||
</caption>
|
||||
</mediaobject>
|
||||
+---
|
||||
|
||||
* Program Listings in Docbook (Including XML)
|
||||
|
||||
Use CDATA to save you from having to use the HTML escapes for all
|
||||
the special characters. E.g.
|
||||
|
||||
+---
|
||||
<programlisting><![CDATA[
|
||||
<!-- ... my program listing here -->
|
||||
]]>
|
||||
</programlisting>
|
||||
+---
|
||||
|
||||
* Dynamic Editing
|
||||
|
||||
To see your changes to web site content as soon as you have typed
|
||||
it, use
|
||||
|
||||
+---
|
||||
mvn site:run
|
||||
+---
|
||||
|
||||
and go to http://localhost:8080.
|
||||
|
||||
In a project with unit tests, you can skip the tests and go straight
|
||||
to the documentation using
|
||||
|
||||
+---
|
||||
mvn -o site:run -P fast
|
||||
+---
|
||||
|
||||
If you are offline, or want to speed things up a bit, the "-o" stops
|
||||
Maven from trying to resolve dependencies on the internet.
|
||||
|
||||
Use -N to build only the current project, not subprojects, So this
|
||||
is pretty useful at the top level:
|
||||
|
||||
+---
|
||||
mvn -N -o site:run -P fast
|
||||
+---
|
||||
|
||||
In the <<<docs>>> project the docbook reference guide shows up at
|
||||
http://localhost:8080/reference/*.html, where * is the name of an
|
||||
xml file with a chapter in it. There is no link to these pages on
|
||||
the site because the real docbook generated output is much nicer,
|
||||
but this is still pretty useful for debugging and dynamic
|
||||
editing.
|
||||
|
||||
Note that the formatting is a bit limited compared to the whole
|
||||
docbook stylesheet - Maven uses Doxia to squish all of docbook into
|
||||
some simple wiki-like formatting rules. In particular it can't
|
||||
generate the index page in the format we need it, so you may see
|
||||
errors from <<<mvn site:run>>> if you visit that page. One of the
|
||||
features is that the <<<\<xi:include\>>>> syntax we use to build the
|
||||
index and table of contents in the docbook-generated pages does not
|
||||
work. Images are another problem. Use the generated content from
|
||||
<<<mvn site>>> to view these artifacts.
|
||||
|
||||
* Building and deploying the web site
|
||||
|
||||
There is a bug in the m2 reactor (MNG-740) which means that we have
|
||||
to install the parent pom to the local repo first.
|
||||
|
||||
So do it this way:
|
||||
|
||||
+---
|
||||
$ mvn install -P fast
|
||||
$ mvn -P staging clean site site:deploy
|
||||
+---
|
||||
|
||||
Remove "-P staging" to deploy to the real website (requires ssh
|
||||
access to static.springframework.org).
|
||||
|
||||
The "-P staging" is to deploy to <<<target/staging>>>, so we
|
||||
don't get accidental updates to the site. To test the site contents
|
||||
navigate with your browser to that directory. The site:stage goal
|
||||
deos not work properly for this build: all the subprojects are not
|
||||
integrated into the staging site, so use site:deploy instead.
|
||||
|
||||
The static website content is not deleted during the deployment
|
||||
process - merely replaced. If you need to clean everything up from
|
||||
scratch you need to delete the contents on the server as well
|
||||
(using ssh).
|
||||
|
||||
Problems?
|
||||
|
||||
Make sure your source code is up to date. Delete everything from
|
||||
your local Spring Batch repo
|
||||
<<<${user.home}/.m2/repository/org/springframework/batch>>>. If
|
||||
necessary, delete a project or directory and update from SVN again.
|
||||
|
||||
Try
|
||||
|
||||
+---
|
||||
$ mvn install
|
||||
+---
|
||||
|
||||
or
|
||||
|
||||
+---
|
||||
$ mvn clean install
|
||||
+---
|
||||
|
||||
or
|
||||
|
||||
+---
|
||||
$ mvn clean install -P fast
|
||||
+---
|
||||
|
||||
from the top level, and
|
||||
|
||||
+---
|
||||
$ mvn -U ...
|
||||
+---
|
||||
|
||||
from wherever you are (top level or sub-project). The latter will
|
||||
update any older plugins you have in your local Maven repository.
|
||||
Some people have had trouble building the web site without this.
|
||||
|
||||
If you get <<<OutOfMemoryError>>> e.g. building the site, use
|
||||
MAVEN_OPTS to boost the heap size (on the command line if you have a
|
||||
sensible shell):
|
||||
|
||||
+---
|
||||
$ MAVEN_OPTS=-Xmx256m mvn site
|
||||
+---
|
||||
@@ -1,159 +0,0 @@
|
||||
------
|
||||
Asynchronous Chunk Processing Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Case: Asynchronous Chunk Processing
|
||||
|
||||
* Goal
|
||||
|
||||
Increase the efficiency of chunk processing by having it execute
|
||||
asynchronously: in multiple threads. Maintain transactional
|
||||
integrity of the chunk.
|
||||
|
||||
* Scope
|
||||
|
||||
* All chunks might conceivably benefit from parallel processing, so
|
||||
we don't want any unnecessary restrictions on the batch operation,
|
||||
or its implementation. It should be possible for Client to write a
|
||||
batch operation without reference to the fact that it might run in
|
||||
an asynchronous chunk.
|
||||
|
||||
* Preconditions
|
||||
|
||||
* Input data exists with non-trivial size: chunks contain more than
|
||||
one record.
|
||||
|
||||
* Batch processing of a record is slow, or can be delayed, so that
|
||||
the asynchronous processing can take longer than launching the
|
||||
threads.
|
||||
|
||||
* A chunk can be made to fail after at least one record is
|
||||
processed.
|
||||
|
||||
* Success
|
||||
|
||||
* A chunk is processed and the results inspected to verify that all
|
||||
records were processed.
|
||||
|
||||
* Transactional behaviour is verified by rolling back a chunk and
|
||||
verifying that no records were processed.
|
||||
|
||||
* Description
|
||||
|
||||
The vanilla case proceeds as for normal {{{./chunks.html}chunk
|
||||
processing}}, but:
|
||||
|
||||
[[1]] Within a chunk, Container processes records in parallel.
|
||||
|
||||
[[1]] At the end of a chunk, Container waits for the last record
|
||||
to be processed (with a timeout if the wait is long).
|
||||
|
||||
* Variations
|
||||
|
||||
** Rollback on Failure
|
||||
|
||||
If there is an exception in one of the record processing threads,
|
||||
the whole chunk should roll back:
|
||||
|
||||
[[1]] Client throws exception in record processing.
|
||||
|
||||
[[1]] Container catches exception and attempts to abort other
|
||||
running processes.
|
||||
|
||||
[[1]] Container waits for running processes to abort (or finish
|
||||
normally, but preferably to abort).
|
||||
|
||||
[[1]] Container propagates the exception and signals transaction to
|
||||
rollback.
|
||||
|
||||
** Timeout
|
||||
|
||||
If there is a timeout during a chunk, it might happen before the
|
||||
chunk has finished, or while waiting for the processes to complete
|
||||
before exiting.
|
||||
|
||||
[[1]] At end of chunk, Container is waiting for all processes to
|
||||
finish. It times out, according to a parameter set by the
|
||||
Operator.
|
||||
|
||||
[[1]] Container does not start any new processes, and attempts to
|
||||
abort running processes.
|
||||
|
||||
[[1]] Container waits for running processes to abort (or finish
|
||||
normally, but preferably to abort).
|
||||
|
||||
[[1]] Container throws a time out exception and signals chunk
|
||||
transaction to rollback.
|
||||
|
||||
* Implementation
|
||||
|
||||
* The implementation of this use case could be tricky in the general
|
||||
case. In particular, the transactional nature is going to be hard
|
||||
or impossible to maintain across multiple threads without the
|
||||
individual processes being aware of the transaction, and (perhaps)
|
||||
without global (XA) transaction support.
|
||||
|
||||
A "normal" local transaction is thread bound - i.e. it only executes
|
||||
in one thread. If the code inside the transaction creates new
|
||||
threads, then they might not finish processing before the parent
|
||||
exits and the transaction wants to finish. The transaction needs to
|
||||
wait for the sub-processes before committing, or (more difficult)
|
||||
rolling back. The rollback case basically forces us to a model of
|
||||
one transaction per thread, and therefore to one transaction per
|
||||
data item in a concurrent environment.
|
||||
|
||||
Otherwise some transactional semantics might be respected in a
|
||||
parallel process, but others certainly will not be because
|
||||
synchronizations and resources are managed at the level of the
|
||||
thread where the transaction started. If the transaction manager is
|
||||
a local one (not XA) there is little hope even that the datasource
|
||||
resource would be the same for all the parallel threads and the
|
||||
parent method.
|
||||
|
||||
If we use a global transaction manager to make the parallel
|
||||
processes transactional, how will they know which transaction to
|
||||
participate in? There could be many active chunks, and each would
|
||||
have its own threads - how would each one be able to guide its child
|
||||
processes to participate in the same transaction?
|
||||
|
||||
* Beware a framework that extracts data from an <<<ItemReader>>>
|
||||
before executing the business logic (e.g. in a
|
||||
<<<ItemWriter>>>). It is not enough to allow concurrent
|
||||
processing but simply insist that the individual records are
|
||||
processed transactionally because the <<<ItemReader>>> will then
|
||||
not be able to participate in the transaction - its next record has
|
||||
already been passed to the consumer when the transaction starts, so
|
||||
if there is a rollback then the record is lost.
|
||||
|
||||
This is the origin of the signature:
|
||||
|
||||
+---
|
||||
public interface ItemReader {
|
||||
Object next();
|
||||
}
|
||||
+---
|
||||
|
||||
There is no peeking and no iterator-style <<<hasNext>>>. If there
|
||||
is a processing problem, transactional clients of the
|
||||
<<<ItemReader>>> throw an exception <after> the provider's
|
||||
<<<next()>>> has been called, but in the same thread (so that
|
||||
transactional semantics are preserved and the data provider reverts
|
||||
to its previous state).
|
||||
|
||||
This means that the callback interface also picks up an
|
||||
<<<Object>>> return type
|
||||
|
||||
+---
|
||||
public interface RepeatCallback {
|
||||
Object doInIteration(BatchContext context);
|
||||
}
|
||||
+---
|
||||
|
||||
so we can return an object, which is null when the processing has
|
||||
finished.
|
||||
|
||||
In the end we decided against the <<<Object>>> return type and went
|
||||
with an exit status to signal for no more processing.
|
||||
@@ -1,208 +0,0 @@
|
||||
------
|
||||
Commit Periodically Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Case: Commit Batch Process Periodically
|
||||
|
||||
* Goal
|
||||
|
||||
Read a file line-by-line and process into database inserts, for
|
||||
example using the Jdbc API. Commit periodically, and if there is a
|
||||
fault where the database transaction rolls back, then the file
|
||||
reader is reset to the place it was after the last successful
|
||||
commit.
|
||||
|
||||
To develop a batch process to achieve the goal above should be as
|
||||
simple a process as possible. The more that can be done with simple
|
||||
POJOs and Spring configuration the better.
|
||||
|
||||
* Scope
|
||||
|
||||
To keep things simple for now, assume that:
|
||||
|
||||
* All lines in the input file are in the same format and each line
|
||||
generates a single database insert (or a fixed number).
|
||||
|
||||
* The file is read synchronously by a single consumer.
|
||||
|
||||
* Preconditions
|
||||
|
||||
* A file exists in the right format, with a sufficiently large
|
||||
number of lines to be realistic.
|
||||
|
||||
* A mechanism exists to force a rollback at a non-trivial position
|
||||
(not during the first commit), but produce a successful operation
|
||||
on the second try.
|
||||
|
||||
* A framework for retry exists, so that the case above can be
|
||||
tested.
|
||||
|
||||
* Success
|
||||
|
||||
Integration test confirms that
|
||||
|
||||
* All data are processed and records inserted successfully.
|
||||
|
||||
* When a rollback occurs and the retry is successful, the complete
|
||||
dataset is processed (same result as successful run).
|
||||
|
||||
* Batch operations can be implemented without framework code (or
|
||||
with minimal dependencies, e.g. through interfaces). Launching
|
||||
the batch might require access to framework code.
|
||||
|
||||
* Description
|
||||
|
||||
The vanilla successful batch use case proceeds as follows:
|
||||
|
||||
[[1]] Container starts a transaction.
|
||||
|
||||
[[1]] Container makes resources available, e.g. opens file and
|
||||
creates <<<FileChannel>>> for it.
|
||||
|
||||
[[1]] Client reads a line from the file, and converts it to a
|
||||
database statement, then runs it.
|
||||
|
||||
[[1]] Container increments counter.
|
||||
|
||||
[[1]] Repeat previous two steps until a counter is equal to chunk
|
||||
size.
|
||||
|
||||
[[1]] Container commits database transaction.
|
||||
|
||||
[[1]] Repeat chunk processing until input source is exhausted.
|
||||
|
||||
* Variations
|
||||
|
||||
** Non-fatal Chunk Failure
|
||||
|
||||
If there is an unrecoverable database exception during execution of
|
||||
client code:
|
||||
|
||||
[[1]] Container rolls back current transaction.
|
||||
|
||||
[[1]] Container resets input source to the point it was at before
|
||||
failure.
|
||||
|
||||
[[1]] Container retries chunk.
|
||||
|
||||
** Fatal Chunk Failure
|
||||
|
||||
If there is an error in the input data in the middle of a chunk
|
||||
(could be manifested as database exception, e.g. uniqueness
|
||||
exception, or nullable exception):
|
||||
|
||||
[[1]] Container rolls back current transaction.
|
||||
|
||||
[[1]] Container terminates batch and notifies client of precise
|
||||
details, including the line number of error, and the last line
|
||||
that was committed (last of the previous chunk).
|
||||
|
||||
There is no need to reset the input source because the error is
|
||||
fatal.
|
||||
|
||||
To restart:
|
||||
|
||||
[[1]] Operator truncates the input file so the completed chunks
|
||||
are not repeated.
|
||||
|
||||
[[1]] Operator fixes bad line (if there was one), and starts the
|
||||
batch process with the same parameters.
|
||||
|
||||
Variations on this theme are also necessary, e.g. a tolerance for a
|
||||
small number of bad records in the input data.
|
||||
|
||||
* Implementation
|
||||
|
||||
* The concept of a batch iterator seems relevant here (see also the
|
||||
{{{./simple.html}simple}} use case). The iterator could be more than
|
||||
just a loop that might terminate early: here it could also manage
|
||||
the file cursor on the input source. In this design there is a
|
||||
<<<ItemReader>>> interface that can take care of termination and
|
||||
iteration (e.g. iterator-like method signatures).
|
||||
|
||||
* Another design idea (more encapsulated and more in keeping with
|
||||
existing Spring practice) is to make the data source transaction
|
||||
aware, and for the client to use it like a database resource, through a
|
||||
template. In this case there is a <<<FileInputTemplate>>>. The
|
||||
<<<ItemReader>>> needs to be aware of the data source template, so
|
||||
that it can terminate when the data is exhausted.
|
||||
|
||||
In this version of events there are two kinds of resource in play.
|
||||
The transaction itself, and the data sources that are aware of the
|
||||
transaction. The comparison with <<<DataSourceTransactionManager>>>
|
||||
and <<<JdbcTemplate>>> is obvious. The client is often completely
|
||||
unaware of the transaction manager, which is applied through an
|
||||
interceptor, whereas the data source is used explicitly with its own
|
||||
API through a template. The Client can concentrate on his domain,
|
||||
and not be concerned with infrastructure or resource handling.
|
||||
|
||||
* The analogy with <<<JmsTemplate>>> is even stronger. If the input
|
||||
data came from JMS instead of a file, we would hardly have to do
|
||||
anything to implement very robust chunking. JMS is the obvious best
|
||||
practice and already provides all the transactional semantics we
|
||||
need for chunking - simply roll back a transaction and the records
|
||||
processed return to the message system for delivery to the next
|
||||
consumer. Bad records can be sent to a bad message queue for
|
||||
independent processing. JMS might seem like overkill for a lot of
|
||||
batch processes, but it is tempting to say that if the robustness is
|
||||
needed then we should take that as a sign that installing and
|
||||
configuring JMS is worth the extra effort.
|
||||
|
||||
* Naturally we do not want to insist that the client code is aware
|
||||
of the transaction that is surrounding it - this would be the normal
|
||||
practice familiar from the Spring programming model. Should a
|
||||
client need access to transaction-scoped resources, the usual way to
|
||||
do that is to wrap the transactional resource (data source etc.) in
|
||||
a proxy that uses a synchronization, or a more generic thread-bound
|
||||
resource (using <<<TransactionSynchronizationManager>>>). The aim
|
||||
is to retain this separation in a batch operation. The batch
|
||||
framework itself might provide some of these synchronizations.
|
||||
|
||||
* The {{{./simple.html}Simple Batch Repeat}} is actually a pretty good
|
||||
model for the chunk processing in this use case. This observation
|
||||
leads to another: that a batch of chunks is a nested (or composed)
|
||||
batch - the outer termination policy is dependent only on the data
|
||||
source having further records to process, the inner one is a simple
|
||||
iterator (with a check for empty data). A simplified programming
|
||||
model for this is
|
||||
|
||||
+---
|
||||
RepeatCallback chunkCallback = new RepeatCallback() {
|
||||
|
||||
public boolean doInIteration(RepeatContext context) {
|
||||
|
||||
int count = 0;
|
||||
|
||||
do {
|
||||
|
||||
Object result = callback.doWithRepeat(context);
|
||||
|
||||
} while (result!=null && count++<chunkSize);
|
||||
|
||||
return result!=null;
|
||||
|
||||
}
|
||||
|
||||
});
|
||||
|
||||
batchTemplate.iterate(chunkCallback);
|
||||
|
||||
+---
|
||||
|
||||
The transaction boundary is demarcated at the chunk level
|
||||
(<<<chunkCallback.doWithRepeat()>>>). The termination policy depends
|
||||
only on a data source eventually returning null.
|
||||
|
||||
* N.B. the chunkSize can be dynamic. E.g., if the chunk is long
|
||||
during a nighttime batch window, and short when the window is over,
|
||||
in case the batch has to be terminated.
|
||||
|
||||
* Chunking can also be implemented simply in an
|
||||
<<<ItemHandler>>>. The handler just buffers records up to a
|
||||
chunk size, and then executes them all in one step (which might be
|
||||
transactional). This is easier to implement, and easier to
|
||||
configure for the clients, but cannot easily be made both concurrent
|
||||
and transactional.
|
||||
@@ -1,89 +0,0 @@
|
||||
------
|
||||
Copy File to File
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Case: Copy File to File
|
||||
|
||||
* Goal
|
||||
|
||||
Read a file line-by-line and process into a file in a different
|
||||
format (possibly different number of lines). Commit periodically
|
||||
and in the event of an error both data sources (input and output)
|
||||
rollback to the last known good point.
|
||||
|
||||
* Scope
|
||||
|
||||
To keep things simple for now, assume that:
|
||||
|
||||
* All lines in the file are in the same format and the final
|
||||
output is an aggregate.
|
||||
|
||||
* The files are read and written synchronously by a single
|
||||
consumer.
|
||||
|
||||
* This use case requires two kinds of transactional file source.
|
||||
One is read-only and the other is write-only. Only one consumer
|
||||
can use the write-only source at a time.
|
||||
|
||||
* Preconditions
|
||||
|
||||
* An input file exists in the right format, with a sufficiently
|
||||
large number of lines to be realistic.
|
||||
|
||||
* Success
|
||||
|
||||
Integration test confirms that
|
||||
|
||||
* All data are processed and output produced successfully.
|
||||
|
||||
* Description
|
||||
|
||||
Very similar to the use case {{{./chunks.html}Copy File to
|
||||
Database}}, but involving transactional access to an output source
|
||||
which is a file. Also we are introducing the idea of an aggregate
|
||||
function for the output.
|
||||
|
||||
The vanilla successful case proceeds as in the file to database
|
||||
version, except that:
|
||||
|
||||
[[1]] A successful chunk results in a line in an intermediate file
|
||||
output source.
|
||||
|
||||
[[1]] After all chunks are successfully processed the intermediate
|
||||
file is itself processed in a single transaction to complete the
|
||||
aggregate. The output is itself sent to an output channel
|
||||
(e.g. database or file).
|
||||
|
||||
* Variations
|
||||
|
||||
* Chunk failure variations proceed as in the use case
|
||||
{{{./chunks.html}Copy File to Database}}. In the case of a
|
||||
restart after fatal failure, the intermediate output file does
|
||||
not need to be reset or re-created.
|
||||
|
||||
* Implementation
|
||||
|
||||
* The write-only file source is new in this use case. It has a
|
||||
similar flavour to the read-only version, but also has more serious
|
||||
implications for implementation and usage. Since a file system is
|
||||
not inherently transactional, when we create the write-only data
|
||||
source we are assuming that consumers will play by the rules,
|
||||
principally that there is only one consumer at a time.
|
||||
|
||||
* With some external limitations the write-only file source can be
|
||||
implemented so that within a single JVM it will behave like a
|
||||
transactional database datasource. We can provide a
|
||||
<<<FlatFileItemWriter>>> that hides the resource acquisition and
|
||||
release, and interacts with an existing transaction to provide the
|
||||
transactional behaviour that is required.
|
||||
|
||||
* File-based transactional resources are a lot like messaging
|
||||
clients. We can send a message (write a line) through a sender
|
||||
client, and receive a message (read a line) through a consumer
|
||||
client. In the case of a transaction rollback, all sent messages
|
||||
are guaranteed not to reach consumers, and all received messages are
|
||||
returned to the queue. Maybe ActiveMQ has a file transport already?
|
||||
Mule definitely does, but it isn't transactional.
|
||||
@@ -1,113 +0,0 @@
|
||||
------
|
||||
Use Cases
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Cases for Spring Batch
|
||||
|
||||
These are more like scenarios or flows than real use cases in formal
|
||||
UML terms, but they serve a useful purpose as both. We don't want
|
||||
to be over formal, and probably code is being written and tested at
|
||||
the same time as these use cases. But there are many stakeholders
|
||||
in this project, and use cases are a useful resource to make sure
|
||||
they are all agreed on scope and certain implementation details.
|
||||
|
||||
* {{{./simple.html}Simple Batch Repeat}}
|
||||
|
||||
* {{{./retry.html}Automatic Retry After Failure}}
|
||||
|
||||
* {{{./chunks.html}Commit Batch Process Periodically}}: chunk
|
||||
processing.
|
||||
|
||||
* {{{./async.html}Asynchronous Chunk Processing}}: parallel
|
||||
processing within a chunk.
|
||||
|
||||
* {{{./file-to-file.html}Copy File to File in a Batch}}
|
||||
|
||||
* {{{./parallel.html}Massively Parallel Batch Processing}}. Spring
|
||||
Batch 1.0 does not contain any implementations of this use case,
|
||||
but it is quite feasible to implement them using the framework as
|
||||
a starting point. 1.1 has some prototype code under the Integration
|
||||
module.
|
||||
|
||||
* {{{./restart.html}Manual Restart After Failure}}
|
||||
|
||||
* {{{./steps.html}Sequential Processing of Dependent Steps}}
|
||||
|
||||
* {{{./partial.html}Partial Processing}}: skip records (e.g. on rollback).
|
||||
|
||||
* Whole-Batch Transaction - transactional support for the whole
|
||||
batch, not just chunks. Quite a common requirement, but not
|
||||
always practical using normal transaction support. May require a
|
||||
staging area, and a decision after it is full about whether to
|
||||
copy it in one big batch (e.g. using native database tools) or
|
||||
chunk it (e.g. if it is now in a form for which chunk failure is
|
||||
easier to deal with).
|
||||
|
||||
* {{{./scheduled.html}Scheduled Processing}}: Batch Jobs controlled
|
||||
by scheduler (e.g. start, stop, suspend, kill). Spring Batch does
|
||||
not intend to implement the scheduler concerns, but needs
|
||||
to provide enough information that a scheduler can act
|
||||
appropriately.
|
||||
|
||||
* Non-Sequential Processing of Steps (Conditional Branching)
|
||||
|
||||
* {{{./pause.html}Pause and Resume Job Execution}}
|
||||
|
||||
|
||||
* Actors
|
||||
|
||||
The following actors are involved in the use cases (Container and
|
||||
Client being the most common / important).
|
||||
|
||||
** Client or Business Domain
|
||||
|
||||
Code written by the batch developer.
|
||||
|
||||
One aim is that the client is a POJO - the batch behaviour, boundary
|
||||
conditions, transactions etc. can be dealt with by the Container in
|
||||
such a way that the client does not need to know about them. The
|
||||
client may have access to framework abstractions, like templated
|
||||
data sources (<<<JdbcTemplate>>> etc.), but these should work the
|
||||
same whether they are in a batch or not.
|
||||
|
||||
** Container
|
||||
|
||||
An application that converts user requests for batch jobs into
|
||||
running processes. Container concerns are robustness, traceability,
|
||||
manageability.
|
||||
|
||||
** Framework
|
||||
|
||||
The Framework is the infrastructure code that the Container depends
|
||||
on, and possibly spi implementations where knowledge of the
|
||||
non-business logic resides.
|
||||
|
||||
The Framework provides two kinds of infrastructure (as per usual
|
||||
Spring cornerstones <AOP> and <Portable Service Abstractions>):
|
||||
|
||||
* For cross-cutting concerns there are interceptors that can be
|
||||
wrapped around client code without it needing any knowledge of the
|
||||
Framework at all. An existing parallel is with transaction
|
||||
support - the client code can use <<<TransactionTemplate>>>
|
||||
directly, but does not always need to.
|
||||
|
||||
* Concrete abstractions that allow access to resources in a
|
||||
uniform way without needing to know the details of how they are
|
||||
provided (e.g. partitioned). Client code can use these
|
||||
abstractions like it would use a <<<DaoSupport>>>.
|
||||
|
||||
** Operator
|
||||
|
||||
The batch operator is not a developer. Tools are provided for the
|
||||
Operator to be able to stop and start a batch, and to monitor the
|
||||
progress and status of an ongoing or finished batch.
|
||||
|
||||
** Business User
|
||||
|
||||
The Operator has technical skills, e.g. a member of an application
|
||||
support team, but may need help with business-related decisions.
|
||||
For instance if input data are bad, he would not expect to be able
|
||||
to fix them alone because they might be bad for a business reason.
|
||||
@@ -1,278 +0,0 @@
|
||||
------
|
||||
Parallel Processing Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Case: Massively Parallel Batch Processing
|
||||
|
||||
* Goal
|
||||
|
||||
Support efficient processing of really large batch jobs (100K -
|
||||
1000K records) through parallel processing, across multiple
|
||||
processes or physical or virtual machines. The goals of other use
|
||||
cases should not be compromised, e.g. we need to be able to start
|
||||
and stop a batch job easily (for a non-developer), and trace the
|
||||
progress and failure points of a batch. The client code should not
|
||||
be aware of whether the processing is parallel or serial.
|
||||
|
||||
* Scope
|
||||
|
||||
* Any batch operation that reads data item-by-item from an input
|
||||
source is capable of being scaled up by parallelizing.
|
||||
|
||||
* The initial implementation might concentrate on multiple threads
|
||||
in a single process. Ultimately we need to be able to support
|
||||
multiple processes each one running in an application server
|
||||
(e.g. so that jobs that require EJBs can be used).
|
||||
|
||||
* Preconditions
|
||||
|
||||
* A data source with multiple chunks (committable units) - more chunks
|
||||
than parallel processes.
|
||||
|
||||
* A way for the framework to launch parallel processes.
|
||||
|
||||
* Success
|
||||
|
||||
* A batch completes successfully, and the results are verified.
|
||||
|
||||
* A batch fails in one of the nodes, and when restarted processes
|
||||
the remaining records.
|
||||
|
||||
* Description
|
||||
|
||||
[[1]] Framework splits input data into partitions.
|
||||
|
||||
[[1]] Framework sends input data (or references to them) to
|
||||
processing nodes.
|
||||
|
||||
[[1]] Processing nodes act independently, converting the input data
|
||||
and sending it transactionally to output source (as per normal
|
||||
single process batch).
|
||||
|
||||
[[1]] Framework collects status data from individual nodes for
|
||||
reporting and auditing.
|
||||
|
||||
[[1]] When all nodes are complete Framework decides that batch is
|
||||
complete and finishes processing.
|
||||
|
||||
* Variations
|
||||
|
||||
Two failure cases can be distinguished, bad input data on a node and
|
||||
an internal node failure have different implications for how to
|
||||
proceed. In both cases, however
|
||||
|
||||
[[1]] Framework catches exception and classifies it. Rolls back
|
||||
current transaction to preserve state of data (input and output).
|
||||
|
||||
[[1]] Framework saves state for restart from last known good
|
||||
point, including a pointer to the next input record.
|
||||
|
||||
Then if a processing node detects bad data in the input source, it
|
||||
cannot be restarted or re-distributed because the data need to be
|
||||
modified for a successful outcome.
|
||||
|
||||
[[1]] Framework alerts Operator of the location and nature of the
|
||||
failure.
|
||||
|
||||
[[1]] Operator waits for batch to finish - the overall status will
|
||||
be a failure, but most of the data might be consumed.
|
||||
|
||||
[[1]] Operator fixes problem and restarts batch.
|
||||
|
||||
[[1]] Framework does not re-process data that has already been
|
||||
processed successfully. The parallel processing nodes are used as
|
||||
before.
|
||||
|
||||
[[1]] Batch completes normally.
|
||||
|
||||
If a processing node fails unrecoverably (e.g. after retry timeout),
|
||||
but with no indication that the input data were bad, then the data
|
||||
can be re-used: Framework returns unprocessed input data, and
|
||||
redistributes it to other nodes.
|
||||
|
||||
* Implementation
|
||||
|
||||
* There are actually two approaches to this problem, which are
|
||||
largely complementary.
|
||||
|
||||
[[1]] The <Chunking> model dynamically assigns chunks of items to
|
||||
be processed and sends them to durable middleware. Worker
|
||||
processes pick them up and process them, sending back a message
|
||||
about the status. This approach works best if the dispatching is
|
||||
efficient compared to the processing.
|
||||
|
||||
[[1]] The <Partitioning> approach is more like running multiple
|
||||
jobs in parallel, with input data partitioned into larger pieces,
|
||||
and not split any further by the dispatcher. The item reading
|
||||
happens in the worker processes. This approach is necessary if
|
||||
the dispatcher in the <Chunking> model becomes a bottleneck.
|
||||
|
||||
Generally, chunking is easier to implement than partitioning, but
|
||||
there are tools available for implementing both patterns
|
||||
efficiently.
|
||||
|
||||
** Chunking
|
||||
|
||||
The messages from a dispatcher to worker processes consist of a
|
||||
chunk of items - a set of items to be processed together in a single
|
||||
transaction (or as the worker sees fit). The dispatcher is usually
|
||||
single threaded, but this is only a restriction based on the input
|
||||
data type (if it is a file it is difficult to read in parallel and
|
||||
maintain restartability). Using a process indicator the dispatcher
|
||||
could be reading from a database table in a multi-threaded model.
|
||||
|
||||
The main restriction is that for restartability the messages between
|
||||
the dispatcher and workers have to be durable (i.e. JMS or
|
||||
equivalent). If there is a durable middleware there are no in
|
||||
principle difficulties with this approach.
|
||||
|
||||
The practicalities deserve some discussion. In particular the
|
||||
dispatcher has to co-ordinate asynchronous replies from its workers,
|
||||
and also has to avoid overwhelming the workers (so there should be
|
||||
some throttling). As long as the middleware is durable the
|
||||
dispatcher can simply wait for replies whenever it thinks there are
|
||||
workers working. It needs to record this expectation in a durable
|
||||
form as well, as part of an <<<ExecutionContext>>> for the step.
|
||||
|
||||
** Partitioning
|
||||
|
||||
The hard thing about this use case is the partitioning of input (and
|
||||
output) sources. Ideally, this has to be done in such a way that
|
||||
the individual operations are unaware that they are participating in
|
||||
a batch farm. Partitioning has to be at least partially
|
||||
deterministic because restarts have to be able to ignore data that
|
||||
have already been processed successfully.
|
||||
|
||||
Consider two examples: a file input source and a JDBC (SQL query)
|
||||
based input source. Each provides its own challenges.
|
||||
|
||||
*** File Data Source
|
||||
|
||||
* If each node reads the whole file there could be a performance
|
||||
issue. They would all need to have instructions about which lines
|
||||
to process.
|
||||
|
||||
* If each record of input data is a line, this isn't so bad. Each
|
||||
node can have a range of line numbers to process. The only problem
|
||||
is knowing how many lines there are, and how many nodes, so that the
|
||||
job can be partitioned efficiently.
|
||||
|
||||
* But if each input record can span a variable number of lines (not
|
||||
that unlikely in practice), then we can't use line numbers.
|
||||
|
||||
* Maybe the best solution is to use middleware anyway. A single
|
||||
process parses the file and sends it to a message queue, item by
|
||||
item (or chunk by chunk). The integration pattern could then be a
|
||||
simple Eager Consumer, assuming that all records are processed
|
||||
independently. The messaging semantics would simply have to ensure
|
||||
that a consumer can roll back and return the input records to a
|
||||
queue for another consumer to retry.
|
||||
|
||||
* For large batches a real messaging infrastructure (JMS etc.) with
|
||||
guaranteed delivery would be a benefit, but might be seen as
|
||||
overkill for a system that didn't otherwise require it. In this
|
||||
case we could imagine the partitioning process being one of simply
|
||||
dividing the input file up into smaller files, which are then
|
||||
processed by individual nodes independently. The integration
|
||||
pattern is then different - more like a Router.
|
||||
|
||||
* What would parallel processing look like to the client? We can
|
||||
make it completely transparent if we assume that the client only
|
||||
ever implements <<<ItemReader>>> and <<<ItemWriter>>>. The
|
||||
client code is unaware of the partitioning of its data source.
|
||||
|
||||
* Parallelisation could also take place at the level of the
|
||||
<<<ItemReader>>> - we could proxy the data provider and wrap it in
|
||||
a partitioning proxy:
|
||||
|
||||
+---
|
||||
<bean id="itemReader"
|
||||
class="org.springframework.aop.framework.ProxyFactoryBean">
|
||||
|
||||
<property name="target">
|
||||
<bean class="test.input.TradeItemProvider">
|
||||
...
|
||||
</bean>
|
||||
</property>
|
||||
<property name="interceptorNames" value="partitioner"/>
|
||||
|
||||
</bean>
|
||||
|
||||
<bean id="partitioner"
|
||||
class="org.springframework.core.batch.support.provider.PartioningInterceptor">
|
||||
...
|
||||
</bean>
|
||||
|
||||
+---
|
||||
|
||||
*** SQL Data Source Partitioning
|
||||
|
||||
* If each node is allowed to do its own query or queries to
|
||||
determine the input data:
|
||||
|
||||
* Each node has to be given a way to narrow the query so that they
|
||||
don't all use the same data. There is no easy universal way to
|
||||
achieve this, and in the general case we have to know in advance
|
||||
when we are going to execute in parallel or as a single process.
|
||||
Maybe a range of primary keys would work as a special case that we
|
||||
could support as a strategy.
|
||||
|
||||
* Maybe we could assume that all nodes execute precisely the same
|
||||
query, and then provide a way to add a cursor to the result set,
|
||||
so it can be treated a bit more like a file.
|
||||
|
||||
* We might be forced to use a distributed transaction to ensure
|
||||
that all the nodes see the same data. This would be unfortunate,
|
||||
but possibly necessary. It would be up to the client to configure
|
||||
distributed transactions if that was required, otherwise the
|
||||
result might be unpredictable if data can be added to an input
|
||||
source while it is being read.
|
||||
|
||||
* If only one query is done by the Framework and the results shared
|
||||
out amongst the nodes we face the issue of how to send the data
|
||||
between nodes. Performance problems might ensue. Plus (more
|
||||
seriously) the individual nodes would now need a different
|
||||
implementation if they were acting in a parallel cluster to the
|
||||
vanilla serial processing case - a single node would do the query
|
||||
and work directly with the results, whereas in a parallel
|
||||
environment it would be one step removed from the actual query.
|
||||
This breaks our encapsulation design goal.
|
||||
|
||||
* When considering the approach to partitioning the data source
|
||||
we should follow closely the discussion above on partitioning a file
|
||||
input source. If the client is to remain unaware of the batch
|
||||
parameters, then an interceptor looks like the best approach.
|
||||
|
||||
If each node prefers to do its own query then an interceptor would
|
||||
have to catch the call to a JDBC template and modify the query
|
||||
dynamically. This is quite a scary thing to be doing - it might end
|
||||
up with us needing to parse the SQL and add where clauses. Maybe a
|
||||
client should be forced to specify (in the case of a parallel batch)
|
||||
how his query should be partitioned. For example:
|
||||
|
||||
+---
|
||||
<bean id="inputSource"
|
||||
class="test.input.SqlInputItemReader">
|
||||
|
||||
<property name="query">
|
||||
<value>SELECT * from T_INPUT</value>
|
||||
</property>
|
||||
|
||||
<property name="partitionQuery">
|
||||
<value>SELECT * from T_INPUT where ID>=? and ID<?</value>
|
||||
</property>
|
||||
|
||||
</bean>
|
||||
+---
|
||||
|
||||
It would be an error to run a batch in parallel if the partition
|
||||
query had not been provided.
|
||||
|
||||
* What happens if the data source changes between failed execution
|
||||
and restart? We can't legislate for that because it is outside the
|
||||
realm of what can be controlled through a transaction. A restart
|
||||
might produce different results than the original failed batch would
|
||||
have done were it successful.
|
||||
@@ -1,154 +0,0 @@
|
||||
------
|
||||
Partial Processing Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Partial Processing
|
||||
|
||||
* Goal
|
||||
|
||||
Support partial processing of a batch, without having to interrupt
|
||||
or manually restart, but enabling corrective action to be taken
|
||||
after the process has finished to complete the processing of failed
|
||||
records. A batch that is going to fail completely can be
|
||||
identified as soon as possible, but one which is substantially
|
||||
alright can run as far as possible to prevent costly duplication.
|
||||
Records that are skipped are reported in such a way that they can be
|
||||
easily identified by the Operator and / or Business User and a new
|
||||
batch created to finish the original goal. By the same token, in
|
||||
the case of an aborted batch where a minority of records are
|
||||
processed successfully first time, it should be possible to identify
|
||||
the successful records and exclude them from data presented on
|
||||
restart.
|
||||
|
||||
* Scope
|
||||
|
||||
Any batch should be configurable to support partial processing.
|
||||
|
||||
* Preconditions
|
||||
|
||||
* A data source with a small number of bad records exists.
|
||||
|
||||
* Success
|
||||
|
||||
* A test data set with a small number of bad records is run through
|
||||
the batch processor and completes normally. Operator confirms
|
||||
that the good records are all processed and then fixes and
|
||||
resubmits the bad records, and confirms that they are also
|
||||
correctly processed with no duplicates.
|
||||
|
||||
* Description
|
||||
|
||||
The vanilla flow proceeds as follows:
|
||||
|
||||
[[1]] Batch processing begins as per normal (see for example
|
||||
{{{./chunks.apt}chunk processing use case}}).
|
||||
|
||||
[[1]] A record is processed. This step repeats until...
|
||||
|
||||
[[1]] Container detects a bad record, e.g. by catching a
|
||||
classified exception.
|
||||
|
||||
[[1]] Container logs the exception in a way that identifies the
|
||||
bad record easily and immediately to the Operator.
|
||||
|
||||
[[1]] Container stores an identifier for the bad record (or the
|
||||
whole record) in a location designated to the Operator for that
|
||||
purpose.
|
||||
|
||||
[[1]] Container determines that the batch can still succeed
|
||||
despite the cumulative number or nature of bad records - the bad
|
||||
record is skipped. Container goes back to normal processing, and
|
||||
eventually completes the whole batch.
|
||||
|
||||
* Variations
|
||||
|
||||
** Abort Batch Early
|
||||
|
||||
The batch cannot skip all records. After each failure the decision
|
||||
about whether to continue has to be made:
|
||||
|
||||
[[1]] When a record is processed successfully, Container logs the
|
||||
event in a form that can be used later to identify successful
|
||||
records in case the batch is aborted.
|
||||
|
||||
[[1]] Container determines that a sufficiently large fraction of
|
||||
the records processed so far have failed. The relevant fraction is
|
||||
to be specified through configuration meta data (not specified by
|
||||
business logic).
|
||||
|
||||
[[1]] Container aborts the batch with a clear signal to the
|
||||
Operator that it has aborted owing to an unacceptable number of
|
||||
errors.
|
||||
|
||||
* Implementation
|
||||
|
||||
* When the decision to abort is taken, Container may have
|
||||
successfully processed a small number of records and the
|
||||
corresponding transactions might have committed. Those records that
|
||||
were successfully processed on the first attempt are easy to
|
||||
exclude from the restart, if transactional semantics are respected
|
||||
by the item processing.
|
||||
|
||||
* The decision to abort is based on exception classification. Each
|
||||
time an item is processed, the framework needs to catch exceptions
|
||||
and classify them as
|
||||
|
||||
* fatal: signals an abort - rethrow.
|
||||
|
||||
* transient: nominally fatal, but the operation is retryable.
|
||||
|
||||
* non-fatal: signals a skip.
|
||||
|
||||
The transient failure is really just a sub-type of fatal case. It
|
||||
is treated differently by the {{{./retry.html}retry framework}} but
|
||||
not necessarily by the vanilla batch.
|
||||
|
||||
* Actually we can't decide what action to take simply on the
|
||||
evidence of the current exception. What we need to do is decide,
|
||||
potentially based on the whole history of exceptions in a given
|
||||
batch, whether the latest one should trigger an abort. E.g. a
|
||||
simple and sensible policy would be to abort if the total number of
|
||||
exceptions reaches a threshold, either absolute or relative to the
|
||||
number of items processed.
|
||||
|
||||
* So how does it look? In the template...
|
||||
|
||||
+---
|
||||
public void iterate(RepeatCallback callback) {
|
||||
|
||||
...
|
||||
|
||||
try {
|
||||
result = callback.doInIteration(context);
|
||||
} catch (Exception e) {
|
||||
handleException(e); // Maybe re-throw, maybe not...
|
||||
}
|
||||
|
||||
...
|
||||
|
||||
}
|
||||
+---
|
||||
|
||||
If the callback was transactional it has already rolled back. If
|
||||
the whole <<<iterate()>>> was transactional we need to rethrow.
|
||||
|
||||
* If the processing is asynchronous, the template has to execute in
|
||||
a separate thread (see {{{./async.html}asynchronous example}}). In
|
||||
this case the whole thread (i.e. the <<<iterate()>>>) has to be
|
||||
transactional. Whoever is counting failed items needs to be
|
||||
pooling information from multiple threads.
|
||||
|
||||
* It may also be the role of the framework to translate exceptions
|
||||
into a batch-specific hierarchy. This is not the same concern as
|
||||
exception classification (as done for instance by the Spring Jdbc
|
||||
and Jms templates). Exception classification might also be of
|
||||
value, but the argument is not as clear cut as the existing core
|
||||
templates, where there is an underlying Java EE API checked
|
||||
exception to convert. In the absence of a batch-specific exception
|
||||
hierarchy definition, we could choose to leave exception translation
|
||||
out of the batch framework.
|
||||
|
||||
|
||||
@@ -1,121 +0,0 @@
|
||||
------
|
||||
Pause Resume Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
October 2008
|
||||
|
||||
Use Case: Pause and Resume Job Execution
|
||||
|
||||
* Goal
|
||||
|
||||
Allow a job to pause itself and await further instructions. A
|
||||
paused status indicates to a user that the job is waiting, either
|
||||
for a manual signal to proceed, or for a remote worker to finish
|
||||
doing something asynchronously. For instance, a job may require
|
||||
manual verification of a business condition before continuing - a
|
||||
sanity check on critical data. Assume that a job execution could
|
||||
receive hundreds of resume signals, and this is a "normal"
|
||||
situation, so it does not create a horrible mess in the history of
|
||||
the execution - e.g. looking like hundreds of restarts.
|
||||
|
||||
* Scope
|
||||
|
||||
* The instruction to pause comes from processing logic, not from an
|
||||
external signal (like an interrupt). A variation where the signal
|
||||
comes from outside might be a useful extension, but isn't explicitly
|
||||
included here.
|
||||
|
||||
* Preconditions
|
||||
|
||||
* A job is configured and one of its components can send the signal to pause
|
||||
|
||||
* The launching interface has the ability to resume a paused job
|
||||
|
||||
* The execution meta data can be inspected to verify that a pause has occurred
|
||||
|
||||
* Success
|
||||
|
||||
* User launches job and verifies that it has paused at a certain point
|
||||
|
||||
* User resumes job and verifies that it completes successfully.
|
||||
|
||||
* The end state is indistinguishable from a successful completion of
|
||||
the job in one attempt
|
||||
|
||||
* Description
|
||||
|
||||
The vanilla successful case proceeds as follows:
|
||||
|
||||
[[1]] User launches a new job execution.
|
||||
|
||||
[[1]] Framework begins processing, and successfully executes one
|
||||
or more steps.
|
||||
|
||||
[[1]] At the end of a step Framework encounters a condition that
|
||||
signals it should pause (e.g. a status flag).
|
||||
|
||||
[[1]] Framework gracefully exits the job execution, marking it as
|
||||
paused so that it can be identified as such when asked to resume.
|
||||
Often the framework will also be configured to notify a user that
|
||||
the pause has occurred, so that some business condition can be
|
||||
verified manually.
|
||||
|
||||
[[1]] User requests the job execution be resumed.
|
||||
|
||||
[[1]] Framework picks up where it left off, ignoring steps that
|
||||
have already successfully executed and starting with the one after
|
||||
the pause.
|
||||
|
||||
[[1]] Job finishes processing and Framework marks it as
|
||||
successfully completed, just as if it hadn't paused in the first
|
||||
place.
|
||||
|
||||
* Variations
|
||||
|
||||
* The agent that causes the job to resume is not a User but a remote
|
||||
worker process.
|
||||
|
||||
* Two agents request a resume at the same time. One of them has to
|
||||
lose (an exception is acceptable).
|
||||
|
||||
* A step pauses in the middle of execution. The job picks it
|
||||
up and starts where it left off, just like in a restart.
|
||||
|
||||
* More than one step was executing when the pause signal was
|
||||
detected. Framework allows steps that are executing in process to
|
||||
complete (or pause) before exiting the job execution.
|
||||
|
||||
* More than one step is in a paused state when the job resumes.
|
||||
Requires no special treatment from Framework: if those steps were
|
||||
active when the pause reached the job level on the last run, then
|
||||
they will be processed in the same way on a resume (presumably in
|
||||
multiple threads).
|
||||
|
||||
* Implementation
|
||||
|
||||
* A new <<<BatchStatus.PAUSED>>>.
|
||||
|
||||
* The <<<JobLauncher>>> interface may not need any more than it already has:
|
||||
|
||||
+---
|
||||
public interface JobLauncher {
|
||||
|
||||
public JobExecution run(Job job, JobParameters jobParameters) throws ....;
|
||||
|
||||
}
|
||||
+---
|
||||
|
||||
In the case that the last execution failed, we already pick up from
|
||||
where we left off with a new <<<JobExecution>>>. The only
|
||||
difference now is that we don't need a new <<<JobExecution>>>, so we
|
||||
have to be careful about concurrency - what happens if two agents
|
||||
try to resume the job at once. To be safe we can treat this the
|
||||
same way as a restart - lock the <<<JobExecution>>> table in the
|
||||
database by setting a TX isolation attribute on the
|
||||
<<<JobRepository>>>.
|
||||
|
||||
* When we resume we need to wind forward through the job execution
|
||||
and look at all step executions to see if they are active. Once the
|
||||
<<<JobExecution>>> has been identified the process should be no
|
||||
different to a restart.
|
||||
@@ -1,86 +0,0 @@
|
||||
------
|
||||
Restart Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Case: Manual Restart After Failure
|
||||
|
||||
* Goal
|
||||
|
||||
Restart a failed or interrupted batch and have it pick up where it
|
||||
left off (within limits of transaction boundaries) to save time and
|
||||
resources. A key goal is that the management of the batch process
|
||||
(locating a job and its input and results, starting, scheduling,
|
||||
restarting) should be as easy as possible for a non-developer, like
|
||||
an application support team with some business back up.
|
||||
|
||||
* Scope
|
||||
|
||||
Any batch should be able to restart gracefully, even if (depending
|
||||
on chosen execution or client implementation) it might have to go
|
||||
right back to the beginning.
|
||||
|
||||
* Preconditions
|
||||
|
||||
* It is possible to identify exception conditions under which a
|
||||
restart will be able to carry on processing a batch from where it
|
||||
left off.
|
||||
|
||||
* There exists a persistent storage mechanism for the initial
|
||||
conditions.
|
||||
|
||||
* Success
|
||||
|
||||
* Force a batch to fail, and then fix the problem and restart. See
|
||||
successful completion with no duplicate results.
|
||||
|
||||
* Description
|
||||
|
||||
[[1]] A batch operation encounters an exception which forces the
|
||||
process to stop processing.
|
||||
|
||||
[[1]] Framework catches exception and classifies it.
|
||||
|
||||
[[1]] Framework logs event with enough information to identify the
|
||||
location of the job and the nature of the problem.
|
||||
|
||||
[[1]] Framework saves initial condition from last commit point, to
|
||||
enable restart to start from the last known good operation.
|
||||
|
||||
[[1]] Operator fixes problem (e.g. makes missing resource available,
|
||||
edits input file).
|
||||
|
||||
[[1]] Operator restarts batch.
|
||||
|
||||
[[1]] Framework loads initial conditions and continues processing.
|
||||
|
||||
* Variations
|
||||
|
||||
* Some restarts might lend themselves to being handled automatically
|
||||
- see the use case {{{./retry.html}Automatic Retry}}.
|
||||
|
||||
* Implementation
|
||||
|
||||
* The saving of initial conditions needs to be strategised. In some
|
||||
cases saving a native serialization to a file will suffice. In
|
||||
others a database might be used, or some custom serialization
|
||||
(persist / rehydrate).
|
||||
|
||||
* The initial condition is naturally under control of the
|
||||
<<<ItemReader>>>. The client need not know about the persistence
|
||||
and rehydration. In fact explicit persistence and rehydration might
|
||||
be overkill - just relying on the transaction semantics might be
|
||||
adequate in a lot of cases. The <<<ItemReader>>> would have to be
|
||||
aware of the transactions, which we assume are normally demarcated
|
||||
in the <<<Step>>>. Since the point at which persistence
|
||||
is needed is tied to transaction commits, there may have to be some
|
||||
transaction synchronization.
|
||||
|
||||
* The persistence of initial conditions is a cross cutting concern.
|
||||
It may lend itself (along with the application of an execution
|
||||
handler generally) to being implemented as an aspect. Compare the
|
||||
<<<TransactionTemplate>>>, where the most common usage is via an
|
||||
interceptor, but occasionally the template is used directly by
|
||||
client code.
|
||||
@@ -1,279 +0,0 @@
|
||||
------
|
||||
Automatic Retry Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Case: Automatic Retry
|
||||
|
||||
* Goal
|
||||
|
||||
Support automatic retry of an operation if it fails in certain
|
||||
pre-determined ways. Client code is not aware of the details of
|
||||
when and how many times to retry the operation, and various
|
||||
strategies for those details are available. The decision about
|
||||
whether to retry or abandon lies with the Framework, but is
|
||||
parameterisable through some retry meta data.
|
||||
|
||||
Retryable operations are usually transactional, but this can be
|
||||
provided by a normal transaction template or interceptor
|
||||
(transaction meta data are independent of the retry meta data).
|
||||
|
||||
* Scope
|
||||
|
||||
Any operation can be retried, but there are restrictions on nesting
|
||||
transactions (normally an inner transaction needs to be
|
||||
propagation=NESTED).
|
||||
|
||||
* Preconditions
|
||||
|
||||
An operation exists that can be forced to fail and is able to
|
||||
succeed on a retry.
|
||||
|
||||
* Success
|
||||
|
||||
* Verify that an operation fails and then succeeds on a retry.
|
||||
|
||||
* Verify that back off policy (time between retries) can be
|
||||
strategised without changing client code.
|
||||
|
||||
* Verify that the retry policy can be strategised, and can be used
|
||||
to change the number of retry attempts depending on the type of
|
||||
exception thrown in the retry block.
|
||||
|
||||
* Description
|
||||
|
||||
Successful retry proceeds as follows:
|
||||
|
||||
[[1]] Framework executes an operation provided by Client.
|
||||
|
||||
[[1]] The operation fails and Framework catches an exception,
|
||||
classified as retryable.
|
||||
|
||||
[[1]] Framework waits for a pre-defined back off period. The
|
||||
period is not fixed, but is strategised so that different
|
||||
policies can be applied. The most common and useful policy is an
|
||||
exponentially increasing back off delay, with a ceiling.
|
||||
|
||||
[[1]] Framework repeats the operation.
|
||||
|
||||
[[1]] Processing is successful.
|
||||
|
||||
[[1]] Framework stores and / or logs statistics about the retry
|
||||
for management purposes. Details?
|
||||
|
||||
* Variations
|
||||
|
||||
The following variations are supported.
|
||||
|
||||
** Retry Failure
|
||||
|
||||
A retry can fail for a number of reasons. E.g. if the number of
|
||||
retries is too high, or there is a timeout, or an exception of
|
||||
another sort that cannot be classified as retryable.
|
||||
|
||||
[[1]] Last retry attempt fails and Framework determines that
|
||||
another retry is not permitted by the current policy.
|
||||
|
||||
[[1]] Framework records status for management purposes.
|
||||
|
||||
[[1]] Framework throws a recognisable exception?
|
||||
|
||||
[[1]] Control may return to client (if the exception was caught),
|
||||
or the processing may end.
|
||||
|
||||
** Transient and Non-transient Failures
|
||||
|
||||
We may wish to classify exceptions into (at least) three types, and
|
||||
vary the retry policy based on the classification:
|
||||
|
||||
* Transient failures come from resources that are external and may
|
||||
have independent lifecycles to the client process. Examples are
|
||||
database deadlock, network connectivity. It is always worth
|
||||
retrying on a transient failure, and normally we can keep retrying
|
||||
(if not forever then for a very long time), in the belief that
|
||||
eventually the resource will become available again.
|
||||
|
||||
* Non-transient failures can be retried a few times. This is the
|
||||
default.
|
||||
|
||||
* Non-retryable failures like a configuration or input data error
|
||||
should not be retried (they will always fail the same way).
|
||||
|
||||
** Early Termination
|
||||
|
||||
Normally client code is unaware of the Framework, but occasionally
|
||||
emergency measures might be taken inside client code where all
|
||||
further retry attempts are vetoed for the current block.
|
||||
|
||||
** Stateful Retry
|
||||
|
||||
A stateful (or external) retry is used to force a roll back of an
|
||||
external message (or other data) resource, so that the message will
|
||||
be re-delivered. The implementation has to be stateful so it can
|
||||
remember the context for the failed message next time it is
|
||||
delivered. The additional features of a stateful retry, as opposed
|
||||
to a normal rollback, are that:
|
||||
|
||||
* A message can be retried indefinitely or up to a set number of
|
||||
times, after which an error processing route is taken.
|
||||
|
||||
* A back-off delay is used at the <beginning> of the retry
|
||||
before any other transactional resources are enlisted.
|
||||
|
||||
* {Implementation}
|
||||
|
||||
* The vanilla case and most of the variations can be achieved with a
|
||||
simple template approach:
|
||||
|
||||
+---
|
||||
RetryTemplate retryTemplate = new RetryTemplate();
|
||||
retryTemplate.setRetryPolicy(new SimpleRetryPolicy(5));
|
||||
Object result = retryTemplate.execute(new RetryCallback() {
|
||||
public Object doWithRetry(RetryContext context) throws Throwable {
|
||||
// do some processing
|
||||
return result;
|
||||
}
|
||||
});
|
||||
+---
|
||||
|
||||
* Schematically we can represent the implementation of the {retry}
|
||||
template as follows:
|
||||
|
||||
+---
|
||||
1 | TRY {
|
||||
1.1 | do something;
|
||||
2 | } FAIL {
|
||||
2.1 | if (retry limit reached) {
|
||||
2.2 | rethrow exception;
|
||||
| } else {
|
||||
2.3 | TRY(1) again;
|
||||
| }
|
||||
| }
|
||||
+---
|
||||
|
||||
* The template has policies for back off and retry (whether or not
|
||||
to retry the last exception). The example above shows the retry
|
||||
policy being set to simply retry all exceptions up to a limit of 5
|
||||
times.
|
||||
|
||||
* The <<<RetryContext>>> has an API that allows clients to override
|
||||
the retry policy. The context can also be accessed as a thread
|
||||
local from a static convenience class, in the case that the callback
|
||||
is implemented as a wrapper around a POJO.
|
||||
|
||||
* External retry is the most difficult variation to implement, and
|
||||
doesn't fit naturally into the template model above. Two things
|
||||
depend on the retry count - back-off delay and the decision to
|
||||
follow the recovery path - so it needs to be available at the
|
||||
beginning of every processing block.
|
||||
|
||||
We will discuss the implementation from a JMS-flavoured viewpoint,
|
||||
where the current item being processed is a message. This can be
|
||||
generalised to more generic data types, as long as the item can be
|
||||
rejected transactionally to signal that we require it to be
|
||||
re-delivered to this or another consumer.
|
||||
|
||||
Consider this pattern, which is very typical:
|
||||
|
||||
+---
|
||||
1 | SESSION {
|
||||
2 | receive;
|
||||
3 | RETRY {
|
||||
| remote access;
|
||||
| }
|
||||
| }
|
||||
+---
|
||||
|
||||
A <<<RetryTemplate>>> is responsible for the RETRY(3) block. But
|
||||
we can't put the same wrapper around the whole process:
|
||||
|
||||
+---
|
||||
0 | RETRY { // Do not do this!
|
||||
1 | SESSION {
|
||||
2 | receive;
|
||||
3 | RETRY {
|
||||
| remote access;
|
||||
| }
|
||||
| }
|
||||
| }
|
||||
+---
|
||||
|
||||
because the receive(2) might not get the same message back on the
|
||||
second and subsequent attempts (another consumer might get it, or it
|
||||
might come out of order). So external retry has a different flow -
|
||||
it might be a different implementation of the same interface, or a
|
||||
different parameterisation of the normal retry template.
|
||||
|
||||
We can break down the implementation of an external retry into steps
|
||||
as follows:
|
||||
|
||||
+---
|
||||
1 | SESSION {
|
||||
2 | receive;
|
||||
3 | TRY {
|
||||
3.1 | if (already processed) {
|
||||
3.2 | backoff;
|
||||
| }
|
||||
4 | RETRY {
|
||||
| remote access;
|
||||
| }
|
||||
5 | } FAIL {
|
||||
5.1 | if (retry limit reached) {
|
||||
5.2 | recover;
|
||||
| } else {
|
||||
5.3 | rethrow exception;
|
||||
| }
|
||||
| }
|
||||
| }
|
||||
+---
|
||||
|
||||
Decisions (3.1) and (5.1) require knowledge of the history of
|
||||
processing the current message. Note that the action on failure is
|
||||
the opposite to the vanilla case {{{Implementation}above}} - if the retry
|
||||
limit is not reached then we rethrow the exception.
|
||||
|
||||
If the retry limit is not reached then the rethrow(5.3) causes the
|
||||
SESSION(1) to roll back, and the message will be re-delivered.
|
||||
RETRY(4) is a normal retry with a template.
|
||||
|
||||
The retry logic is easy to implement - the hard bit is that the
|
||||
policies depend on the history of the message. This requires some
|
||||
special retry and back off policies that are aware of the history:
|
||||
|
||||
* When a message arrives, at the beginning of the TRY(3) above, we
|
||||
need to update our knowledge of its history.
|
||||
|
||||
* The backoff policy can decide whether to back off immediately
|
||||
when it is initialized at step (3.1).
|
||||
|
||||
* The retry decision at (5.1) has to be aware of the history as
|
||||
well as some simple exception classification rules.
|
||||
|
||||
* If the retry cannot proceed the retry policy can take steps to
|
||||
recover (5.2), e.g. send the current message to an error queue.
|
||||
The exception should not propagate in this case.
|
||||
|
||||
* If we fail and rethrow (5.3), then we need to store the
|
||||
knowledge of the message history somewhere where another consumer
|
||||
can access it.
|
||||
|
||||
There is a small conundrum about what value to return from the
|
||||
TRY(3) block if it ultimately fails (5.2) - a normal retry never
|
||||
completes unless it is successful, but an external retry can
|
||||
complete if it is unsuccessful. The obvious choice is to return
|
||||
null. It probably won't matter in a messaging application anyway
|
||||
because the client of the retry block probably isn't expecting
|
||||
anything. It may matter if the TRY(3) block is part of a batch
|
||||
because the batch template uses null as a signal that the current
|
||||
batch is complete. But on the other hand it might be a good
|
||||
strategy to close the batch if processing a message fails.
|
||||
|
||||
With JMS there is no indication in the <<<Message>>> how many times
|
||||
it has been rejected - only a flag <<<getJMSRedelivered>>> to show
|
||||
that it has failed at least once. To count the number of retries,
|
||||
we have to store a global map of messages (ids) to retry counts
|
||||
(within a single VM - for more than one OS process each one has to
|
||||
be independent).
|
||||
|
||||
@@ -1,42 +0,0 @@
|
||||
------
|
||||
Scheduler Managed Use Case
|
||||
------
|
||||
Wayne Lund, Dave Syer
|
||||
------
|
||||
May 2007
|
||||
|
||||
Use Case: Scheduler Managed Processing
|
||||
|
||||
* Goal
|
||||
|
||||
Ensure that an Enterprise Scheduler can interact with the Batch Launcher to start, stop,
|
||||
suspend and/or kill a batch job.
|
||||
|
||||
* Scope
|
||||
|
||||
* Batch jobs tend to run within carefully planned job stream
|
||||
schedules. At a minimum this requires an integration between the
|
||||
Batch Launcher (in the abstract) and the scheduler's control
|
||||
mechanism to start and stop batch jobs and then to understand the
|
||||
results of the batch job execution (e.g. COMPLETED, ABENDED, etc.)
|
||||
so that subsequent actions may be taken.
|
||||
|
||||
* Spring Batch does not aim to implement the scheduling concerns as
|
||||
such (other tools are available for that). The framework does need
|
||||
to provide the information that such tools need to decide when to
|
||||
act and what to do (e.g. exit code mapping).
|
||||
|
||||
* Preconditions
|
||||
|
||||
* A mechanism has been established for the scheduler to launch a batch job. This is often times
|
||||
a simple unix or dos shell script.
|
||||
|
||||
* A mapping of exit codes to the error code numbers that the scheduler is expecting on the exiting
|
||||
of a batch job.
|
||||
|
||||
* Success
|
||||
|
||||
* Batch Jobs are launched and managed by scheduler
|
||||
|
||||
* Description
|
||||
|
||||
@@ -1,290 +0,0 @@
|
||||
------
|
||||
Simple Batch Repeat Use Case
|
||||
------
|
||||
Dave Syer
|
||||
------
|
||||
January 2007
|
||||
|
||||
Use Case: Simple Batch Repeat
|
||||
|
||||
* Goal
|
||||
|
||||
Repeat a simple operation such as processing a data item, or a
|
||||
message, up to a fixed number of times, normally with a transaction
|
||||
scoped to the whole batch. Transaction resources are shared between
|
||||
the operations in the batch, leading to performance benefits.
|
||||
|
||||
* Scope
|
||||
|
||||
The operation to be repeated:
|
||||
|
||||
* Can expect to use and manage its own I/O or datastore resources,
|
||||
but not necessarily transactions;
|
||||
|
||||
* May need to introspect the batch status (as a variation);
|
||||
|
||||
* Executes synchronously or asynchronously (as a variation).
|
||||
|
||||
* Is stateless - this is not a framework restriction in principle,
|
||||
but simplifies the implementation for now. See the
|
||||
{{{store}Implementation}} section below for some notes on
|
||||
stateful synchronisation;
|
||||
|
||||
* Should be implementable as a POJO if desired.
|
||||
|
||||
* Preconditions
|
||||
|
||||
Client code can locate and acquire all the resources it needs for
|
||||
the batched operation, and can force transactions to rollback for
|
||||
testing purposes.
|
||||
|
||||
* Success
|
||||
|
||||
* Verify that a successful batch executed a fixed number of times.
|
||||
|
||||
* Verify that a batch completes early but successfully if an
|
||||
underlying transaction times out.
|
||||
|
||||
* Terminate a batch by failing one of the operations, and verify
|
||||
that the preceding operations rolled back (subject to batch meta
|
||||
data).
|
||||
|
||||
* Execute a batch asynchronously and verify that the correct number
|
||||
of operations is performed.
|
||||
|
||||
* Description
|
||||
|
||||
We are often interested in a specific scenario of this use case
|
||||
where the batched operation is:
|
||||
|
||||
* Read a message or data item from an endpoint like a JMS
|
||||
Destination.
|
||||
|
||||
* Do some business processing involving database reads and writes.
|
||||
|
||||
The vanilla successful batch use case proceeds as follows:
|
||||
|
||||
[[1]] Framework starts a batch, acquiring resources as needed and
|
||||
creating a context for the execution.
|
||||
|
||||
[[1]] Client provides a batch operation in the form of a source of
|
||||
data items and a processor acting on the data item.
|
||||
|
||||
[[1]] Framework executes batch operation.
|
||||
|
||||
[[1]] Repeat the last step until the batch size is reached.
|
||||
|
||||
[[1]] Framework commits the batch. All database changes are
|
||||
committed and received messages removed from the endpoints.
|
||||
|
||||
* Variations
|
||||
|
||||
** Rollback
|
||||
|
||||
If one of the operations rolls back it will throw an exception.
|
||||
Normal transaction semantics determine what happens next. Usually
|
||||
(in the scenario described above) there is an outer transaction for
|
||||
the whole batch, which rolls back as well: all the messages remain
|
||||
unsent, and all the data remain uncommitted. A retry will receive
|
||||
exactly the same initial conditions.
|
||||
|
||||
** Timeout
|
||||
|
||||
The batch size is not fixed. The use case proceeds as above, but in
|
||||
the middle of a batch operation execution:
|
||||
|
||||
|
||||
[[1]] Framework determines that the batch operation has timed out
|
||||
(e.g. while it was waiting for an incoming message).
|
||||
|
||||
[[1]] Framework commits the batch with all operations so far
|
||||
complete - possibly a smaller than normal size.
|
||||
|
||||
** Asynchronous Processing
|
||||
|
||||
Instead of the Framework waiting for each operation to complete it
|
||||
could spin them off independently into separate threads or a work
|
||||
queue. The batch still has to have a definite endpoint, so the
|
||||
Framework waits for all the operations to finish or fail
|
||||
before completing the batch.
|
||||
|
||||
** Introspection of Batch Context
|
||||
|
||||
Client may wish to inspect the state of the ongoing batch operation,
|
||||
and potentially force an early completion.
|
||||
|
||||
* {Implementation}
|
||||
|
||||
* The completion of the batch loop is handled by a policy delegate
|
||||
that we can use to strategise the concept of a loop that might
|
||||
complete early. This can cover both the timeout variation and the
|
||||
vanilla use case flow.
|
||||
|
||||
* What form should the batch template (<<<RepeatOperations>>>)
|
||||
interface take? We might start with something like this:
|
||||
|
||||
+---
|
||||
batchTemplate.iterate(new RepeatCallback() {
|
||||
|
||||
public boolean doInIteration() {
|
||||
// do stuff
|
||||
}
|
||||
|
||||
});
|
||||
+---
|
||||
|
||||
* A nice tool for a batch operation in a callback is an iterator
|
||||
through a data set or message endpoint (<<<ItemProvider>>>), coupled
|
||||
with a handler for processing the item. This adds a potential
|
||||
implementation of <<<RepeatCallback>>> that knows about the
|
||||
<<<ItemProvider>>> and adds a processor object. E.g. as an
|
||||
anonymous inner class:
|
||||
|
||||
+---
|
||||
final ItemProvider provider = new JmsItemProvider();
|
||||
final ItemProcessor processor = new ItemProcessor() {
|
||||
public void process(Object data) {
|
||||
// do something with the data (a record)
|
||||
}
|
||||
};
|
||||
|
||||
batchTemplate.iterate(new RepeatCallback() {
|
||||
|
||||
public boolean doInIteration() {
|
||||
Object data = provider.next();
|
||||
if (data!=null) {
|
||||
processor.process(data);
|
||||
}
|
||||
return data!=null;
|
||||
}
|
||||
|
||||
});
|
||||
+---
|
||||
|
||||
* Is a batch template with callback the best implementation? Could
|
||||
we perhaps use or re-use <<<TaskExecutor>>> somehow? Which is
|
||||
better for the client:
|
||||
|
||||
+---
|
||||
batchTemplate.iterate(new RepeatCallback() {
|
||||
|
||||
public boolean doInIteration() {
|
||||
// do stuff
|
||||
}
|
||||
|
||||
});
|
||||
+---
|
||||
|
||||
where the batch template might itself use a <<<TaskExecutor>>>
|
||||
internally, or
|
||||
|
||||
+---
|
||||
batchTemplate.iterate(new Runnable() {
|
||||
|
||||
public void run() {
|
||||
// do stuff with data
|
||||
};
|
||||
|
||||
});
|
||||
+---
|
||||
|
||||
where the batch template is a <<<TaskExecutor>>>. Probably the
|
||||
former because it is more encapsulated: it gives the framework more
|
||||
freedom to implement the template in any way it needs to, e.g. to
|
||||
accommodate more complicated use cases.
|
||||
|
||||
* To {store} up SQL operations until the end of a batch, and take
|
||||
advantage of JDBC driver efficiencies, the client needs to store
|
||||
some state during the batch, and also register a transaction
|
||||
synchronisation. For this kind of scenario we introduce an
|
||||
interceptor framework in the template execution. The template calls
|
||||
back to interceptors, which themselves can strategise clean up and
|
||||
close-type behaviour:
|
||||
|
||||
+---
|
||||
public class RepeatTemplate implements RepeatOperations {
|
||||
|
||||
public void iterate(RepeatCallback callback) {
|
||||
|
||||
// set up the batch
|
||||
interceptor.open();
|
||||
|
||||
while (running) {
|
||||
|
||||
// allow interceptor to pre-process and veto continuation
|
||||
interceptor.before();
|
||||
|
||||
// continue only if batch is ongoing
|
||||
if (running = callback.doInIteration()!=null) {
|
||||
interceptor.after();
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
// clean up or commit the whole batch
|
||||
interceptor.close();
|
||||
|
||||
}
|
||||
}
|
||||
+---
|
||||
|
||||
The <<<RepeatInterceptor>>> can be stateful, and can store up inserts
|
||||
until the end of the batch. If the <<<RepeatTemplate.iterate>>> is
|
||||
transactional then they will only happen if the transaction is
|
||||
successful.
|
||||
|
||||
This way the client can even decide to use a batch interceptor
|
||||
that runs in its own transaction at the end of the batch.
|
||||
|
||||
* There is no need for an overall batch timeout because the inner
|
||||
operations are synchronous and have their own timeout metadata
|
||||
through transaction definitions. The whole batch (outer transaction)
|
||||
may still have a timeout attribute, and then there is a corner case
|
||||
where the batch operations are all successful, but because they all
|
||||
took a long time the whole batch rolls back because of the timeout.
|
||||
|
||||
* The context of the ongoing batch is closely linked with the
|
||||
completion policy. The completion policy is pluggable into the
|
||||
batch template, and acts as a factory for context objects which can
|
||||
then be inspected by Client in the callback. For example:
|
||||
|
||||
+---
|
||||
public class RepeatTemplate implements RepeatOperations {
|
||||
|
||||
public void iterate(RepeatCallback callback) {
|
||||
|
||||
// set up the batch session
|
||||
RepeatContext context = completionPolicy.start();
|
||||
|
||||
while (!completionPolicy.isComplete(context)) {
|
||||
|
||||
// callback gets the context as an argument
|
||||
callback.doInIteration(context);
|
||||
|
||||
completionPolicy.update(context);
|
||||
}
|
||||
|
||||
}
|
||||
}
|
||||
+---
|
||||
|
||||
* The example above provides Client the opportunity to inspect the
|
||||
context through the callback interface. If Client is a POJO,
|
||||
Framework has to create a callback and wrap it, in which case there
|
||||
needs to be a global accessor for the current context or session.
|
||||
The template is then responsible for registering the current context
|
||||
with a <<<RepeatSynchronizationManager>>>. E.g. client code can look
|
||||
at the session and mark it as complete if desired
|
||||
(c.f. <<<TransactionStatus>>>):
|
||||
|
||||
+---
|
||||
public Object doMyBatch() {
|
||||
|
||||
// do some processing
|
||||
|
||||
// something bad happened...
|
||||
RepeatContext context = RepeatSynchronizationManager.getContext();
|
||||
context.setCompleteOnly();
|
||||
|
||||
}
|
||||
+---
|
||||