Add crash/start jepsen test

This commit is contained in:
Janne Valkealahti
2015-08-22 16:27:22 +01:00
parent efce1fa3af
commit c8afe725da
5 changed files with 137 additions and 2 deletions

View File

@@ -563,3 +563,19 @@ What's happening in above chart:
* Jepsen will heal the network and after some time nodes `n3/n4` will join
back into the ensemble and synchronize their distributed status.
=== Crash and Join Tolerance
In this test we will demonstrate that killing an existing state machine
and then joining a new instance back into the ensemble will keep the
distributed state healthy, and that newly joined state machines will
synchronize their states properly.
image::images/sm-tech-stop-start.png[width=500]
What's happening in above chart:
* All state machines are transitioned from initial state `S21` into
`S211` so that we can test proper state synchronization during a join.
* `X` marks the point at which a specific node has been crashed and restarted.
* At the same time we request states from all machines and plot them.
* Finally we do a simple transition back to `S21` from `S211` to make
sure that all state machines are still functioning properly.

Binary file not shown.

After

Width:  |  Height:  |  Size: 31 KiB

View File

@@ -12,6 +12,7 @@
[knossos.history :as history]
[gnuplot.core :as g]))
;; Maps a node name ("n1".."n5") to a number. `extract-plot-data5` looks up
;; the node named in a nemesis :stop result here to get its plot coordinate.
(def nodetovalue {"n1" 1 "n2" 2 "n3" 3 "n4" 4 "n5" 5 })
;; Maps a state id to a number.
;; NOTE(review): presumably the y position used when plotting states; the keys
;; here ("S11"/"S12") differ from the 'S21'/'S22' tick labels set in `plot3!` --
;; confirm which naming is intended.
(def statetovalue {"S11" 1 "S12" 2 "S211" 3 "S212" 4 })
;; Maps a variable name ("v1".."v8") to a number.
;; NOTE(review): presumably used when plotting variable reads; the consumer is
;; not visible in this section -- confirm.
(def variabletovalue {"v1" 1 "v2" 2 "v3" 3 "v4" 4 "v5" 5 "v6" 6 "v7" 7 "v8" 8})
@@ -65,6 +66,16 @@
(vector
(reduce-kv (fn [vec key value] (conj vec (vector (get value :time) (+ (get value :process) 1) (get value :v))) ) [] (vec data)))))
(defn extract-plot-data5
  "Builds the label dataset for nemesis :stop operations found in `history`.

  Only entries with a truthy :value whose :process is :nemesis and whose :f
  is :stop are kept. Each kept entry becomes a [time node-number \"X\"]
  triple, where node-number is the `nodetovalue` lookup of the name of the
  last key in the entry's :value.

  Returns a one-element vector that wraps the vector of triples."
  [history]
  (let [nemesis-stop? (fn [op]
                        (and (:value op)
                             (= :nemesis (:process op))
                             (= :stop (:f op))))
        ->label       (fn [op]
                        [(get op :time)
                         (get nodetovalue (name (last (keys (get op :value)))))
                         "X"])]
    [(mapv ->label (filter nemesis-stop? history))]))
(defn plot1!
[test model history]
@@ -129,6 +140,38 @@
output-path)
{:valid? true})
(defn plot3!
  "Plots node states together with crash markers for the crash/start test.

  Resolves the canonical path of \"states.png\" in the test's store, then
  hands gnuplot six inline datasets: the series returned by
  `extract-plot-data` followed by the label series from
  `extract-plot-data5`. The first five are drawn as step lines titled
  \"states n1\"..\"states n5\"; the sixth is drawn as boxed labels against
  the x1y2 axes. `model` is accepted but not used here.

  Always returns {:valid? true}."
  [test model history]
  (let [output-path  (.getCanonicalPath (store/path! test "states.png"))
        ;; One step-line series per node; only the legend title varies.
        state-series (fn [title] ["-" :title title :with :steps :lw :3])
        crash-series ["-" :title "crash" :with :labels :center :boxed :font ",15" :axis :x1y2]
        commands     [[:set :key :outside]
                      [:set :style :textbox :opaque]
                      [:set :terminal :qt :size (keyword "900,450")]
                      ;; Left axis carries states, right axis carries nodes.
                      [:set :yrange (keyword "[0.5:4.5]")]
                      [:set :y2range (keyword "[0.5:5.5]")]
                      [:set :xtics :format "%h\nns"]
                      [:set :xlabel "elapsed time"]
                      [:set :ylabel "states in nodes"]
                      [:set :y2label "crash/start in nodes"]
                      [:set :ytics 1]
                      [:set :ytics (keyword "('S21' 1, 'S22' 2, 'S211' 3, 'S212' 4)")]
                      [:set :ytics :nomirror]
                      [:set :y2tics 1]
                      [:set :y2tics (keyword "('n1' 1, 'n2' 2, 'n3' 3, 'n4' 4, 'n5' 5)")]
                      [:plot
                       (g/list (state-series "states n1")
                               (state-series "states n2")
                               (state-series "states n3")
                               (state-series "states n4")
                               (state-series "states n5")
                               crash-series)]]
        datasets     (into (extract-plot-data history)
                           (extract-plot-data5 history))]
    (g/raw-plot! commands datasets)
    output-path)
  {:valid? true})
(defn checker1
"Constructs a Jepsen checker."
[]
@@ -142,3 +185,10 @@
(reify Checker
(check [_ test model history]
(if (env :plot) (plot2! test model history) {:valid? true}))))
(defn checker3
  "Constructs a Jepsen checker for the crash/start test.

  When `(env :plot)` is truthy the check delegates to `plot3!`; otherwise it
  reports {:valid? true} without plotting."
  []
  (reify Checker
    (check [_ test model history]
      (if-not (env :plot)
        {:valid? true}
        (plot3! test model history)))))

View File

@@ -17,6 +17,7 @@
[tests :as tests]]
[spring-statemachine-jepsen.checker :refer [checker1]]
[spring-statemachine-jepsen.checker :refer [checker2]]
[spring-statemachine-jepsen.checker :refer [checker3]]
[jepsen.checker.timeline :as timeline]
[jepsen.control.net :as net]
[jepsen.os.debian :as debian]
@@ -300,8 +301,7 @@
(gen-send-event-variable "J" "v7")
(gen-read-variable "v7")
(gen-send-event-variable "J" "v8")
(gen-read-variable "v8")
))
(gen-read-variable "v8")))
(defn event-gen-4
"Generates event and checks states while splitting network"
@@ -326,6 +326,64 @@
(gen-status 30)
(gen-read-states 10 ["S0","S1","S11"])))
(defn event-gen-5
  "Generates nemesis start/stop cycles and checks states around each join.

  Phases, in order:
  - read states expecting S0/S1/S11, send event \"C\" to all machines and
    read states expecting S0/S2/S21/S211;
  - eight nemesis cycles (:start, sleep 5, :stop), each followed by a state
    read expecting S0/S2/S21/S211;
  - send event \"K\" to all machines and read states expecting S0/S1/S11."
  []
  (letfn [;; Builds a fresh nemesis generator on every call: :start, wait, :stop.
          (crash-cycle []
            (gen/nemesis
              (gen/seq [{:type :info :f :start}
                        (gen/sleep 5)
                        {:type :info :f :stop}])))
          ;; Builds a fresh read phase expecting the post-\"C\" states.
          (read-joined [n]
            (gen-read-states n ["S0" "S2" "S21" "S211"]))]
    (gen/phases
      (gen-read-states 5 ["S0" "S1" "S11"])
      (gen-send-event-all "C")
      (read-joined 5)
      (crash-cycle)
      (read-joined 2)
      (crash-cycle)
      (read-joined 2)
      (crash-cycle)
      (read-joined 2)
      (crash-cycle)
      (read-joined 2)
      (crash-cycle)
      (read-joined 2)
      (crash-cycle)
      (read-joined 2)
      (crash-cycle)
      (read-joined 2)
      (crash-cycle)
      (read-joined 5)
      (gen-send-event-all "K")
      (gen-read-states 5 ["S0" "S1" "S11"]))))
(defn killer
  "Nemesis that crashes and restarts the state machine on one node.

  The target is chosen with `rand-nth`. On :start the process is killed as
  root with `pkill -9 -f spring-statemachine-samples-web`; on :stop the node
  is brought back by calling `start!` with it."
  []
  (let [crash!   (fn [test node]
                   (c/su (c/exec :pkill :-9 :-f :spring-statemachine-samples-web)))
        restart! (fn [test node]
                   (start! node))]
    (nemesis/node-start-stopper rand-nth crash! restart!)))
(defn statemachine-test
"Defaults for testing state machine."
[name opts]
@@ -374,3 +432,11 @@
{:nemesis (nemesis/partition-random-halves)
:generator (event-gen-4)
:checker (checker1)}))
(defn stop-start-test
  "Stops and starts nodes, checking that rejoining the ensemble works.

  Combines the `killer` nemesis, the `event-gen-5` generator and the
  `checker3` checker into an event test."
  []
  ;; The name was previously the copy-pasted \"partition-half\", which labelled
  ;; this test the same as the partition-half test.
  (event-test "stop-start"
              {:nemesis   (killer)
               :generator (event-gen-5)
               :checker   (checker3)}))

View File

@@ -25,3 +25,6 @@
;; Runs the test map built by `partition-half-test`.
(deftest partition-half
  (-> (partition-half-test)
      run-statemachine-test!))
;; Runs the test map built by `stop-start-test`.
(deftest stop-start
  (-> (stop-start-test)
      run-statemachine-test!))