From 4a2d8bfc6ba5807d525a9722df8257098844426a Mon Sep 17 00:00:00 2001
From: "Rodrigo Q. Saramago" <deepmarolaest@gmail.com>
Date: Thu, 3 Dec 2015 01:14:46 -0200
Subject: [PATCH] Better failure detector timeout in deploy config

---
 benchmark/deploy/cfabcast-deploy.conf | 58 +++++++++++++++++++++++++++
 1 file changed, 58 insertions(+)

diff --git a/benchmark/deploy/cfabcast-deploy.conf b/benchmark/deploy/cfabcast-deploy.conf
index 1f020b1..4f5a6e1 100644
--- a/benchmark/deploy/cfabcast-deploy.conf
+++ b/benchmark/deploy/cfabcast-deploy.conf
@@ -23,6 +23,64 @@ akka {
     seed-nodes = []
 
     auto-down-unreachable-after = off
+
+    # Settings for the Phi accrual failure detector (http://ddg.jaist.ac.jp/pub/HDY+04.pdf
+    # [Hayashibara et al]) used by the cluster subsystem to detect unreachable
+    # members.
+    # The default PhiAccrualFailureDetector will trigger if there are no heartbeats within
+    # the duration heartbeat-interval + acceptable-heartbeat-pause + threshold_adjustment,
+    # i.e. around 5.5 seconds with default settings (over 65 seconds with the values set below).
+    failure-detector {
+ 
+      # FQCN of the failure detector implementation.
+      # It must implement akka.remote.FailureDetector and have
+      # a public constructor with a com.typesafe.config.Config and
+      # akka.event.EventStream parameter.
+      implementation-class = "akka.remote.PhiAccrualFailureDetector"
+ 
+      # How often keep-alive heartbeat messages should be sent to each connection.
+      # default: 1 s
+      heartbeat-interval = 5 s
+ 
+      # Defines the failure detector threshold.
+      # A low threshold is prone to generate many wrong suspicions but ensures
+      # a quick detection in the event of a real crash. Conversely, a high
+      # threshold generates fewer mistakes but needs more time to detect
+      # actual crashes.
+      # default: 8.0
+      threshold = 12.0
+ 
+      # Number of samples of inter-heartbeat arrival times used to adaptively
+      # calculate the failure timeout for connections.
+      # default: 1000
+      max-sample-size = 1000
+ 
+      # Minimum standard deviation to use for the normal distribution in
+      # AccrualFailureDetector. Too low a standard deviation might result in
+      # too much sensitivity to sudden, but normal, deviations in heartbeat
+      # inter-arrival times.
+      # default: 100 ms
+      min-std-deviation = 100 ms
+ 
+      # Duration of potentially lost/delayed heartbeats that will be
+      # accepted before considering it to be an anomaly.
+      # This margin is important to be able to survive sudden, occasional
+      # pauses in heartbeat arrivals, due to, for example, garbage collection
+      # or network drops.
+      # default: 3 s
+      acceptable-heartbeat-pause = 60 s
+ 
+      # Number of member nodes that each member will send heartbeat messages to,
+      # i.e. each node will be monitored by this number of other nodes.
+      # default: 5
+      monitored-by-nr-of-members = 5
+      
+      # After the heartbeat request has been sent, the first failure detection
+      # will start after this period, even though no heartbeat message has
+      # been received.
+      # default: 1 s
+      expected-response-after = 20 s
+    }
   }
 
 #  persistence {