From 4a2d8bfc6ba5807d525a9722df8257098844426a Mon Sep 17 00:00:00 2001 From: "Rodrigo Q. Saramago" Date: Thu, 3 Dec 2015 01:14:46 -0200 Subject: [PATCH] Better failure detector timeout in deploy config --- benchmark/deploy/cfabcast-deploy.conf | 58 +++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) diff --git a/benchmark/deploy/cfabcast-deploy.conf b/benchmark/deploy/cfabcast-deploy.conf index 1f020b1..4f5a6e1 100644 --- a/benchmark/deploy/cfabcast-deploy.conf +++ b/benchmark/deploy/cfabcast-deploy.conf @@ -23,6 +23,64 @@ akka { seed-nodes = [] auto-down-unreachable-after = off + + # Settings for the Phi accrual failure detector (http://ddg.jaist.ac.jp/pub/HDY+04.pdf + # [Hayashibara et al]) used by the cluster subsystem to detect unreachable + # members. + # The default PhiAccrualFailureDetector will trigger if there are no heartbeats within + # the duration heartbeat-interval + acceptable-heartbeat-pause + threshold_adjustment, + # i.e. around 5.5 seconds with default settings (the values set below are larger). + failure-detector { + + # FQCN of the failure detector implementation. + # It must implement akka.remote.FailureDetector and have + # a public constructor with a com.typesafe.config.Config and + # akka.actor.EventStream parameter. + implementation-class = "akka.remote.PhiAccrualFailureDetector" + + # How often keep-alive heartbeat messages should be sent to each connection. + # default: 1 s + heartbeat-interval = 5 s + + # Defines the failure detector threshold. + # A low threshold is prone to generate many wrong suspicions but ensures + # a quick detection in the event of a real crash. Conversely, a high + # threshold generates fewer mistakes but needs more time to detect + # actual crashes. + # default: 8.0 + threshold = 12.0 + + # Number of samples of inter-heartbeat arrival times to adaptively + # calculate the failure timeout for connections.
+ # default: 1000 + max-sample-size = 1000 + + # Minimum standard deviation to use for the normal distribution in + # AccrualFailureDetector. Too low a standard deviation might result in + # too much sensitivity for sudden, but normal, deviations in heartbeat + # inter-arrival times. + # default: 100 ms + min-std-deviation = 100 ms + + # Duration of potentially lost/delayed heartbeats that will be + # accepted before considering it to be an anomaly. + # This margin is important to be able to survive sudden, occasional, + # pauses in heartbeat arrivals, due to, for example, garbage collection or + # network drops. + # default: 3 s + acceptable-heartbeat-pause = 60 s + + # Number of member nodes that each member will send heartbeat messages to, + # i.e. each node will be monitored by this number of other nodes. + # default: 5 + monitored-by-nr-of-members = 5 + + # After the heartbeat request has been sent the first failure detection + # will start after this period, even though no heartbeat message has + # been received. + # default: 1 s + expected-response-after = 20 s + } } # persistence {