summary | refs | log | tree | commit | diff
diff options
context:
space:
mode:
author Philip Colmer <philip.colmer@linaro.org> 2014-01-22 15:19:09 +0000
committer Philip Colmer <philip.colmer@linaro.org> 2014-01-22 15:19:09 +0000
commit b8f443b0ca6368406f2040d81bb29538a623530e (patch)
tree d7769db376fdd974512b64a3bf9486290f732ad3
parent 4a86224dd881b564aa1556ed3de061c0df95ce53 (diff)
More fixes to the healthcheck script
-rw-r--r-- healthcheck.py 43
1 file changed, 42 insertions, 1 deletion
diff --git a/healthcheck.py b/healthcheck.py
index e78bc88..f8b1ef5 100644
--- a/healthcheck.py
+++ b/healthcheck.py
@@ -16,6 +16,7 @@ class HealthCheck(object):
self.last_response = prev_response
self.last_address = prev_addr
self.state_process = None
+ self.startingup_countdown = 0
def silentremove(self, filename):
try:
@@ -62,6 +63,22 @@ class HealthCheck(object):
new_state = States.Maintenance
elif (self.last_state == States.Failed):
new_state = States.Failed
+ elif (self.last_state == States.Frozen):
+ # We were frozen but now we aren't - figure out what our state
+ # should be from the response code we were issuing
+ if (self.last_response == 200):
+ self.last_state = new_state = States.Active
+ elif (self.last_response == 202):
+ self.last_state = new_state = States.Passive
+ elif (self.last_response == 203):
+ self.last_state = new_state = States.StartingUp
+ elif (self.last_response == 500):
+ self.last_state = new_state = States.Failed
+ elif (self.last_response == 503):
+ self.last_state = new_state = States.Maintenance
+ else:
+ self.logmsg("Coming out of frozen, old response code was %s" % str(self.last_response))
+ self.last_state = new_state = States.Failed
else:
# The following logic ONLY works if:
# a) there are two nodes
@@ -91,6 +108,11 @@ class HealthCheck(object):
elif (self.last_state == States.Passive):
# We're on the passive node, we were passive and we're still passive
new_state = States.Passive
+ elif (self.last_state == States.StartingUp):
+ # We were starting up the services but the IP address has shifted so we need to
+ # stop the services - switch back to Passive
+ self.logmsg("No longer the active node, switching from StartingUp back to Passive")
+ new_state = States.Passive
else:
self.logmsg("IP address has changed with uncaught state of %s" % str(self.last_state))
else:
@@ -177,6 +199,25 @@ class HealthCheck(object):
except Exception,e:
self.logmsg("Got exception trying to save service IP: %s" % str(e))
+ # Set the starting up count - we won't run the change state script until
+ # the count reaches 0. Since the script hasn't run, we'll stay in StartingUp
+ # because we can't switch to Active until monit (or whatever) detects the service
+ # as actually running.
+ if (new_state == States.StartingUp and self.last_state != States.StartingUp):
+ self.startingup_countdown = 10
+ # Override last state in order to prevent the state change script from
+ # running
+ self.last_state = States.StartingUp
+ self.logmsg("Holding start up for %s cycles" % str(self.startingup_countdown))
+ elif (new_state == States.StartingUp and self.startingup_countdown > 0):
+ # Decrement the countdown - if we reach zero, switch last state to Passive
+ # to trigger the state change script
+ self.startingup_countdown -= 1
+ if (self.startingup_countdown == 0):
+ self.last_state = States.Passive
+ else:
+ self.logmsg("Holding start up for %s cycles" % str(self.startingup_countdown))
+
# See if a script exists for one of the supported state changes
if (new_state != self.last_state):
valid_state = False
@@ -184,7 +225,7 @@ class HealthCheck(object):
if (new_state == States.StartingUp or new_state == States.Maintenance or new_state == States.Frozen):
valid_state = True
elif (self.last_state == States.StartingUp):
- if (new_state == States.Active or new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen):
+ if (new_state == States.Passive or new_state == States.Active or new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen):
valid_state = True
elif (self.last_state == States.Active):
if (new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen or new_state == States.Passive):