diff options
author | Philip Colmer <philip.colmer@linaro.org> | 2014-01-22 15:19:09 +0000 |
---|---|---|
committer | Philip Colmer <philip.colmer@linaro.org> | 2014-01-22 15:19:09 +0000 |
commit | b8f443b0ca6368406f2040d81bb29538a623530e (patch) | |
tree | d7769db376fdd974512b64a3bf9486290f732ad3 /healthcheck.py | |
parent | 4a86224dd881b564aa1556ed3de061c0df95ce53 (diff) |
More fixes to the healthcheck script
Diffstat (limited to 'healthcheck.py')
-rw-r--r-- | healthcheck.py | 43 |
1 file changed, 42 insertions, 1 deletion
diff --git a/healthcheck.py b/healthcheck.py index e78bc88..f8b1ef5 100644 --- a/healthcheck.py +++ b/healthcheck.py @@ -16,6 +16,7 @@ class HealthCheck(object): self.last_response = prev_response self.last_address = prev_addr self.state_process = None + self.startingup_countdown = 0 def silentremove(self, filename): try: @@ -62,6 +63,22 @@ class HealthCheck(object): new_state = States.Maintenance elif (self.last_state == States.Failed): new_state = States.Failed + elif (self.last_state == States.Frozen): + # We were frozen but now we aren't - figure out what our state + # should be from the response code we were issuing + if (self.last_response == 200): + self.last_state = new_state = States.Active + elif (self.last_response == 202): + self.last_state = new_state = States.Passive + elif (self.last_response == 203): + self.last_state = new_state = States.StartingUp + elif (self.last_response == 500): + self.last_state = new_state = States.Failed + elif (self.last_response == 503): + self.last_state = new_state = States.Maintenance + else: + self.logmsg("Coming out of frozen, old response code was %s" % str(self.last_response)) + self.last_state = new_state = States.Failed else: # The following logic ONLY works if: # a) there are two nodes @@ -91,6 +108,11 @@ class HealthCheck(object): elif (self.last_state == States.Passive): # We're on the passive node, we were passive and we're still passive new_state = States.Passive + elif (self.last_state == States.StartingUp): + # We were starting up the services but the IP address has shifted so we need to + # stop the services - switch back to Passive + self.logmsg("No longer the active node, switching from StartingUp back to Passive") + new_state = States.Passive else: self.logmsg("IP address has changed with uncaught state of %s" % str(self.last_state)) else: @@ -177,6 +199,25 @@ class HealthCheck(object): except Exception,e: self.logmsg("Got exception trying to save service IP: %s" % str(e)) + # Set the starting up count 
- we won't run the change state script until + # the count reaches 0. Since the script hasn't run, we'll stay in StartingUp + # because we can't switch to Active until monit (or whatever) detects the service + # as actually running. + if (new_state == States.StartingUp and self.last_state != States.StartingUp): + self.startingup_countdown = 10 + # Override last state in order to prevent the state change script from + # running + self.last_state = States.StartingUp + self.logmsg("Holding start up for %s cycles" % str(self.startingup_countdown)) + elif (new_state == States.StartingUp and self.startingup_countdown > 0): + # Decrement the countdown - if we reach zero, switch last state to Passive + # to trigger the state change script + self.startingup_countdown -= 1 + if (self.startingup_countdown == 0): + self.last_state = States.Passive + else: + self.logmsg("Holding start up for %s cycles" % str(self.startingup_countdown)) + # See if a script exists for one of the supported state changes if (new_state != self.last_state): valid_state = False @@ -184,7 +225,7 @@ class HealthCheck(object): if (new_state == States.StartingUp or new_state == States.Maintenance or new_state == States.Frozen): valid_state = True elif (self.last_state == States.StartingUp): - if (new_state == States.Active or new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen): + if (new_state == States.Passive or new_state == States.Active or new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen): valid_state = True elif (self.last_state == States.Active): if (new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen or new_state == States.Passive): |