import string, cgi, time, socket, os, errno, logging, glob, subprocess
import cherrypy
from cherrypy import log, _cplogging
from cherrypy.process.plugins import Daemonizer, PIDFile
from logging import handlers

class States:
    Passive, Active, Failed, StartingUp, Maintenance, Frozen = range(6)

class HealthCheck(object):
    def __init__(self, script_dir, service_fqdn, my_ip, prev_state, prev_response, prev_addr):
        self.script_directory = script_dir
        self.fqdn = service_fqdn
        self.system_ip = my_ip
        self.last_state = prev_state
        self.last_response = prev_response
        self.last_address = prev_addr
        self.state_process = None
        self.startingup_countdown = 0

    def silentremove(self, filename):
        try:
            os.remove(self.script_directory + filename)
        except OSError as e:
            if (e.errno != errno.ENOENT):
                raise

    def logmsg(self, string):
        log.error(string, context='HTTP', severity=20, traceback=True)

    @cherrypy.expose
    def index(self):
        new_state = -1

        # If we had a state change process, poll it so that the process quits properly
        # when it has finished, rather than leaving a defunct thing lying around.
        if not (self.state_process is None):
            if (self.state_process.poll() is None):
                self.logmsg("State change script has finished")
                self.state_process = None

        # We can't abort the daemon starting if we fail to get the right
        # info so we only proceed if the various sanity checks work
        if (self.fqdn != "" and self.system_ip != ""):
            # Get the IP address from Route 53
            try:
                service_ip = socket.gethostbyname(self.fqdn)
            except Exception,e:
                self.logmsg("Got exception trying to get IP address for '%s': %s" % (fqdn, str(e)))
                service_ip = "unknown"

            # Only log IP address info when a change happens
            if (service_ip != self.last_address):
                self.logmsg("Service IP = %s, this IP = %s, last service IP = %s" % (service_ip, self.system_ip, self.last_address))

            if (os.path.isfile(self.script_directory + "/frozen")):
                if (self.last_state != States.Frozen):
                    self.logmsg("Frozen file exists")
                new_state = States.Frozen
            elif (os.path.isfile(self.script_directory + "/maintenance")):
                if (self.last_state != States.Maintenance):
                    self.logmsg("Maintenance file exists")
                new_state = States.Maintenance
            elif (self.last_state == States.Failed):
                new_state = States.Failed
            elif (self.last_state == States.Frozen):
                # We were frozen but now we aren't - figure out what our state
                # should be from the response code we were issuing
                if (self.last_response == 200):
                    self.last_state = new_state = States.Active
                elif (self.last_response == 202):
                    self.last_state = new_state = States.Passive
                elif (self.last_response == 203):
                    self.last_state = new_state = States.StartingUp
                elif (self.last_response == 500):
                    self.last_state = new_state = States.Failed
                elif (self.last_response == 503):
                    self.last_state = new_state = States.Maintenance
                else:
                    self.logmsg("Coming out of frozen, old response code was %s" % str(self.last_response))
                    self.last_state = new_state = States.Failed
            else:
                # The following logic ONLY works if:
                # a) there are two nodes
                # b) the IP addresses returned by Route 53 map onto those nodes

                # See if the external health checks think we're healthy?
                healthy = os.path.isfile(self.script_directory + "/healthy")

                if (service_ip != self.last_address):
                    # Active node has changed
                    if (service_ip == self.system_ip):
                        if (self.last_state == States.Passive):
                            # We've become the new active node - switch to starting up
                            self.logmsg("We're the active node and we were passive, now starting up")
                            new_state = States.StartingUp
                        else:
                            self.logmsg("Now active node with uncaught state of %s" % str(self.last_state))
                    elif (self.last_state == States.Active):
                        # We were the active node - see if we are still healthy,
                        # in which case we switch to passive, or if we have failed.
                        if (healthy):
                            self.logmsg("Active node has changed and we are healthy; switching to Passive")
                            new_state = States.Passive
                        else:
                            self.logmsg("Active node has changed and we aren't healthy; switching to Failed")
                            new_state = States.Failed
                    elif (self.last_state == States.Passive):
                        # We're on the passive node, we were passive and we're still passive
                        new_state = States.Passive
                    elif (self.last_state == States.StartingUp):
                        # We were starting up the services but the IP address has shifted so we need to
                        # stop the services - switch back to Passive
                        self.logmsg("No longer the active node, switching from StartingUp back to Passive")
                        new_state = States.Passive
                    else:
                        self.logmsg("IP address has changed with uncaught state of %s" % str(self.last_state))
                else:
                    if (service_ip == self.system_ip):
                        # We're the active node.
                        if (self.last_state == States.Maintenance):
                            self.logmsg("Active node, last state was Maintenance, switching to Passive")
                            new_state = States.Passive
                        elif (self.last_state == States.Passive):
                            self.logmsg("Active node, last state was Passive, switching to StartingUp")
                            new_state = States.StartingUp
                        elif (self.last_state == States.StartingUp):
                            if (healthy):
                                # Finished starting up
                                self.logmsg("Healthy active node, switching from StartingUp to Active")
                                new_state = States.Active
                            else:
                                # Still starting
                                new_state = States.StartingUp
                        elif (self.last_state == States.Active):
                            if (healthy):
                                new_state = States.Active
                            else:
                                self.logmsg("Active node but we aren't healthy; switching to Failed")
                                new_state = States.Failed
                        else:
                            self.logmsg("Active node with uncaught state of %s" % str(self.last_state))
                    else:
                        # We're the passive node
                        if (self.last_state != States.Passive):
                            self.logmsg("Passive node = passive state")
                        new_state = States.Passive
        else:
            # Sanity checks failed = failed :-)
            new_state = States.Failed

        if (new_state == States.Active):
            new_response = 200
        elif (new_state == States.Passive):
            new_response = 202
        elif (new_state == States.StartingUp):
            new_response = 203
        elif (new_state == States.Failed):
            new_response = 500
        elif (new_state == States.Maintenance):
            new_response = 503
        elif (new_state == States.Frozen):
            new_response = self.last_response
        else:
            self.logmsg("Unmatched state of %s" % str(new_state))
            new_response = 500

        # Clean up some of the trigger files
        if (new_state != States.Active):
            self.silentremove("/healthy")
        if (new_state != States.Maintenance):
            self.silentremove("/maintenance")
        if (new_state != States.Frozen):
            self.silentremove("/frozen")

        cherrypy.response.status = new_response
        cherrypy.response.headers['Content-type'] = 'text/html'
        cherrypy.response.body = ["<p>This is the Linaro health check service. State is %s and response code is %s</P>" % (str(new_state), str(new_response))]

        # Save away the various bits of information
        try:
            fp = open(self.script_directory + "/last_state", "w")
            fp.write(str(new_state))
            fp.close
        except Exception,e:
            self.logmsg("Got exception trying to save reported state: %s" % str(e))

        try:
            fp = open(self.script_directory + "/last_response", "w")
            fp.write(str(new_response))
            fp.close
        except Exception,e:
            self.logmsg("Got exception trying to save reported response: %s" % str(e))

        try:
            fp = open(self.script_directory + "/last_address", "w")
            fp.write(str(service_ip))
            fp.close
        except Exception,e:
            self.logmsg("Got exception trying to save service IP: %s" % str(e))

        # Set the starting up count - we won't run the change state script until
        # the count reaches 0. Since the script hasn't run, we'll stay in StartingUp
        # because we can't switch to Active until monit (or whatever) detects the service
        # as actually running.
        if (new_state == States.StartingUp and self.last_state != States.StartingUp):
            self.startingup_countdown = 10
            # Override last state in order to prevent the state change script from
            # running
            self.last_state = States.StartingUp
            self.logmsg("Holding start up for %s cycles" % str(self.startingup_countdown))
        elif (new_state == States.StartingUp and self.startingup_countdown > 0):
            # Decrement the countdown - if we reach zero, switch last state to Passive
            # to trigger the state change script
            self.startingup_countdown -= 1
            if (self.startingup_countdown == 0):
                self.last_state = States.Passive
            else:
                self.logmsg("Holding start up for %s cycles" % str(self.startingup_countdown))

        # See if a script exists for one of the supported state changes
        if (new_state != self.last_state):
            valid_state = False
            if (self.last_state == States.Passive):
                if (new_state == States.StartingUp or new_state == States.Maintenance or new_state == States.Frozen):
                    valid_state = True
            elif (self.last_state == States.StartingUp):
                if (new_state == States.Passive or new_state == States.Active or new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen):
                    valid_state = True
            elif (self.last_state == States.Active):
                if (new_state == States.Failed or new_state == States.Maintenance or new_state == States.Frozen or new_state == States.Passive):
                    valid_state = True
            elif (self.last_state == States.Failed):
                if (new_state == States.Maintenance or new_state == States.Frozen):
                    valid_state = True
            elif (self.last_state == States.Maintenance or self.last_state == States.Frozen):
                if (new_state == States.Passive):
                    valid_state = True

            if valid_state:
                filename = "from_%s_to_%s.*" % (str(self.last_state), str(new_state))
                files = glob.glob("%s/%s" % (self.script_directory, filename))
                if len(files) == 1:
                    # os.system(files[0])
                    self.logmsg("Firing state change script %s" % files[0])
                    self.state_process = subprocess.Popen([files[0]])
                elif (len(files) > 1):
                    self.logmsg("More than one matching script for stage change %s to %s" % (str(self.last_state), str(new_state)))
            else:
                self.logmsg("Unexpected stage change from %s to %s" % (str(self.last_state), str(new_state)))

        self.last_state = new_state
        self.last_response = new_response
        self.last_address = service_ip

def safereadline(logger, script_directory, filename):
    line = ""
    try:
        with open(script_directory + filename) as fp:
            line = fp.readline()
    except IOError as e:
        if (e.errno != errno.ENOENT):
            logger.error("Got exception trying to read %s: %s" % (filename, str(e)))
    return line

def main():
    # Set up a syslog logger so that we can report stuff before
    # the daemon starts up
    syslog_logger = logging.getLogger("linaro_healthcheck")
    syslog_logger.setLevel(logging.DEBUG)
    handler = logging.handlers.SysLogHandler(
        facility=logging.handlers.SysLogHandler.LOG_DAEMON, address="/dev/log")
    syslog_logger.addHandler(handler)

    script_dir = os.path.dirname(os.path.abspath(__file__))
    syslog_logger.debug("Linaro Healthcheck running from %s" % script_dir)

    service_fqdn = ""
    try:
        with open(script_dir + "/fqdn") as fp:
            service_fqdn = str(fp.readline()).rstrip()
    except Exception,e:
        syslog_logger.error("Got exception trying to get fqdn: %s" % str(e))

    # Try to get this system's IP address
    my_ip = ""
    try:
        s = socket.socket(socket.AF_INET, socket.SOCK_DGRAM)
        s.connect(("8.8.8.8", 53))
        my_ip = s.getsockname()[0]
        s.close
    except Exception,e:
        syslog_logger.error("Got exception trying to get system's IP address: %s" % str(e))

    # See if we've got a recorded last state
    prev_state = States.Passive
    line = safereadline(syslog_logger, script_dir, "/last_state")
    if (line != ""):
        prev_state = int(line)
    if ((prev_state < States.Passive) or (prev_state > States.Frozen)):
        prev_state = States.Passive

    # and see if we've got a recorded last response code
    line = safereadline(syslog_logger, script_dir, "/last_response")
    if (line != ""):
        prev_response = int(line)
    else:
        prev_response = 202

    # and a last IP address
    prev_address = safereadline(syslog_logger, script_dir, "/last_address")
    if (prev_address == ""):
        prev_address = "error"

    # If we were anything other then failed, maintenance or frozen last time
    # this script ran, switch to Passive so that things get started up
    # properly, i.e. Passive -> StartingUp -> Active
    if (prev_state != States.Failed and prev_state != States.Maintenance and prev_state != States.Frozen):
        prev_state = States.Passive
        prev_response = 202

    cherrypy.config.update({'server.socket_host': '0.0.0.0',
                            'server.socket_port': 1234,
                            'server.thread_pool': 1,
                            'server.thread_pool_max': 1,
                            'tools.staticdir.on': True,
                            'tools.staticdir.dir': script_dir,
                            'log.screen': True,
                            'tools.sessions.on': True,
                           })

    config = {'/':
                 {
                 }
             }

    application = cherrypy.tree.mount(HealthCheck(script_dir, service_fqdn, my_ip, prev_state, prev_response, prev_address), "/", config)

    #log = application.log
    logscope = cherrypy.log

    # Make a new RotatingFileHandler for the error log.
    fname = getattr(logscope, "rot_error_file", "%s/error.log" % script_dir)
    h = handlers.TimedRotatingFileHandler(fname, when='midnight')
    h.setLevel(logging.DEBUG)
    h.setFormatter(_cplogging.logfmt)
    logscope.error_file = ""
    logscope.error_log.addHandler(h)

    # Make a new RotatingFileHandler for the access log.
    fname = getattr(logscope, "rot_access_file", "%s/access.log" % script_dir)
    h = handlers.TimedRotatingFileHandler(fname, when='midnight')
    h.setLevel(logging.DEBUG)
    h.setFormatter(_cplogging.logfmt)
    logscope.access_file = ""
    logscope.access_log.addHandler(h)

    # Add a CTRL+C handler
    if hasattr(cherrypy.engine, 'signal_handler'):
        cherrypy.engine.signal_handler.subscribe()
    if hasattr(cherrypy.engine, 'console_control_handler'):
        cherrypy.engine.console_control_handler.subscrive()

    # Set up the daemon
    d = Daemonizer(cherrypy.engine)
    d.subscribe()
    PIDFile(cherrypy.engine, '/var/run/linaro-healthcheck.pid').subscribe()

    cherrypy.engine.start()
    cherrypy.engine.block()

if __name__ == '__main__':
    main()