From 3465ecd59a583010b15f951537e83ea922882e7d Mon Sep 17 00:00:00 2001 From: Andrew McDermott Date: Tue, 15 Oct 2013 22:11:20 +0100 Subject: meta-linaro: ganglia: add monitoring utilities to system startup Added gmond and gmetad to system startup. Also ships with default configurations; for gmond this comes from running `gmond --default_config'. Change-Id: I5274b722245cbc2476b08c293a5b25d2cd25e36b Signed-off-by: Andrew McDermott --- .../ganglia/ganglia/gmetad-example.conf | 122 +++++++ .../recipes-extra/ganglia/ganglia/gmetad.init | 48 +++ .../ganglia/ganglia/gmond-example.conf | 383 +++++++++++++++++++++ .../recipes-extra/ganglia/ganglia/gmond.init | 46 +++ 4 files changed, 599 insertions(+) create mode 100644 meta-linaro/recipes-extra/ganglia/ganglia/gmetad-example.conf create mode 100755 meta-linaro/recipes-extra/ganglia/ganglia/gmetad.init create mode 100644 meta-linaro/recipes-extra/ganglia/ganglia/gmond-example.conf create mode 100755 meta-linaro/recipes-extra/ganglia/ganglia/gmond.init (limited to 'meta-linaro/recipes-extra/ganglia/ganglia') diff --git a/meta-linaro/recipes-extra/ganglia/ganglia/gmetad-example.conf b/meta-linaro/recipes-extra/ganglia/ganglia/gmetad-example.conf new file mode 100644 index 00000000..7f2fd7ff --- /dev/null +++ b/meta-linaro/recipes-extra/ganglia/ganglia/gmetad-example.conf @@ -0,0 +1,122 @@ +# This is an example of a Ganglia Meta Daemon configuration file +# http://ganglia.sourceforge.net/ +# +#------------------------------------------------------------------------------- +# Setting the debug_level to 1 will keep daemon in the foreground and +# show only error messages. Setting this value higher than 1 will make +# gmetad output debugging information and stay in the foreground. +# default: 0 +# debug_level 10 +# +#------------------------------------------------------------------------------- +# What to monitor. The most important section of this file. 
+# +# The data_source tag specifies either a cluster or a grid to +# monitor. If we detect the source is a cluster, we will maintain a complete +# set of RRD databases for it, which can be used to create historical +# graphs of the metrics. If the source is a grid (it comes from another gmetad), +# we will only maintain summary RRDs for it. +# +# Format: +# data_source "my cluster" [polling interval] address1:port address2:port ... +# +# The keyword 'data_source' must immediately be followed by a unique +# string which identifies the source, then an optional polling interval in +# seconds. The source will be polled at this interval on average. +# If the polling interval is omitted, 15sec is assumed. +# +# A list of machines which service the data source follows, in the +# format ip:port, or name:port. If a port is not specified then 8649 +# (the default gmond port) is assumed. +# default: There is no default value +# +# data_source "my cluster" 10 localhost my.machine.edu:8649 1.2.3.5:8655 +# data_source "my grid" 50 1.3.4.7:8655 grid.org:8651 grid-backup.org:8651 +# data_source "another source" 1.3.4.7:8655 1.3.4.8 + +data_source "my cluster" localhost + +# +# Round-Robin Archives +# You can specify custom Round-Robin archives here (defaults are listed below) +# +# RRAs "RRA:AVERAGE:0.5:1:244" "RRA:AVERAGE:0.5:24:244" "RRA:AVERAGE:0.5:168:244" "RRA:AVERAGE:0.5:672:244" \ +# "RRA:AVERAGE:0.5:5760:374" +# + +# +#------------------------------------------------------------------------------- +# Scalability mode. If on, we summarize over downstream grids, and respect +# authority tags. If off, we take on 2.5.0-era behavior: we do not wrap our output +# in <GRID></GRID> tags, we ignore all <GRID> tags we see, and always assume +# we are the "authority" on data source feeds. This approach does not scale to +# large groups of clusters, but is provided for backwards compatibility. 
+# default: on +# scalable off +# +#------------------------------------------------------------------------------- +# The name of this Grid. All the data sources above will be wrapped in a GRID +# tag with this name. +# default: unspecified +# gridname "MyGrid" +# +#------------------------------------------------------------------------------- +# The authority URL for this grid. Used by other gmetads to locate graphs +# for our data sources. Generally points to a ganglia/ +# website on this machine. +# default: "http://hostname/ganglia/", +# where hostname is the name of this machine, as defined by gethostname(). +# authority "http://mycluster.org/newprefix/" +# +#------------------------------------------------------------------------------- +# List of machines this gmetad will share XML with. Localhost +# is always trusted. +# default: There is no default value +# trusted_hosts 127.0.0.1 169.229.50.165 my.gmetad.org +# +#------------------------------------------------------------------------------- +# If you want any host which connects to the gmetad XML to receive +# data, then set this value to "on" +# default: off +# all_trusted on +# +#------------------------------------------------------------------------------- +# If you don't want gmetad to setuid then set this to off +# default: on +# setuid off +# +#------------------------------------------------------------------------------- +# User gmetad will setuid to (defaults to "nobody") +# default: "nobody" +# setuid_username "nobody" +# +#------------------------------------------------------------------------------- +# The port gmetad will answer requests for XML +# default: 8651 +# xml_port 8651 +# +#------------------------------------------------------------------------------- +# The port gmetad will answer queries for XML. This facility allows +# simple subtree and summation views of the XML tree. 
+# default: 8652 +# interactive_port 8652 +# +#------------------------------------------------------------------------------- +# The number of threads answering XML requests +# default: 4 +# server_threads 10 +# +#------------------------------------------------------------------------------- +# Where gmetad stores its round-robin databases +# default: "/var/lib/ganglia/rrds" +# rrd_rootdir "/some/other/place" +# +#------------------------------------------------------------------------------- +# In earlier versions of gmetad, hostnames were handled in a case +# sensitive manner +# If your hostname directories have been renamed to lower case, +# set this option to 0 to disable backward compatibility. +# From version 3.2, backwards compatibility will be disabled by default. +# default: 1 (for gmetad < 3.2) +# default: 0 (for gmetad >= 3.2) +case_sensitive_hostnames 1 diff --git a/meta-linaro/recipes-extra/ganglia/ganglia/gmetad.init b/meta-linaro/recipes-extra/ganglia/ganglia/gmetad.init new file mode 100755 index 00000000..822f59c6 --- /dev/null +++ b/meta-linaro/recipes-extra/ganglia/ganglia/gmetad.init @@ -0,0 +1,48 @@ +#! /bin/sh +### BEGIN INIT INFO +# Provides: gmetad +# Required-Start: $local_fs +# Should-Start: +# Required-Stop: $local_fs +# Should-Stop: +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: gmetad initscript +# Description: Starts the gmetad monitoring daemon +### END INIT INFO + +PATH=/sbin:/usr/sbin:/bin:/usr/bin + +DESC="Ganglia Monitor Meta-Daemon" +NAME="gmetad" +GMETAD=`which $NAME` + +. /etc/init.d/functions || exit 1 + +# Exit if the package is not installed +[ -x "$GMETAD" ] || exit 0 + +case "$1" in + start) + echo -n "Starting $DESC: $NAME... " + start-stop-daemon -S -x $GMETAD -- $GMETAD_ARGS + echo "done." + ;; + stop) + echo -n "Stopping $DESC: $NAME... " + start-stop-daemon -K -x $GMETAD + echo "done." + ;; + restart) + echo "Restarting $DESC: $NAME... " + $0 stop + $0 start + echo "done." 
+ ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 + ;; +esac + +exit 0 diff --git a/meta-linaro/recipes-extra/ganglia/ganglia/gmond-example.conf b/meta-linaro/recipes-extra/ganglia/ganglia/gmond-example.conf new file mode 100644 index 00000000..a94b4b5d --- /dev/null +++ b/meta-linaro/recipes-extra/ganglia/ganglia/gmond-example.conf @@ -0,0 +1,383 @@ +/* This configuration is as close to 2.5.x default behavior as possible + The values closely match ./gmond/metric.h definitions in 2.5.x */ +globals { + daemonize = yes + setuid = yes + user = nobody + debug_level = 0 + max_udp_msg_len = 1472 + mute = no + deaf = yes + allow_extra_data = yes + host_dmax = 86400 /*secs. Expires (removes from web interface) hosts in 1 day */ + host_tmax = 20 /*secs */ + cleanup_threshold = 300 /*secs */ + gexec = no + # By default gmond will use reverse DNS resolution when displaying your hostname + # Uncommenting following value will override that value. + # override_hostname = "mywebserver.domain.com" + # If you are not using multicast this value should be set to something other than 0. + # Otherwise if you restart aggregator gmond you will get empty graphs. 60 seconds is reasonable + send_metadata_interval = 20 /*secs */ + +} + +/* + * The cluster attributes specified will be used as part of the <CLUSTER> + * tag that will wrap all hosts collected by this instance. + */ +cluster { + name = "unspecified" + owner = "unspecified" + latlong = "unspecified" + url = "unspecified" +} + +/* The host section describes attributes of the host, like the location */ +host { + location = "unspecified" +} + +/* Feel free to specify as many udp_send_channels as you like. Gmond + used to only support having a single channel */ +udp_send_channel { + #bind_hostname = yes # Highly recommended, soon to be default. + # This option tells gmond to use a source address + # that resolves to the machine's hostname. 
Without + # this, the metrics may appear to come from any + # interface and the DNS names associated with + # those IPs will be used to create the RRDs. + mcast_join = 239.2.11.71 + port = 8649 + ttl = 1 +} + +/* You can specify as many udp_recv_channels as you like as well. */ +udp_recv_channel { + mcast_join = 239.2.11.71 + port = 8649 + bind = 239.2.11.71 + retry_bind = true + # Size of the UDP buffer. If you are handling lots of metrics you really + # should bump it up to e.g. 10MB or even higher. + # buffer = 10485760 +} + +/* You can specify as many tcp_accept_channels as you like to share + an xml description of the state of the cluster */ +tcp_accept_channel { + port = 8649 + # If you want to gzip XML output + gzip_output = no +} + +/* Channel to receive sFlow datagrams */ +#udp_recv_channel { +# port = 6343 +#} + +/* Optional sFlow settings */ +#sflow { +# udp_port = 6343 +# accept_vm_metrics = yes +# accept_jvm_metrics = yes +# multiple_jvm_instances = no +# accept_http_metrics = yes +# multiple_http_instances = no +# accept_memcache_metrics = yes +# multiple_memcache_instances = no +#} + +/* Each metrics module that is referenced by gmond must be specified and + loaded. If the module has been statically linked with gmond, it does + not require a load path. However all dynamically loadable modules must + include a load path. */ +modules { + module { + name = "core_metrics" + } + module { + name = "cpu_module" + path = "modcpu.so" + } + module { + name = "disk_module" + path = "moddisk.so" + } + module { + name = "load_module" + path = "modload.so" + } + module { + name = "mem_module" + path = "modmem.so" + } + module { + name = "net_module" + path = "modnet.so" + } + module { + name = "proc_module" + path = "modproc.so" + } + module { + name = "sys_module" + path = "modsys.so" + } +} + +/* The old internal 2.5.x metric array has been replaced by the following + collection_group directives. 
What follows is the default behavior for + collecting and sending metrics that is as close to 2.5.x behavior as + possible. */ + +/* This collection group will cause a heartbeat (or beacon) to be sent every + 20 seconds. In the heartbeat is the GMOND_STARTED data which expresses + the age of the running gmond. */ +collection_group { + collect_once = yes + time_threshold = 20 + metric { + name = "heartbeat" + } +} + +/* This collection group will send general info about this host every + 1200 secs. + This information doesn't change between reboots and is only collected + once. */ +collection_group { + collect_once = yes + time_threshold = 1200 + metric { + name = "cpu_num" + title = "CPU Count" + } + metric { + name = "cpu_speed" + title = "CPU Speed" + } + metric { + name = "mem_total" + title = "Memory Total" + } + /* Should this be here? Swap can be added/removed between reboots. */ + metric { + name = "swap_total" + title = "Swap Space Total" + } + metric { + name = "boottime" + title = "Last Boot Time" + } + metric { + name = "machine_type" + title = "Machine Type" + } + metric { + name = "os_name" + title = "Operating System" + } + metric { + name = "os_release" + title = "Operating System Release" + } + metric { + name = "location" + title = "Location" + } +} + +/* This collection group will send the status of gexecd for this host + every 300 secs.*/ +/* Unlike 2.5.x the default behavior is to report gexecd OFF. */ +collection_group { + collect_once = yes + time_threshold = 300 + metric { + name = "gexec" + title = "Gexec Status" + } +} + +/* This collection group will collect the CPU status info every 20 secs. + The time threshold is set to 90 seconds. In honesty, this + time_threshold could be set significantly higher to reduce + unnecessary network chatter. 
*/ +collection_group { + collect_every = 20 + time_threshold = 90 + /* CPU status */ + metric { + name = "cpu_user" + value_threshold = "1.0" + title = "CPU User" + } + metric { + name = "cpu_system" + value_threshold = "1.0" + title = "CPU System" + } + metric { + name = "cpu_idle" + value_threshold = "5.0" + title = "CPU Idle" + } + metric { + name = "cpu_nice" + value_threshold = "1.0" + title = "CPU Nice" + } + metric { + name = "cpu_aidle" + value_threshold = "5.0" + title = "CPU aidle" + } + metric { + name = "cpu_wio" + value_threshold = "1.0" + title = "CPU wio" + } + metric { + name = "cpu_steal" + value_threshold = "1.0" + title = "CPU steal" + } + /* The next two metrics are optional if you want more detail... + ... since they are accounted for in cpu_system. + metric { + name = "cpu_intr" + value_threshold = "1.0" + title = "CPU intr" + } + metric { + name = "cpu_sintr" + value_threshold = "1.0" + title = "CPU sintr" + } + */ +} + +collection_group { + collect_every = 20 + time_threshold = 90 + /* Load Averages */ + metric { + name = "load_one" + value_threshold = "1.0" + title = "One Minute Load Average" + } + metric { + name = "load_five" + value_threshold = "1.0" + title = "Five Minute Load Average" + } + metric { + name = "load_fifteen" + value_threshold = "1.0" + title = "Fifteen Minute Load Average" + } +} + +/* This group collects the number of running and total processes */ +collection_group { + collect_every = 80 + time_threshold = 950 + metric { + name = "proc_run" + value_threshold = "1.0" + title = "Total Running Processes" + } + metric { + name = "proc_total" + value_threshold = "1.0" + title = "Total Processes" + } +} + +/* This collection group grabs the volatile memory metrics every 40 secs and + sends them at least every 180 secs. This time_threshold can be increased + significantly to reduce unneeded network traffic. 
*/ +collection_group { + collect_every = 40 + time_threshold = 180 + metric { + name = "mem_free" + value_threshold = "1024.0" + title = "Free Memory" + } + metric { + name = "mem_shared" + value_threshold = "1024.0" + title = "Shared Memory" + } + metric { + name = "mem_buffers" + value_threshold = "1024.0" + title = "Memory Buffers" + } + metric { + name = "mem_cached" + value_threshold = "1024.0" + title = "Cached Memory" + } + metric { + name = "swap_free" + value_threshold = "1024.0" + title = "Free Swap Space" + } +} + +collection_group { + collect_every = 40 + time_threshold = 300 + metric { + name = "bytes_out" + value_threshold = 4096 + title = "Bytes Sent" + } + metric { + name = "bytes_in" + value_threshold = 4096 + title = "Bytes Received" + } + metric { + name = "pkts_in" + value_threshold = 256 + title = "Packets Received" + } + metric { + name = "pkts_out" + value_threshold = 256 + title = "Packets Sent" + } +} + +/* Different than 2.5.x default since the old config made no sense */ +collection_group { + collect_every = 1800 + time_threshold = 3600 + metric { + name = "disk_total" + value_threshold = 1.0 + title = "Total Disk Space" + } +} + +collection_group { + collect_every = 40 + time_threshold = 180 + metric { + name = "disk_free" + value_threshold = 1.0 + title = "Disk Space Available" + } + metric { + name = "part_max_used" + value_threshold = 1.0 + title = "Maximum Disk Space Used" + } +} + +include ("/etc/conf.d/*.conf") + diff --git a/meta-linaro/recipes-extra/ganglia/ganglia/gmond.init b/meta-linaro/recipes-extra/ganglia/ganglia/gmond.init new file mode 100755 index 00000000..4509329f --- /dev/null +++ b/meta-linaro/recipes-extra/ganglia/ganglia/gmond.init @@ -0,0 +1,46 @@ +#!/bin/sh +### BEGIN INIT INFO +# Provides: gmond +# Required-Start: $local_fs +# Required-Stop: $local_fs +# Default-Start: 2 3 4 5 +# Default-Stop: 0 1 6 +# Short-Description: Starts ganglia monitor +# Description: Starts the gmond monitoring daemon +### END INIT INFO 
+ +PATH=/sbin:/usr/sbin:/bin:/usr/bin + +DESC="Ganglia Monitor Daemon" +NAME="gmond" +GMOND=`which $NAME` + +. /etc/init.d/functions || exit 1 + +# Exit if the package is not installed +[ -x "$GMOND" ] || exit 0 + +case "$1" in + start) + echo -n "Starting $DESC: $NAME... " + start-stop-daemon -S -x $GMOND -- $GMOND_ARGS + echo "done." + ;; + stop) + echo -n "Stopping $DESC: $NAME... " + start-stop-daemon -K -x $GMOND + echo "done." + ;; + restart) + echo "Restarting $DESC: $NAME... " + $0 stop + $0 start + echo "done." + ;; + *) + echo "Usage: $0 {start|stop|restart}" + exit 1 + ;; +esac + +exit 0 -- cgit v1.2.3