+

Last commit for check_ceph_usage: fadfa27591da2271db54a38b16320672af96300b

Initial commit

Benjamin Renard [2013-12-03 10:00:39]
Initial commit
#!/usr/bin/python
#
# Nagios plugin to check Ceph cluster usage
#
#     Usage: check_ceph_usage [options]
#
#     Options:
#       -h, --help            show this help message and exit
#       -d, --debug
#       -b BIN, --bin=BIN     Ceph binary (default : /usr/bin/ceph)
#       --conf=CONF           Ceph configuration file
#       -m MON, --mon=MON     Ceph monitor address[:port]
#       -i ID, --id=ID        Ceph client id
#       -k KEYRING, --keyring=KEYRING
#                             Ceph client keyring file
#       -w WARNDATA, --warning-data=WARNDATA
#                             Warning data threshold (default : 70%)
#       -c CRITDATA, --critical-data=CRITDATA
#                             Critical data threshold (default : 85%)
#       -W WARNALLOC, --warning-allocated=WARNALLOC
#                             Warning allocated threshold (default : 80%)
#       -C CRITALLOC, --critical-allocated=CRITALLOC
#                             Critical allocated threshold (default : 90%)
#
# Copyright (c) 2013 Benjamin Renard <brenard@zionetrix.net>
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License version 2
# as published by the Free Software Foundation.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA  02111-1307, USA.
#

import sys,os,json,subprocess
from optparse import OptionParser

# Defaults applied when the matching command-line option is not given.
CEPH_COMMAND = '/usr/bin/ceph'  # ceph binary
WARN_DATA = 70    # warning threshold for stored data, in % of total bytes
CRIT_DATA = 85    # critical threshold for stored data, in % of total bytes
WARN_ALLOC = 80   # warning threshold for allocated space, in % of total bytes
CRIT_ALLOC = 90   # critical threshold for allocated space, in % of total bytes

# Nagios plugin exit codes.
STATUS_OK = 0
STATUS_WARNING = 1
STATUS_ERROR = 2
# Nagios calls exit code 2 "CRITICAL"; the alias lets either name resolve.
STATUS_CRITICAL = STATUS_ERROR
STATUS_UNKNOWN = 3

# Command-line interface: a debug switch, the settings handed to the ceph
# client, and the four percentage thresholds.
parser = OptionParser()

parser.add_option('-d', '--debug', dest="debug", action="store_true",
                  default=False)

# ceph client settings
parser.add_option('-b', '--bin', dest="bin", action="store", type='string',
                  default=CEPH_COMMAND,
                  help="Ceph binary (default : %s)" % CEPH_COMMAND)
parser.add_option('--conf', dest="conf", action="store", type='string',
                  default=None,
                  help="Ceph configuration file")
parser.add_option('-m', '--mon', dest="mon", action="store", type='string',
                  default=None,
                  help="Ceph monitor address[:port]")
parser.add_option('-i', '--id', dest="id", action="store", type='string',
                  default=None,
                  help="Ceph client id")
parser.add_option('-k', '--keyring', dest="keyring", action="store",
                  type='string', default=None,
                  help="Ceph client keyring file")

# thresholds on stored data (lower-case flags), in percent
parser.add_option('-w', '--warning-data', dest="warndata", action="store",
                  type='int', default=WARN_DATA,
                  help="Warning data threshold (default : %s%%)" % WARN_DATA)
parser.add_option('-c', '--critical-data', dest="critdata", action="store",
                  type='int', default=CRIT_DATA,
                  help="Critical data threshold (default : %s%%)" % CRIT_DATA)

# thresholds on allocated space (upper-case flags), in percent
parser.add_option('-W', '--warning-allocated', dest="warnalloc",
                  action="store", type='int', default=WARN_ALLOC,
                  help="Warning allocated threshold (default : %s%%)" % WARN_ALLOC)
parser.add_option('-C', '--critical-allocated', dest="critalloc",
                  action="store", type='int', default=CRIT_ALLOC,
                  help="Critical allocated threshold (default : %s%%)" % CRIT_ALLOC)

options, args = parser.parse_args()

 # validate args
# Each path handed to the ceph client must exist. The binary is always
# checked; the configuration file and the keyring only when they were given.
# The first missing path is reported and the plugin exits as UNKNOWN.
for mandatory, path, message in (
        (True, options.bin, "ERROR: ceph executable '%s' doesn't exist"),
        (False, options.conf, "ERROR: ceph conf file '%s' doesn't exist"),
        (False, options.keyring, "ERROR: keyring file '%s' doesn't exist"),
):
    if (mandatory or path) and not os.path.exists(path):
        print(message % path)
        sys.exit(STATUS_UNKNOWN)

# Assemble the ceph command line: the binary, then a flag/value pair for each
# client setting that was supplied, then the "status" sub-command with JSON
# output.
ceph_cmd = [options.bin]
for flag, value in (('-m', options.mon),
                    ('-c', options.conf),
                    ('--id', options.id),
                    ('--keyring', options.keyring)):
    if value:
        ceph_cmd += [flag, value]
ceph_cmd += ['status', '--format=json']

# Run the ceph status command, compute the share of the cluster that holds
# data and the share that is allocated, and report both in Nagios plugin
# format ("<status text>|<performance data>") before exiting with the matching
# Nagios return code. Any failure to obtain or interpret the figures is
# reported as UNKNOWN rather than left to an uncaught traceback.
try:
    p = subprocess.Popen(ceph_cmd, stdout=subprocess.PIPE,
                         stderr=subprocess.PIPE)
    output = p.communicate()[0]
except OSError:
    # The path exists but could not be executed (e.g. not executable).
    output = None

if not output:
    print("UNKNOWN : fail to execute ceph status command")
    sys.exit(STATUS_UNKNOWN)

try:
    pgmap = json.loads(output)['pgmap']
    data_bytes = int(pgmap['data_bytes'])
    bytes_used = int(pgmap['bytes_used'])
    bytes_total = int(pgmap['bytes_total'])
except (ValueError, KeyError, TypeError):
    # Not JSON, no 'pgmap' section, or one of the three counters is
    # missing or not a number.
    print("UNKNOWN : fail to read pgmap status")
    sys.exit(STATUS_UNKNOWN)

if options.debug:
    print("data : %s" % data_bytes)
    print("allocated : %s" % bytes_used)
    print("total : %s" % bytes_total)

if bytes_total <= 0:
    # Percentages are undefined without a capacity; avoid dividing by zero.
    print("UNKNOWN : ceph reports no storage capacity (bytes_total=%s)"
          % bytes_total)
    sys.exit(STATUS_UNKNOWN)

# float() forces true division: with plain integers Python 2 truncates the
# quotient, which makes the rounding to one decimal place meaningless.
per_data = round(float(data_bytes) * 100 / bytes_total, 1)
per_alloc = round(float(bytes_used) * 100 / bytes_total, 1)

# The thresholds converted from percent to bytes, for the performance data.
data_warn_t = bytes_total * options.warndata // 100
data_crit_t = bytes_total * options.critdata // 100
alloc_warn_t = bytes_total * options.warnalloc // 100
alloc_crit_t = bytes_total * options.critalloc // 100

if options.debug:
    print("%% data : %s" % per_data)
    print("%% allocated : %s" % per_alloc)

# Critical takes precedence over warning; either metric can trigger a state.
if per_data > options.critdata or per_alloc > options.critalloc:
    status, status_txt = STATUS_ERROR, "CRITICAL"
elif per_data > options.warndata or per_alloc > options.warnalloc:
    status, status_txt = STATUS_WARNING, "WARNING"
else:
    status, status_txt = STATUS_OK, "OK"

# Performance data follows the "|"; each metric is
# label=value[UOM];warn;crit;min;max and metrics are separated by a space.
print("%s : %s%% allocated / %s%% really used|"
      "allocated=%sB;%s;%s;0;%s used=%sB;%s;%s;0;%s" % (
          status_txt, per_alloc, per_data,
          bytes_used, alloc_warn_t, alloc_crit_t, bytes_total,
          data_bytes, data_warn_t, data_crit_t, bytes_total))
sys.exit(status)
ViewGit