Initial commit
Benjamin Renard

Benjamin Renard committed on 2012-03-14 15:04:14
Showing 2 changed files, with 161 additions and 0 deletions.

... ...
@@ -0,0 +1 @@
1
+*~
... ...
@@ -0,0 +1,160 @@
1
+#!/bin/bash
2
+#
3
+# Nagios plugin to check PostgreSQL streaming replication state
4
+# 
5
+# Can be used on the master or on a standby node
6
+#
7
+# Requirement : 
8
+#
9
+#     On master node : Slaves must be able to connect with user PG_USER
10
+#                      to database postgres as trust
11
+#
12
+#     On standby node : PG_USER must be able to connect locally as trust
13
+#
14
+# Author : Benjamin Renard <brenard@easter-eggs.com>
15
+# Date : Wed, 14 Mar 2012 14:45:55 +0000
16
+# Source : http://git.zionetrix.net/check_pg_streaming_replication
17
+#
18
+
19
+PG_USER=postgres
20
+PSQL_BIN=/usr/bin/psql
21
+PG_MAIN=/var/lib/postgresql/9.1/main
22
+RECOVERY_CONF=$PG_MAIN/recovery.conf
23
+PG_DEFAULT_PORT=5432
24
+
25
+DEBUG=0
26
+[ "$1" == "-d" ] && DEBUG=1
27
+
28
+function psql_get () {
29
+	echo "$1"|su - $PG_USER -c "$PSQL_BIN -t -P format=unaligned"
30
+}
31
+
32
+function debug() {
33
+	if [ $DEBUG -eq 1 ]
34
+	then
35
+		echo "[DEBUG] $1"
36
+	fi
37
+}
38
+
39
+# Is Postgres running?
40
+if [ $DEBUG -eq 0 ]
41
+then
42
+	psql_get '\q' 2> /dev/null
43
+else
44
+	psql_get '\q'
45
+fi
46
+if [ $? -ne 0 ]
47
+then
48
+	echo "CRITICAL : Postgres is not running !"
49
+	exit 2
50
+fi
51
+debug "Postgres is running"
52
+
53
+RECOVERY_MODE=0
54
+[ $( psql_get 'SELECT pg_is_in_recovery();' ) == "t" ] && RECOVERY_MODE=1
55
+
56
+if [ -f $RECOVERY_CONF ]
57
+then
58
+	debug "File recovery.conf found. Hot-standby mode."
59
+	
60
+	# Check recovery mode
61
+	if [ $RECOVERY_MODE -ne 1 ]
62
+	then
63
+		echo "CRITICAL : Not in recovery mode while recovery.conf file found !"
64
+		exit 2
65
+	fi
66
+	debug "Postgres is in recovery mode"
67
+
68
+	LAST_XLOG_RECEIVE=$( psql_get "SELECT pg_last_xlog_receive_location()" )
69
+	debug "Last xlog file receive : $LAST_XLOG_RECEIVE"
70
+	LAST_XLOG_REPLAY=$( psql_get "SELECT pg_last_xlog_replay_location()" )
71
+	debug "Last xlog file replay : $LAST_XLOG_REPLAY"
72
+
73
+
74
+	# Get master connection information from the recovery.conf file
75
+	MASTER_CONN_INFOS=$( egrep '^ *primary_conninfo' $RECOVERY_CONF|sed "s/^ *primary_conninfo *= *[\"\']\([^\"\']*\)[\"\'].*$/\1/" )
76
+	if [ ! -n "$MASTER_CONN_INFOS" ]
77
+	then
78
+		echo "UNKNOWN : Can't retreive master connection informations form recovery.conf file"
79
+		exit 3
80
+	fi
81
+	debug "Master connection informations : $MASTER_CONN_INFOS"
82
+
83
+	M_HOST=$( echo "$MASTER_CONN_INFOS"|sed 's/^.*host= *\([^ ]*\) *.*$/\1/' )
84
+	if [ ! -n "$M_HOST" ]
85
+	then
86
+		echo "UNKNOWN : Can't retreive master host from recovery.conf file"
87
+		exit 3
88
+	fi
89
+	debug "Master host : $M_HOST"
90
+
91
+	M_PORT=$( echo "$MASTER_CONN_INFOS"|sed 's/^.*port= *\([^ ]*\) *.*$/\1/' )
92
+	if [ ! -n "$M_PORT" ]
93
+	then
94
+		debug "Master port not specify, use default : $PG_DEFAULT_PORT"
95
+		M_PORT=$PG_DEFAULT_PORT
96
+	else
97
+		debug "Master port : $M_PORT"
98
+	fi
99
+	
100
+	# Get current xlog file from master
101
+	M_CUR_XLOG="$( echo 'SELECT pg_current_xlog_location()'|su - $PG_USER -c "$PSQL_BIN -h $M_HOST -p $M_PORT -t -P format=unaligned" )"
102
+	if [ ! -n "$M_CUR_XLOG" ]
103
+	then
104
+		echo "UNKNOWN : Can't retreive current xlog from master server"
105
+		exit 3
106
+	fi
107
+	debug "Master current xlog : $M_CUR_XLOG"
108
+
109
+	# Is the master's current xlog the last received xlog?
110
+	if [ "$M_CUR_XLOG" != "$LAST_XLOG_RECEIVE" ]
111
+	then
112
+		echo "CRITICAL : Master current xlog is not the last receive xlog"
113
+		exit 2
114
+	fi
115
+	debug "Master current xlog is the last receive xlog"
116
+
117
+	# Is the last received xlog also the last replayed one?
118
+	if [ "$LAST_XLOG_RECEIVE" != "$LAST_XLOG_REPLAY" ]
119
+	then
120
+		echo "WARNING : last receive xlog file is not the last replay file"
121
+		exit 1
122
+	fi
123
+	debug "Last receive xlog file is the last replay file"
124
+
125
+	echo "OK : Hot-standby server is uptodate"
126
+	exit 0
127
+else
128
+	debug "File recovery.conf not found. Master mode."
129
+
130
+	# Check recovery mode
131
+	if [ $RECOVERY_MODE -eq 1 ]
132
+	then
133
+		echo "CRITICAL : In recovery mode while recovery.conf file not found !"
134
+		exit 2
135
+	fi
136
+	debug "Postgres is not in recovery mode"
137
+
138
+	# Check standby client
139
+	STANDBY_CLIENTS=$( psql_get "SELECT client_addr, sync_state FROM pg_stat_replication;" )
140
+	if [ ! -n "$STANDBY_CLIENTS" ]
141
+	then
142
+		echo "WARNING : no stand-by client connected"
143
+		exit 1
144
+	fi
145
+	debug "Stand-by client(s) : $( echo  -n $STANDBY_CLIENTS|sed 's/\n/ , /g' )"
146
+
147
+	STANDBY_CLIENTS_TXT=""
148
+	STANDBY_CLIENTS_COUNT=0
149
+	for line in $STANDBY_CLIENTS
150
+	do
151
+		let STANDBY_CLIENTS_COUNT=STANDBY_CLIENTS_COUNT+1
152
+
153
+		IP=$( echo $line|cut -d '|' -f 1 )
154
+		MODE=$( echo $line|cut -d '|' -f 2 )
155
+		STANDBY_CLIENTS_TXT="$STANDBY_CLIENTS_TXT $IP (mode=$MODE)"
156
+	done
157
+
158
+	echo "OK : $STANDBY_CLIENTS_COUNT stand-by client(s) connected - $STANDBY_CLIENTS_TXT"
159
+	exit 0
160
+fi
0 161