Benjamin Renard committed on 2015-11-12 14:16:19
Showing 1 changed file, with 73 additions and 46 deletions.
| ... | ... |
@@ -54,6 +54,13 @@ my $goodOpt = 0; |
| 54 | 54 |
my @ownerOnly; |
| 55 | 55 |
my @hostsDesired; |
| 56 | 56 |
my @hostsExcluded; |
| 57 |
+my %Status; |
|
| 58 |
+my $statusCode = 'OK'; |
|
| 59 |
+my $ok_count = 0; |
|
| 60 |
+my $unknown_count = 0; |
|
| 61 |
+my $warning_count = 0; |
|
| 62 |
+my $critical_count = 0; |
|
| 63 |
+ |
|
| 57 | 64 |
|
| 58 | 65 |
# Process options |
| 59 | 66 |
$goodOpt = GetOptions( |
| ... | ... |
@@ -114,8 +121,6 @@ if ($err) |
| 114 | 121 |
exit $ERRORS{'UNKNOWN'};
|
| 115 | 122 |
} |
| 116 | 123 |
|
| 117 |
-my %Status; |
|
| 118 |
- |
|
| 119 | 124 |
# query the BackupPC server for host status |
| 120 | 125 |
my $status_raw = $server->ServerMesg('status hosts');
|
| 121 | 126 |
my $hosts_infos = $server->HostInfoRead(); |
| ... | ... |
@@ -125,19 +130,16 @@ eval $status_raw; |
| 125 | 130 |
|
| 126 | 131 |
# check the dumped output |
| 127 | 132 |
my $hostCount = 0; |
| 128 |
-my $errorLevel='OK'; |
|
| 129 | 133 |
|
| 130 | 134 |
foreach my $host (@hostsDesired, @hostsExcluded) |
| 131 | 135 |
{
|
| 132 |
- if (not grep {/$host/} keys(%Status))
|
|
| 136 |
+ if (not grep {/^$host$/} keys(%Status))
|
|
| 133 | 137 |
{
|
| 134 | 138 |
print("BACKUPPC UNKNOWN - Unknown host ($host)\n");
|
| 135 | 139 |
exit $ERRORS{'UNKNOWN'};
|
| 136 | 140 |
} |
| 137 | 141 |
} |
| 138 | 142 |
|
| 139 |
-my @problems; |
|
| 140 |
- |
|
| 141 | 143 |
# host status checks |
| 142 | 144 |
foreach my $host (sort(keys(%Status))) |
| 143 | 145 |
{
|
| ... | ... |
@@ -145,10 +147,12 @@ foreach my $host (sort(keys(%Status))) |
| 145 | 147 |
my $owner = $hosts_infos->{$host}->{user};
|
| 146 | 148 |
next if (@ownerOnly and not grep {/$owner/} @ownerOnly);
|
| 147 | 149 |
my %host_conf = %{$server->ConfigDataRead($host)};
|
| 148 |
- next if ( $host_conf{BackupsDisable} );
|
|
| 149 |
- next if (@hostsDesired and not grep {/$host/} @hostsDesired);
|
|
| 150 |
- next if (@hostsExcluded and grep {/$host/} @hostsExcluded);
|
|
| 150 |
+ $Status{$host}{BackupsDisable} = $host_conf{BackupsDisable};
|
|
| 151 |
+ next if ( $Status{$host}{BackupsDisable} );
|
|
| 152 |
+ next if (@hostsDesired and not grep {/^$host$/} @hostsDesired);
|
|
| 153 |
+ next if (@hostsExcluded and grep {/^$host$/} @hostsExcluded);
|
|
| 151 | 154 |
next if ($Status{$host}{'type'} eq 'archive');
|
| 155 |
+ $Status{$host}{'statusCode'} = 'OK';
|
|
| 152 | 156 |
$hostCount++; |
| 153 | 157 |
# Debug |
| 154 | 158 |
if ($verbose == 2) |
| ... | ... |
@@ -161,50 +165,73 @@ foreach my $host (sort(keys(%Status))) |
| 161 | 165 |
# Check host error |
| 162 | 166 |
if ($Status{$host}{'error'})
|
| 163 | 167 |
{
|
| 164 |
- # Check connectivity errors with greater care |
|
| 165 |
- if ($Status{$host}{'error'} ne 'ping too slow' &&
|
|
| 166 |
- $Status{$host}{'error'} ne 'no ping response' &&
|
|
| 167 |
- $Status{$host}{'error'} ne 'no ping response' &&
|
|
| 168 |
- $Status{$host}{'error'} ne 'host not found' &&
|
|
| 169 |
- $Status{$host}{'reason'} !~ /Reason_restore_failed/) {
|
|
| 170 |
- push @problems, "$host error : ".$Status{$host}{'error'}." / ".$Status{$host}{'reason'};
|
|
| 171 |
- next; |
|
| 172 |
- } |
|
| 168 |
+ $Status{$host}{statusMsg} = "error: ".$Status{$host}{'error'}." / ".$Status{$host}{'reason'};
|
|
| 169 |
+ } else {
|
|
| 170 |
+ $Status{$host}{statusMsg} = "status: ".$Status{$host}{'state'};
|
|
| 173 | 171 |
} |
| 172 |
+ |
|
| 174 | 173 |
# Check last good backup time |
| 175 |
- my $difftime=difftime(time(), $Status{$host}{'lastGoodBackupTime'});
|
|
| 176 |
- my $diffdays=$difftime/(3600 * 24); |
|
| 177 |
- $Status{$host}{'lastGoodBackupTime'} = $Status{$host}{'startTime'} if (not $Status{$host}{'lastGoodBackupTime'});
|
|
| 178 |
- if ($difftime > ($critDaysOld * 3600 * 24)) |
|
| 174 |
+ $Status{$host}{'lastGoodBackupDays'} = difftime(time(), $Status{$host}{'lastGoodBackupTime'}) / (3600 * 24) if ( defined $Status{$host}{'lastGoodBackupTime'} );
|
|
| 175 |
+ if ( ! $Status{$host}{'lastGoodBackupDays'} ) {
|
|
| 176 |
+ $Status{$host}{'startDays'} = difftime(time(), $Status{$host}{'startTime'}) / (3600 * 24);
|
|
| 177 |
+ if ( $Status{$host}{'startDays'} > $critDaysOld ) {
|
|
| 178 |
+ $Status{$host}{statusMsg} .= ", no backups";
|
|
| 179 |
+ $Status{$host}{statusCode} = 'CRITICAL';
|
|
| 180 |
+ $statusCode = 'CRITICAL'; |
|
| 181 |
+ } elsif ( $Status{$host}{'startDays'} > $warnDaysOld ) {
|
|
| 182 |
+ $Status{$host}{statusMsg} .= ", no backups";
|
|
| 183 |
+ $Status{$host}{statusCode} = 'WARNING' unless ( $Status{$host}{statusCode} eq 'CRITICAL' ); # compare with eq; "=" would overwrite the host code with CRITICAL
|
|
| 184 |
+ $statusCode = 'WARNING' unless ( $statusCode eq 'CRITICAL' ); |
|
| 185 |
+ } |
|
| 186 |
+ } elsif ( $Status{$host}{'lastGoodBackupDays'} > $critDaysOld )
|
|
| 179 | 187 |
{
|
| 180 |
- push @problems, "$host : last good backup have ".sprintf("%.1f",$diffdays)." days";
|
|
| 181 |
- $errorLevel='CRITICAL'; |
|
| 188 |
+ $Status{$host}{statusMsg} .= ", last good backup have ".sprintf("%.1f", $Status{$host}{'lastGoodBackupDays'})." days";
|
|
| 189 |
+ $Status{$host}{statusCode} = 'CRITICAL';
|
|
| 190 |
+ $statusCode = 'CRITICAL'; |
|
| 182 | 191 |
} |
| 183 |
- elsif ($difftime > ($warnDaysOld * 3600 * 24)) |
|
| 192 |
+ elsif ( $Status{$host}{'lastGoodBackupDays'} > $warnDaysOld )
|
|
| 184 | 193 |
{
|
| 185 |
- push @problems, "$host : last good backup have ".sprintf("%.1f",$diffdays)." days";
|
|
| 186 |
- $errorLevel='WARNING' if ($errorLevel eq 'OK'); |
|
| 187 |
- } |
|
| 188 |
-} |
|
| 189 |
- |
|
| 190 |
-my $problemTxt=""; |
|
| 191 |
-if (scalar(@problems) > 0) {
|
|
| 192 |
- if ($verbose > 0) {
|
|
| 193 |
- foreach my $pbl (@problems) {
|
|
| 194 |
- if ($problemTxt ne "") {
|
|
| 195 |
- $problemTxt.=" , "; |
|
| 196 |
- } |
|
| 197 |
- else {
|
|
| 198 |
- $problemTxt=" ( "; |
|
| 199 |
- } |
|
| 200 |
- $problemTxt.=$pbl; |
|
| 194 |
+ $Status{$host}{statusMsg} .= ", last good backup have ".sprintf("%.1f",$Status{$host}{'lastGoodBackupDays'})." days";
|
|
| 195 |
+ $Status{$host}{statusCode} = 'WARNING' unless ( $Status{$host}{statusCode} eq 'CRITICAL' ); # compare with eq; "=" would overwrite the host code with CRITICAL
|
|
| 196 |
+ $statusCode = 'WARNING' unless ( $statusCode eq 'CRITICAL' ); |
|
| 197 |
+ } else {
|
|
| 198 |
+ $Status{$host}{statusMsg} .= ", last good backup have ".sprintf("%.1f",$Status{$host}{'lastGoodBackupDays'})." days";
|
|
| 199 |
+ } |
|
| 200 |
+ $ok_count++ if ( $Status{$host}{statusCode} eq 'OK' );
|
|
| 201 |
+ $unknown_count++ if ( $Status{$host}{statusCode} eq 'UNKNOWN' );
|
|
| 202 |
+ $warning_count++ if ( $Status{$host}{statusCode} eq 'WARNING' );
|
|
| 203 |
+ $critical_count++ if ( $Status{$host}{statusCode} eq 'CRITICAL' );
|
|
| 204 |
+} |
|
| 205 |
+ |
|
| 206 |
+ |
|
| 207 |
+my $statusMsg = "BACKUPPC $statusCode"; |
|
| 208 |
+ |
|
| 209 |
+if ( $statusCode eq 'OK' ) {
|
|
| 210 |
+ if ( $verbose && scalar(@hostsDesired) == 1 ) {
|
|
| 211 |
+ $statusMsg .= " (".$Status{$hostsDesired[0]}{statusMsg}.")";
|
|
| 212 |
+ } else {
|
|
| 213 |
+ $statusMsg .= " ($ok_count OK)"; |
|
| 214 |
+ } |
|
| 215 |
+} else {
|
|
| 216 |
+ if ( $verbose ) {
|
|
| 217 |
+ $statusMsg .= " (";
|
|
| 218 |
+ my $first_host = 1; |
|
| 219 |
+ foreach my $host ( keys %Status ) {
|
|
| 220 |
+ next if (@hostsDesired and not grep {/^$host$/} @hostsDesired);
|
|
| 221 |
+ next if (@hostsExcluded and grep {/^$host$/} @hostsExcluded);
|
|
| 222 |
+ next if ( $Status{$host}{BackupsDisable} );
|
|
| 223 |
+ next if ($Status{$host}{'type'} eq 'archive');
|
|
| 224 |
+ if ( $Status{$host}{statusCode} ne 'OK' ) {
|
|
| 225 |
+ $statusMsg .= ", " unless ( $first_host ); |
|
| 226 |
+ $statusMsg .= "$host: ".$Status{$host}{statusCode}." - ".$Status{$host}{statusMsg};
|
|
| 227 |
+ $first_host = 0 if ( $first_host ); |
|
| 201 | 228 |
} |
| 202 |
- $problemTxt.=" )"; |
|
| 203 | 229 |
} |
| 204 |
- else {
|
|
| 205 |
- $problemTxt=" (".scalar(@problems)." problems)";
|
|
| 230 |
+ $statusMsg .= ")"; |
|
| 231 |
+ } else {
|
|
| 232 |
+ $statusMsg .= " ( $ok_count OK, $unknown_count UNKNOWN, $warning_count WARNING, $critical_count CRITICAL)"; |
|
| 206 | 233 |
} |
| 207 | 234 |
} |
| 208 | 235 |
|
| 209 |
-print "BACKUPPC $errorLevel$problemTxt\n"; |
|
| 210 |
-exit $ERRORS{$errorLevel};
|
|
| 236 |
+print "$statusMsg\n"; |
|
| 237 |
+exit $ERRORS{$statusCode};
|
|
| 211 | 238 |