/[nagios-plugins-perl]/trunk/plugins/check_postgresql.pl
ViewVC logotype

Contents of /trunk/plugins/check_postgresql.pl

Parent Directory Parent Directory | Revision Log Revision Log


Revision 188 - (show annotations) (download)
Thu Apr 13 09:23:07 2017 UTC (3 years, 7 months ago) by racvision
File MIME type: text/plain
File size: 14270 byte(s)
ajout plugins check_postgre + check_mail_mx
1 #!/usr/bin/perl -w
2 #
3 # Copyright (c) 2017 - Stéphane Urbanovski <stephane.urbanovski@ac-nancy-metz.fr>
4 #
5 # This program is free software; you can redistribute it and/or
6 # modify it under the terms of the GNU General Public License
7 # as published by the Free Software Foundation; either version 2
8 # of the License, or (at your option) any later version.
9 #
10 # This program is distributed in the hope that it will be useful,
11 # but WITHOUT ANY WARRANTY; without even the implied warranty
12 # of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
13 # GNU General Public License for more details.
14 #
15 # you should have received a copy of the GNU General Public License
16 # along with this program (or with Nagios); if not, write to the
17 # Free Software Foundation, Inc., 59 Temple Place - Suite 330,
18 # Boston, MA 02111-1307, USA
19 #
20
21 use strict;
22 use warnings;
23
24 use Nagios::Plugin ;
25
26 use DBI;
27 use DBD::Pg;
28
29 use File::Basename;
30 use Locale::gettext;
31
32 use POSIX qw(:signal_h floor setlocale);
33 use Time::HiRes; # get microtime
34 use DateTime;
35
36 use Data::Dumper;
37
38
# Name the script was invoked with.
my $PROGNAME = basename($0);

# Extract the version number from the RCS/CVS/SVN keyword string.
# The pattern is anchored on the keyword itself and escapes the dot, so a
# multi-digit revision ("12.34") or a plain SVN revision ("188") is captured
# whole instead of being truncated by a greedy prefix match.
my ($VERSION) = '$Revision: 2.1 $' =~ /\$Revision:\s*(\d+(?:\.\d+)*)\s*\$/;

my $DEBUG = 0;

# Default alarm delay (seconds); the plugin-level timeout is one second more.
my $TIMEOUT = 9;

# i18n :
# NOTE(review): the text domain name still refers to DB2 - confirm which
# message catalog this plugin is supposed to use.
setlocale(LC_MESSAGES, '');
textdomain('nagios-plugins-DB2');

# Retention files path (save previous values) :
# FIXME: Make this configurable
my $TempPath = '/tmp/';


# The usage line lists the options this script actually declares
# (-w is the password; thresholds are --wt / --ct).
my $np = Nagios::Plugin->new(
    version => $VERSION,
    blurb   => _gt('Plugin to check PostgreSQL databases'),
    usage   => "Usage: %s [ -v|--verbose ] [ -d|--debug ] -H <host> [ -p <port> ] -b <database name> [ -u <user> ] -w <password> [ --replication ] [ -t <timeout> ] [ --wt=<seconds> ] [ --ct=<seconds> ]",
    timeout => $TIMEOUT+1
);
61
62 # Setup DB2 environnement
63
64
65 # Parse command line arguments :
66
# Command line options, declared in the order they appear in --help.
# Each entry is the exact named-argument list handed to add_arg().
foreach my $argSpec (
    {
        spec     => 'debug|d',
        help     => _gt('Debug level'),
        default  => 0,
        required => 0,
    },
    {
        spec     => 'hostname|H=s',
        help     => _gt('Hostname'),
        required => 1,
    },
    {
        spec    => 'port|p=i',
        help    => _gt('Database listen port'),
        default => 5432,
    },
    {
        spec     => 'base|b=s',
        help     => _gt('Database name'),
        required => 1,
    },
    {
        spec    => 'user|u=s',
        help    => _gt('Connection user name'),
        default => 'postgres',
    },
    {
        spec     => 'password|w=s',
        help     => _gt('Connection user password'),
        required => 1,
    },
    {
        spec     => 'replication',
        help     => _gt('Force replication check'),
        required => 0,
        default  => 0,
    },
    {
        spec     => 'wt=f',
        help     => _gt('Warning request time threshold (in seconds)'),
        default  => 2,
        required => 0,
        label    => 'FLOAT'
    },
    {
        spec     => 'ct=f',
        help     => _gt('Critical request time threshold (in seconds)'),
        default  => 10,
        required => 0,
        label    => 'FLOAT'
    },
    {
        spec     => 'wb=i',
        help     => _gt('Warning backup age threshold (in hours)'),
        default  => 0,
        required => 0,
    },
    {
        spec     => 'cb=i',
        help     => _gt('Critical backup age threshold (in hours)'),
        default  => 0,
        required => 0,
    },
    {
        spec     => 'wa=i',
        help     => _gt('Warning agent used threshold (in % of MAX_AGENTS)'),
        default  => 0,
        required => 0,
    },
    {
        spec     => 'ca=i',
        help     => _gt('Critical agent used threshold (in % of MAX_AGENTS)'),
        default  => 0,
        required => 0,
    },
    {
        spec     => 'wl=i',
        help     => _gt('Warning number of LOCKS_WAITING threshold'),
        default  => 0,
        required => 0,
    },
    {
        spec     => 'cl=i',
        help     => _gt('Critical number of LOCKS_WAITING threshold'),
        default  => 0,
        required => 0,
    },
) {
    $np->add_arg( %{$argSpec} );
}

# Parse @ARGV against the declarations above.
$np->getopts;
169
# Copy the parsed options into plain variables used by the rest of the script.
my $opts = $np->opts;

$DEBUG = $opts->get('debug');

# Connection parameters
my ($hostname, $port, $db) = map { scalar $opts->get($_) } qw(hostname port base);
my ($user, $pass)          = map { scalar $opts->get($_) } qw(user password);
my $useReplication         = $opts->get('replication');

# Thresholds, as (warning, critical) pairs :
# request time
my ($warn_t, $crit_t) = map { scalar $opts->get($_) } qw(wt ct);
# agents
my ($warn_a, $crit_a) = map { scalar $opts->get($_) } qw(wa ca);
# backups
my ($warn_b, $crit_b) = map { scalar $opts->get($_) } qw(wb cb);
# lock wait
my ($warn_l, $crit_l) = map { scalar $opts->get($_) } qw(wl cl);
194
195
196
# DBI data source for the target PostgreSQL database.
my $dsn = "dbi:Pg:dbname=$db; host=$hostname; port=$port;";


# Install timeoutExit() as the SIGALRM handler so a hung connection or query
# ends the plugin with a CRITICAL status.
my $mask = POSIX::SigSet->new( SIGALRM );
my $action = POSIX::SigAction->new(\&timeoutExit,$mask);
my $oldaction = POSIX::SigAction->new();

sigaction( SIGALRM, $action, $oldaction );

# Honour the -t/--timeout option: fire our own alarm one second before the
# plugin-level timeout. With the default (TIMEOUT+1) this gives $TIMEOUT,
# i.e. the previously hard-coded delay.
my $alarmTimeout = $TIMEOUT;
my $optTimeout   = $np->opts->get('timeout');
if ( defined($optTimeout) && $optTimeout > 0 ) {
    $alarmTimeout = $optTimeout > 1 ? $optTimeout - 1 : 1;
}

debug( "Seting Alarm timeout to $alarmTimeout");
my $startTime = Time::HiRes::time();
alarm($alarmTimeout);


# Errors are reported through the return value; DBI only prints them itself
# when debugging is enabled.
my $dbh = DBI->connect($dsn, $user, $pass, {PrintError => $DEBUG, PrintWarn=> $DEBUG, RaiseError => 0 });
if ( !$dbh ) {
    $np->nagios_exit(CRITICAL, _gt("Query failed (connect): ").$DBI::errstr );
}

debug( 'Connected !');
my $out = '';

# Result rows of the monitoring queries, keyed by request name.
my %pgData = ();
220
221
222
# Fallback values used when the server version string cannot be parsed.
my $version = _gt("Unknown Pg version");
my $majVersion = 0;

# -------------------------------------------------------
# Server version, a few settings, database size and recovery state.
# The database name is passed through $dbh->quote() so that a name containing
# a quote cannot break (or alter) the statement.
my $reqName = 'version';
my $sql = 'SELECT
    version() AS version_full,
    current_setting(\'server_version\') AS version,
    current_setting(\'autovacuum\') AS autovacuum,
    current_setting(\'max_connections\') AS max_connections,
    current_setting(\'work_mem\') AS work_mem,
    pg_database_size( '.$dbh->quote($db).') AS dsize,
    pg_is_in_recovery() AS in_recovery;
';
$pgData{$reqName} = queryFirstRow($dbh,$sql);

if ( !defined($pgData{$reqName}) ) {
    my $err = defined($DBI::errstr) ? $DBI::errstr : '';
    $np->nagios_exit(CRITICAL, 'Error while querying version() : '.$err );
}

# $1 = full dotted version, $2 = leading (major) number.
if ( $pgData{$reqName}{'version'} =~ /^((\d+)\.[\d\.]+)/ ) {
    $majVersion = $2;
    $version = $1;
}

$np->add_message(OK, sprintf(_gt('PostgreSQL %s (autovacuum=%s,work_mem=%s)'),$version,$pgData{$reqName}{'autovacuum'},$pgData{$reqName}{'work_mem'}) );
250
251
252
253 # -------------------------------------------------------
254 # # Size
255 # $reqName = 'size';
256 # $sql = 'SELECT
257 # d.*,
258 # pg_database_size(d.oid) AS dsize,
259 # pg_size_pretty(pg_database_size(d.oid)) AS pdsize,
260 # datname,
261 # r.rolname AS rolname
262 # FROM pg_database d
263 # LEFT JOIN pg_roles r ON (r.oid=d.datdba)
264 # WHERE datname = \''.$db.'\';
265 # ';
266 # $pgData{$reqName} = queryFirstRow($dbh,$sql);
267 # if ( !defined($pgData{$reqName}) ) {
268 # $np->nagios_exit(CRITICAL, sprintf(_gt('Error while querying \'%s\': %s'),$reqName,$DBI::errstr));
269 # }
270
271
272 # -------------------------------------------------------
# Per-database activity counters joined with the recovery-conflict counters.
# The database name is quoted by the driver, and the filter is expressed as a
# WHERE clause (equivalent to the former extra ON condition of the inner join).
$reqName = 'stats';
$sql = 'SELECT
    *,
    A.numbackends,
    A.xact_commit,
    A.xact_rollback,
    A.deadlocks,
    A.blks_read,
    A.blks_hit,

    A.temp_files,
    A.temp_bytes,

    A.tup_fetched,
    A.tup_returned,
    A.tup_inserted,
    A.tup_updated,
    A.tup_deleted,

    B.confl_lock,
    B.confl_deadlock
    FROM pg_stat_database A
    JOIN pg_stat_database_conflicts B ON (A.datid=B.datid )
    WHERE A.datname = '.$dbh->quote($db).';
';
$pgData{$reqName} = queryFirstRow($dbh,$sql);
if ( !defined($pgData{$reqName}) ) {
    my $err = defined($DBI::errstr) ? $DBI::errstr : '';
    $np->nagios_exit(CRITICAL, sprintf(_gt('Error while querying \'%s\': %s'),$reqName,$err));
}



# -------------------------------------------------------
# archive : WAL archiver counters
$reqName = 'archive';
$sql = 'SELECT archived_count, failed_count FROM pg_stat_archiver;';
$pgData{$reqName} = queryFirstRow($dbh,$sql);
if ( !defined($pgData{$reqName}) ) {
    my $err = defined($DBI::errstr) ? $DBI::errstr : '';
    $np->nagios_exit(CRITICAL, sprintf(_gt('Error while querying \'%s\': %s'),$reqName,$err));
}
313
314 # -------------------------------------------------------
315 # archive
316 # $reqName = 'hit_ratio';
317 # $sql = 'SELECT
318 # round(
319 # sum(
320 # case when(blks_read*blks_hit)=0
321 # then null
322 # else
323 # 100-round((blks_read*100/(blks_read+blks_hit)),2)
324 # end
325 # )/count(*),
326 # 2
327 # ) as cache_hit_ratio
328 # FROM pg_stat_database
329 # WHERE datname = \''.$db.'\';
330 # ';
331 # $pgData{$reqName} = queryFirstRow($dbh,$sql);
332 # if ( !defined($pgData{$reqName}) ) {
333 # $np->nagios_exit(CRITICAL, sprintf(_gt('Error while querying \'%s\': %s'),$reqName,$DBI::errstr));
334 # }
335
336 # -------------------------------------------------------
337 # pg_settings
338 # $reqName = 'pg_settings';
339 # $sql = 'SELECT name,vartype,setting,short_desc FROM pg_settings;';
340 # $pgData{$reqName} = queryFirstRow($dbh,$sql);
341 # if ( !defined($pgData{$reqName}) ) {
342 # $np->nagios_exit(CRITICAL, sprintf(_gt('Error while querying \'%s\': %s'),$reqName,$DBI::errstr));
343 # }
344
345
346 # -------------------------------------------------------
347 # replication
# Only run when --replication was given: fetch the first row of
# pg_stat_replication and fail if there is none.
if ( $useReplication ) {
    $reqName = 'replication';
    $sql = 'SELECT *,
        state,
        sync_state
        FROM pg_stat_replication;
    ';
    $pgData{$reqName} = queryFirstRow($dbh,$sql);
    if ( !defined($pgData{$reqName}) ) {
        # queryFirstRow() also returns undef when the query succeeded but
        # produced no row; $DBI::errstr is then undefined, so report that
        # case explicitly instead of formatting an undefined value.
        my $reason = defined($DBI::errstr) ? $DBI::errstr : _gt('no row returned');
        $np->nagios_exit(CRITICAL, sprintf(_gt('Error while querying \'%s\': %s'),$reqName,$reason));
    }

}
361
362
363
364
365 # -------------------------------------------------------
# Placeholders referenced by the perfdata section at the end of the script;
# nothing in this script assigns them.
my ($row2, $rowMem, $rowHadr);
$dbh->disconnect();

# All database work is done: cancel the pending SIGALRM.
alarm(0);


# Check timer value : total time from just before connect to disconnect.

my $timer = Time::HiRes::time() - $startTime;

my $status_t = $np->check_threshold(
    'check' => $timer,
    'warning' => $warn_t,
    'critical' => $crit_t,
);

# Register the result so that --wt / --ct actually influence the exit status
# computed later by check_messages(); an OK result adds no message.
if ( $status_t != OK ) {
    $np->add_message($status_t, sprintf(_gt('Response time %.3fs'), $timer));
}
381
382
# Cumulative counters taken from the 'stats' row; they are written to the
# retention file and turned into rates on the next run.
my %counters = map { ( $_ => $pgData{'stats'}{$_} ) } qw(
    tup_fetched
    tup_returned
    tup_inserted
    tup_updated
    tup_deleted
    temp_files
    temp_bytes
);


# Total check duration, with the thresholds currently set on the plugin.
my $elapsed = sprintf('%.6f',$timer);
$np->add_perfdata(
    'label'     => 't',
    'value'     => $elapsed,
    'uom'       => 's',
    'threshold' => $np->threshold()
);

# Database size in bytes (from the 'version' request).
my $dbSize = $pgData{'version'}{'dsize'};
$np->add_perfdata(
    'label' => 'size',
    'value' => $dbSize,
    'uom'   => 'B',
);

# Value of the confl_deadlock column of the 'stats' row.
my $deadlockConflicts = $pgData{'stats'}{'confl_deadlock'};
$np->add_perfdata(
    'label' => 'deadlock',
    'value' => $deadlockConflicts,
);
421
422
423
424
425 # Read previous retention data ( get rate from counter values)
426
# One retention file per (host, database) pair.
my $TempFile = $TempPath.'check_postgresql_'.$hostname.'_on_'.$db;

# Values saved by the previous run: 'TIMESTAMP' plus one entry per counter.
my %previousValues = ();
if ( (-e $TempFile) && (-r $TempFile) && (-w $TempFile) ) {
    # Lexical handle, and the read loop only runs when open() succeeded
    # (previously a failed open was ignored and the loop read from an
    # unopened handle).
    if ( open(my $retentionIn, '<', $TempFile) ) {
        debug("Retention file : $TempFile");
        while ( my $line = <$retentionIn> ) {
            chomp($line);
            # Expected line format: "<name> <number>"
            if ( $line =~ /^(\w+) ([\d\.]+)$/ ) {
                my ($name, $value) = ($1, $2);
                $previousValues{$name} = $value;
                debug("Retention data : '$name' = $value");
            } else {
                debug("Bad retention data : $line");
            }
        }
        close($retentionIn);
    } else {
        # Best effort: without previous values no rate is computed this run.
        debug("Unable to open retention file '$TempFile' : $!");
    }

} else {
    debug("Retention file '$TempFile' not found");
}
449
450
451 # Save retention data
# Overwrite the retention file with the current timestamp and counters.
# The message is translated first and formatted afterwards, so the catalog
# lookup uses the constant format string; it also says "write", which is
# what failed.
my $retentionOut;
if ( !open($retentionOut, '>', $TempFile) ) {
    $np->nagios_exit(UNKNOWN, sprintf(_gt("Error while trying to write '%s' !"),$TempFile) );
}
print {$retentionOut} 'TIMESTAMP '.$startTime."\n";
foreach my $k ( keys(%counters) ) {
    # Counters missing from the query result are simply not saved.
    if ( defined($counters{$k}) ) {
        print {$retentionOut} $k.' '.$counters{$k}."\n";
    }
}
# Buffered write errors only surface at close time.
if ( !close($retentionOut) ) {
    debug("Error while closing retention file '$TempFile' : $!");
}
462
463
# Differences between current and previous counter values.
my %delta = ();

# Seconds elapsed since the previous run (0 when unknown).
my $dt = 0;

if ( defined($previousValues{'TIMESTAMP'}) ) {
    $dt = $startTime - $previousValues{'TIMESTAMP'};

    if ( $dt > 0 && $dt <= 600 ) {
        # time increase (but not obsolete : >10mn)

        # compute deltas for counters
        foreach my $k ( keys(%counters) ) {
            if ( defined($counters{$k}) && defined($previousValues{$k}) ) {

                if ( $counters{$k} >= $previousValues{$k} ) {
                    # counter increase (a decrease leaves no delta for this run)
                    $delta{$k} = $counters{$k} - $previousValues{$k};
                }
            }
        }

        # Publish one "<counter>_rate" perfdata per counter that has a delta.
        # temp_files / temp_bytes are guarded like the tuple counters, so an
        # undefined delta no longer produces a warning and a bogus 0.000.
        my @rateSpecs = (
            [ 'tup_fetched',  'tup/s'   ],
            [ 'tup_returned', 'tup/s'   ],
            [ 'tup_inserted', 'tup/s'   ],
            [ 'tup_updated',  'tup/s'   ],
            [ 'tup_deleted',  'tup/s'   ],
            [ 'temp_files',   'files/s' ],
            [ 'temp_bytes',   'B/s'     ],
        );
        foreach my $rateSpec (@rateSpecs) {
            my ($counter, $uom) = @{$rateSpec};
            next if !defined($delta{$counter});

            $np->add_perfdata(
                'label' => $counter.'_rate',
                'value' => sprintf('%.3f',$delta{$counter} / $dt),
                'uom' => $uom,
            );
        }

    }
}
511
512
513
514
515 # compute sort overflow ratio
# NOTE(review): the three sections below use DB2 metric names; nothing visible
# in this script fills these keys/variables, so they look like leftovers from
# the DB2 plugin - confirm before removing.
if ( defined($delta{'TOTAL_SORTS'}) && defined($delta{'SORT_OVERFLOWS'})) {
    my $ratio = 0;
    if ( $delta{'TOTAL_SORTS'} > 0 ) {
        # Percentage truncated to 3 decimals. The numerator is the overflow
        # count itself (the previous code used the boolean result of defined()).
        $ratio = int(100 * $delta{'SORT_OVERFLOWS'} / $delta{'TOTAL_SORTS'} * 1000) / 1000;
    }
    $np->add_perfdata(
        'label' => 'SORT_OVERFLOWS_RATIO',
        'value' => $ratio,
        'min' => 0,
        'max' => 100,
        'uom' => '%'
    );
}


if ( defined($row2->{'APPLS_IN_DB2'}) ) {
    # IBM doc : Indicates the number of applications that are currently connected to the database, and for which the database manager is currently processing a request.
    $np->add_perfdata(
        'label' => 'APPLS_IN_DB2',
        'value' => $row2->{'APPLS_IN_DB2'},
        'uom' => 'appls',
        'min' => 0,
    );
}

if ( defined($rowMem->{'POOL_CUR_SIZE_SUM'}) ) {
    $np->add_perfdata(
        'label' => 'POOL_CUR_SIZE_SUM',
        'value' => $rowMem->{'POOL_CUR_SIZE_SUM'},
        'uom' => 'B',
        'min' => 0,
    );
}


# Combine every message registered so far into the final status and text,
# then print the plugin output and exit with the matching code.
my ($status, $message) = $np->check_messages();

$np->nagios_exit($status, $message );

# (Last backup: '.$lastBackup.')


exit;
562
# SIGALRM handler: report the timeout on STDOUT and leave with the CRITICAL
# exit code. The message names PostgreSQL (it used to say DB2).
sub timeoutExit {
    print _gt("PostgreSQL - Connection timeout !\n");
    exit CRITICAL;
}
567
# Run a SQL statement and return its first row.
#
# Parameters:
#   $dbh - connected DBI database handle
#   $sql - SQL text to prepare and execute
#
# Returns the first row as a hash reference (column name => value), or undef
# when the statement returns no row or a fetch error occurs. A prepare or
# execute failure ends the plugin with a CRITICAL status.
#
# When $DEBUG is set every row is fetched and dumped through debug();
# otherwise fetching stops after the first row.
sub queryFirstRow {
    my ($dbh,$sql) = @_;
    my $timer = Time::HiRes::time();
    my $sth = $dbh->prepare($sql);

    debug ("SQL: $sql");

    if ( !$sth ) {
        # No statement handle exists here, so the error text has to come from
        # the database handle (calling ->errstr on the undefined $sth died).
        $np->nagios_exit(CRITICAL, _gt("Query failed (prepare): ").$dbh->errstr );
        return undef;
    }

    if ( !$sth->execute() ) {
        $np->nagios_exit(CRITICAL, _gt("Query failed (execute):").$sth->errstr );
    }

    # Elapsed prepare + execute time, only used in the debug output.
    $timer = Time::HiRes::time()-$timer;

    my $run = 1;
    my $row = undef;

    while ( $run ) {

        my $tmpRow = $sth->fetchrow_hashref();

        if ( $dbh->err ) {
            debug ('Fetch error : '.$dbh->err.' '.$dbh->errstr);
            return undef;
        }

        if ( !defined($tmpRow) ) {
            # No (more) rows.
            $run = 0;
            next;
        }

        # Keep only the first row as the result.
        if ( ! defined($row) ) {
            $row = $tmpRow;
        }

        if ($DEBUG) {
            debug ( sprintf ("------- Sql results (time=%fms)", $timer*1000));
            foreach my $attr ( keys(%{$tmpRow}) ) {
                debug ( sprintf ("%40s = %s", $attr,defined($tmpRow->{$attr})?$tmpRow->{$attr}:'NULL'));
            }
            debug ( '---------');
        } else {
            # Not debugging: the first row is all we need.
            $run = 0;
        }
    }
    $sth->finish();

    return $row;
}
625
626
# Write one "[DEBUG] ..." line to STDERR, but only when $DEBUG is true.
sub debug {
    if ($DEBUG) {
        print STDERR "[DEBUG] $_[0]\n";
    }
}
631
# Short alias for gettext(): returns whatever gettext() gives back for the
# message passed as first argument.
sub _gt {
    my ($msgid) = @_;
    return gettext($msgid);
}
636
637

Properties

Name Value
svn:executable *

  ViewVC Help
Powered by ViewVC 1.1.8