#!/usr/bin/perl -w
#
# Copyright (c) 2017 Stéphane URBANOVSKI - CRT Supervision
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
#
# Nagios plugin: runs a time-bounded search against Elasticsearch and
# compares the number of hits returned with warning/critical thresholds.
# The search window is [now - delta, now[ on the '@timestamp' field.

use strict;
use warnings;

use POSIX qw(strftime setlocale);
use File::Basename;
use Locale::gettext;
use LWP::UserAgent;     # http client
use HTTP::Request;      # used by LWP::UserAgent
use HTTP::Status;       # to get http err msg
use JSON::XS;
use Nagios::Plugin ;
use Data::Dumper;

$Data::Dumper::Terse = 1;

my $VERSION  = '1.1';
my $TIMEOUT  = 60;      # HTTP timeout (s)
my $tookWarn = 0.2;     # search time warning threshold (s), perfdata only
my $tookCrit = 1.5;     # search time critical threshold (s), perfdata only

# ----------------------------------------------------------------------
# Command line definition
# ----------------------------------------------------------------------
my $np = Nagios::Plugin->new(
    version => $VERSION,
    blurb   => 'Query Elasticsearch and check hits returned',
    usage   => "Usage: %s [ -v|--verbose ] [-t ] ",
    timeout => $TIMEOUT+1
);

$np->add_arg (
    spec => 'filter=s@',
    help => _gt('Terms series (filter) to be searched'),
);
$np->add_arg (
    spec => 'should=s@',
    help => _gt('Terms series (should) to be searched'),
);
$np->add_arg (
    spec => 'query=s',
    help => _gt('ES query to be searched'),
);
$np->add_arg (
    spec => 'dontcount',
    help => _gt('Don\'t try to count all hits, stops after the first hit returned'),
);
$np->add_arg (
    spec    => 'eshost=s',
    help    => _gt('Elasticsearch hostname'),
    default => 'localhost',
);
$np->add_arg (
    spec    => 'esport=i',
    help    => _gt('Elasticsearch port'),
    default => 9200,
);
$np->add_arg (
    spec    => 'esuser=s',
    help    => _gt('Elasticsearch user login'),
    default => 'admin',
);
$np->add_arg (
    spec => 'espass=s',
    help => _gt('Elasticsearch password'),
);
$np->add_arg (
    spec    => 'hitw=s',
    help    => _gt('Returned hits warning threshold'),
    default => '1:',
);
$np->add_arg (
    spec    => 'hitc=s',
    help    => _gt('Returned hits critical threshold'),
    default => '',
);
$np->add_arg (
    spec    => 'index=s',
    help    => _gt('ES indexes to use (use strftime pattern substitutions) Exemple: events-*-%Y.%m.%d'),
    default => '_all',
);
$np->add_arg (
    spec    => 'delta=i',
    help    => _gt('Interval of time to use (s) until now'),
    default => 15*60,
);
$np->add_arg (
    spec => 'label=s',
    help => 'Additionnal information added to output',
);

$np->getopts;

my $DEBUG           = $np->opts->verbose;
my $eshost          = $np->opts->get('eshost');
my $esport          = $np->opts->get('esport');
my $esuser          = $np->opts->get('esuser');
my $espass          = $np->opts->get('espass');
my $hitw            = $np->opts->get('hitw');
my $hitc            = $np->opts->get('hitc');
my $esindexPattern  = $np->opts->get('index');
my $label           = $np->opts->get('label');
my $deltaTime       = $np->opts->get('delta');
my $queryFilter     = $np->opts->get('filter');
my $queryShould     = $np->opts->get('should');
my $query           = $np->opts->get('query');
my $terminate_after = $np->opts->get('dontcount') ? 1 : 0;

# ----------------------------------------------------------------------
# HTTP client
# ----------------------------------------------------------------------
my $ua = LWP::UserAgent->new(
    'env_proxy' => 0,
    'timeout'   => $TIMEOUT,
);
$ua->agent(basename($0));
# Workaround for LWP bug :
$ua->parse_head(0);

# ----------------------------------------------------------------------
# Index selection
# ----------------------------------------------------------------------
my $esindex = '_all';

# handle indexes named with time reference
if ( defined($esindexPattern) ) {
    my ($sec,$min,$hour,$mday,$mon,$year) = localtime();
    $esindex = POSIX::strftime( $esindexPattern, $sec,$min,$hour,$mday,$mon,$year );
    # Add previous day when using daily patterns
    # FIXME: this hack works only for daily pattern.
    if ( $esindexPattern =~ /%d/ ) {
        # Perl's POSIX::strftime normalizes its arguments as mktime() would,
        # so a day of 0 rolls back to the last day of the previous month.
        $esindex .= ','.POSIX::strftime( $esindexPattern, $sec,$min,$hour,$mday - 1,$mon,$year );
    }
    logD('Using index pattern : '.$esindex );
}

# TODO: choose protocol
# Credentials are NOT embedded in the URL: they are sent through an
# Authorization header (see below), which copes with special characters in
# the password and keeps it out of the debug output.
my $esUrl = 'https://'.$eshost.':'.$esport.'/'.$esindex.'/_search?ignore_unavailable=true';
logD2("URL: ".$esUrl );

$np->set_thresholds('warning' => $hitw, 'critical' => $hitc);

# ----------------------------------------------------------------------
# Time window: UTC dates go into the query, local times into the message
# ----------------------------------------------------------------------
my $tsEnd        = time();
my $tsStart      = $tsEnd - $deltaTime;
my $dateEndUTC   = POSIX::strftime("%Y-%m-%dT%T",gmtime($tsEnd));
my $timeEnd      = strftime("%T",localtime($tsEnd));
my $dateStartUTC = POSIX::strftime("%Y-%m-%dT%T",gmtime($tsStart));
my $timeStart    = strftime("%T",localtime($tsStart));

# ----------------------------------------------------------------------
# Parse the "field:value" terms given on the command line
# ----------------------------------------------------------------------
logD2('TERMS: $queryFilter='.Dumper(\$queryFilter) );

# Filter terms are ANDed; when a field is given twice the last value wins.
my %esQueryFilter = ();
foreach my $t ( @{ $queryFilter || [] } ) {
    if ( $t =~ /^([\w\.]+)\:(.*)/ ) {
        my ($k,$v) = ($1,$2);
        # No manual quote escaping: encode_json() takes care of it.
        $esQueryFilter{$k} = $v;
    }
    else {
        logD ("Bad filter term definition : $t");
    }
}

# Should terms are ORed, so the same field may legitimately appear several
# times with different values: keep them as a list, not as a hash.
my @esQueryShould = ();
foreach my $t ( @{ $queryShould || [] } ) {
    if ( $t =~ /^([\w\.]+)\:(.*)/ ) {
        my ($k,$v) = ($1,$2);
        push( @esQueryShould, { 'term' => { $k => $v } } );
    }
    else {
        logD ("Bad should term definition : $t");
    }
}

# ----------------------------------------------------------------------
# Build the search request
# ----------------------------------------------------------------------
my $esQuery = {
    'query' => {
        'bool' => {
            'filter' => [
                {
                    'range' => {
                        '@timestamp' => {
                            'gte' => $dateStartUTC,
                            'lt'  => $dateEndUTC
                        },
                    },
                },
            ],
        },
    },
    'size'            => 0,     # only the hit count is needed
    'terminate_after' => $terminate_after
};

if ( defined($query) ) {
    push( @{$esQuery->{'query'}{'bool'}{'filter'}}, {
        'query_string' => {
            'query'            => $query,
            'analyze_wildcard' => JSON::XS::false,
        }
    });
}

foreach my $k ( sort keys(%esQueryFilter) ) {
    push( @{$esQuery->{'query'}{'bool'}{'filter'}}, {
        'term' => { $k => $esQueryFilter{$k} }
    });
}

if ( @esQueryShould ) {
    $esQuery->{'query'}{'bool'}{'should'} = \@esQueryShould;
    # A bool query that has a filter clause treats "should" as optional
    # unless told otherwise: require at least one of the terms to match.
    $esQuery->{'query'}{'bool'}{'minimum_should_match'} = 1;
}

my $esRequest = HTTP::Request->new('POST', $esUrl);
$esRequest->content_type('application/json');
if ( defined($espass) ) {
    # authorization_basic() croaks on a ':' in the user name; fail the
    # Nagios way instead.
    if ( $esuser =~ /:/ ) {
        $np->nagios_exit(UNKNOWN, _gt('Elasticsearch user login must not contain ":"') );
    }
    $esRequest->authorization_basic($esuser, $espass);
}
logD2('POST: '.JSON::XS->new->pretty(1)->encode($esQuery) ) if $DEBUG>1;
$esRequest->content( encode_json($esQuery) );

# ----------------------------------------------------------------------
# Run the search and decode the answer
# ----------------------------------------------------------------------
my $http_response = $ua->request( $esRequest );

if ( $http_response->is_error() ) {
    my $err = $http_response->code." ".status_message($http_response->code)." (".$http_response->message.")";
    logD("ES Query failed : HTTP error: ".$err );
    $np->nagios_exit(UNKNOWN, sprintf(_gt('ES Query failed (%s)'),$err ));
}

my $json = $http_response->content;
logD('RESPONSE: $json='.Dumper(\$json) );

my $jdata;
eval {
    $jdata = decode_json($json);
};
if ($@) {
    logD ('Unable to decode json : '.$json);
    $np->nagios_exit(UNKNOWN, _gt('Unable to decode json returned') );
}

# The answer must be a JSON object holding a "hits" object.
if (   ref($jdata) ne 'HASH'
    || ref($jdata->{'hits'}) ne 'HASH'
    || !defined($jdata->{'hits'}{'hits'}) )
{
    $np->nagios_exit(UNKNOWN, _gt('Invalid ES response returned !') );
}

# Elasticsearch 7+ reports hits.total as { "value": N, "relation": ... },
# older releases as a plain number.
my $hits = $jdata->{'hits'}{'total'};
if ( ref($hits) eq 'HASH' ) {
    $hits = $hits->{'value'};
}
if ( !defined($hits) ) {
    $np->nagios_exit(UNKNOWN, _gt('Invalid ES response returned !') );
}

# ----------------------------------------------------------------------
# Search time (perfdata only)
# ----------------------------------------------------------------------
my $took = ( defined($jdata->{'took'}) ? $jdata->{'took'} : 0 ) / 1000;   # ms -> s
# The returned status is deliberately not added to the plugin messages
# (search time never changed the plugin state); the call is kept because
# it loads the thresholds reported with the 't' perfdata below.
$np->check_threshold(
    'check'    => $took,
    'warning'  => $tookWarn,
    'critical' => $tookCrit,
);
$np->add_perfdata(
    'label'     => 't',
    'value'     => $took,
    'min'       => 0,
    'uom'       => 's',
    'threshold' => $np->threshold()
);

# ----------------------------------------------------------------------
# Shards
# ----------------------------------------------------------------------
my $shards       = ref($jdata->{'_shards'}) eq 'HASH' ? $jdata->{'_shards'} : {};
my $shardsUsed   = $shards->{'total'}  || 0;
my $shardsFailed = $shards->{'failed'} || 0;

# check failed shards
if ( $shardsFailed > 0 ) {
    $np->add_message(WARNING, sprintf(_gt('Some shards failed (%d/%d)!'), $shardsFailed, $shardsUsed) );
}

# ----------------------------------------------------------------------
# Hit count
# ----------------------------------------------------------------------
my $hitStatus = $np->check_threshold(
    'check'    => $hits,
    'warning'  => $hitw,
    'critical' => $hitc,
);
$np->add_perfdata(
    'label'     => 'hits',
    'value'     => $hits,
    'min'       => 0,
    'threshold' => $np->threshold()
);

if ( $hitStatus ) {
    $np->add_message($hitStatus, sprintf(_gt('Hit count (%d) out of range !'),$hits) );
}
else {
    $np->add_message($hitStatus, sprintf(_gt('%d hits between %s and %s (%d shards used)'),$hits,$timeStart,$timeEnd,$shardsUsed) );
}

my ($status, $message) = $np->check_messages('join' => ' ');
if ( $label ) {
    $message = sprintf('%s - %s', $label,$message);
}
$np->nagios_exit($status, $message );

# ----------------------------------------------------------------------
# Helpers
# ----------------------------------------------------------------------

# Gettext wrapper: returns the translation of the message given as first
# argument, as provided by Locale::gettext.
sub _gt {
    return gettext($_[0]);
}

# Prints $msg on STDERR, prefixed with "DEBUG: ", when verbosity is >= 1.
sub logD {
    my ($msg) = @_;
    print STDERR "DEBUG: $msg\n" if $DEBUG;
}

# Prints $msg on STDERR, prefixed with "DEBUG2: ", when verbosity is >= 2.
sub logD2 {
    my ($msg) = @_;
    print STDERR "DEBUG2: $msg\n" if $DEBUG>1;
}