[check_postgres] Change to show query details for slow query time
Giles Westwood
giles.westwood at semantico.com
Mon Mar 1 16:14:05 UTC 2010
Hi list,
We had a need to see the client port of slow query to trace the pid of
the process holding the connection open. I modified check_postgres to
display this:-
sub check_query_time {
## Check the length of running queries
## Supports: Nagios, MRTG
## It makes no sense to run this more than once on the same cluster
## Warning and critical are time limits - defaults to seconds
## Valid units: s[econd], m[inute], h[our], d[ay]
## All above may be written as plural as well (e.g. "2 hours")
## Can also ignore databases with exclude and limit with include
## Limit to a specific user with the includeuser option
## Exclude users with the excludeuser option
my ($warning, $critical) = validate_range
({
type => 'time',
default_warning => '2 minutes',
default_critical => '5 minutes',
});
## Bail early if stats_command_string is off
$SQL = q{SELECT setting FROM pg_settings WHERE name =
'stats_command_string'};
my $info = run_command($SQL);
for my $db (@{$info->{db}}) {
if ($db->{slurp} =~ /off/) {
ndie msg('qtime-fail');
}
}
$SQL = q{SELECT client_addr, client_port, procpid, datname,
max(COALESCE(ROUND(EXTRACT(epoch FROM now()-query_start)),0)) AS
query_length }.
qq{FROM pg_stat_activity WHERE current_query <>
'<IDLE>'$USERWHERECLAUSE GROUP BY client_addr, client_port, procpid,
datname ORDER BY query_length DESC LIMIT 1};
$info = run_command($SQL, { regex => qr{\s*.+?\s+\|\s+\-?\d+}, emptyok
=> 1 } );
my $found = 0;
my $client_addr = '0.0.0.0';
my $client_port = 0;
my $procpid = 0;
for $db (@{$info->{db}}) {
if ($db->{slurp} !~ /\w/ and $USERWHERECLAUSE) {
$stats{$db->{dbname}} = 0;
add_ok msg('no-match-user');
next;
}
$found = 1;
my $max = 0;
SLURP: while ($db->{slurp} =~
/(.+?)\s+\|(.+?)\s+\|(.+?)\s+\|(.+?)\s+\|\s+(\-?\d+)\s*/gsm) {
my ($dbname,$current) = ($4, int $5);
$client_addr = $1;
$client_port = $2;
$procpid = $3;
next SLURP if skip_item($dbname);
$max = $current if $current > $max;
}
if ($MRTG) {
$stats{$db->{dbname}} = $max;
next;
}
$db->{perf} .= "maxtime=$max;";
$db->{perf} .= "$warning" if length $warning;
$db->{perf} .= ';';
$db->{perf} .= "$critical" if length $critical;
my $msg = msg('qtime-msg', $max);
if (length $critical and $max >= $critical) {
$msg .= " procpid:$procpid, client_addr:$client_add,
client_port:$client_port. Run sudo netstat -anp and grep $client_port?";
add_critical $msg;
}
elsif (length $warning and $max >= $warning) {
$msg .= " procpid:$procpid, client_addr:$client_add,
client_port:$client_port. Run sudo netstat -anp and grep $client_port?";
add_warning $msg;
}
else {
add_ok $msg;
}
}
return;
} ## end of check_query_time
--
Giles Westwood, Systems Administrator,
Semantico, Floor 1, 21-23 Dyke Road, Brighton BN1 3FE
<http://www.semantico.com/>
<mailto:Giles.Westwood at semantico.com>
<tel:+44-1273-358231> <fax:+44-1273-723232>
More information about the Check_postgres
mailing list