Nagios is sending critical false alerts about current users

Hello All,

Nagios seems to be sending false alerts about a few hosts. For example, there were no users logged in on one host, yet Nagios reported a critical alert saying that 6 users were logged in. How do I fix this?

Also, I have installed Nagios, added 12 hosts as a start, and am monitoring a few details on them. How do I send false (test) alerts and check whether they are being reported through the Nagios server?

Thanks
Saikrishna

There might be several thousand scripts for Nagios out there. Without knowing the details of the script or method you're using to monitor your hosts, it'd be almost impossible to provide a useful answer.

What are you monitoring on those hosts, and how? Is it a custom script? Do you use NRPE? How do you connect to them?

Hey Verdepollo,

Yes, NRPE is being used. Below are a few details on the Nagios master server; do let me know if you need additional information.

[root@proof etc]# uname -a
Linux proof 2.6.18-274.el5 #1 SMP Fri Jul 22 04:43:29 EDT 2011 x86_64 x86_64 x86_64 GNU/Linux
[root@proof etc]# cat /etc/redhat-release
CentOS release 5.7 (Final)

[root@proof etc]# ls -ltr /usr/local/nagios/etc/
total 116
-rw-rw---- 1 nagios nagios 1340 Jan 13 18:51 resource.cfg
-rw-rw-r-- 1 nagios nagios 43774 Jan 13 18:51 nagios.cfg.01.16.2012
-rw-rw-r-- 1 nagios nagios 11408 Jan 13 18:51 cgi.cfg
-rw-r--r-- 1 root root 26 Jan 16 09:53 htpasswd.users
-rw-rw-r-- 1 nagios nagios 43830 Jan 16 17:37 nagios.cfg
drwxrwxr-x 2 nagios nagios 4096 Jan 17 09:04 objects
-rw-rw-r-- 1 nagios nagios 2813 Jan 18 14:15 services.cfg

[root@proof etc]# cat services.cfg
# Generic service definition template
# Declares the template named `generic-service`, which the service
# definitions below inherit through `use generic-service`.
# It switches on active and passive checks, notifications, event handlers,
# flap detection, performance-data processing and state retention, and
# leaves freshness checking off (check_freshness 0).
define service{
name generic-service ;
active_checks_enabled 1 ;
passive_checks_enabled 1 ;
parallelize_check 1 ;
obsess_over_service 1 ;
check_freshness 0 ;
notifications_enabled 1 ;
event_handler_enabled 1 ;
flap_detection_enabled 1 ;
process_perf_data 1 ;
retain_status_information 1 ;
retain_nonstatus_information 1 ;

# register 0 marks this object as a template only, not a real service.
register 0 ; DONT REGISTER TEMPLATE!
}

# Service definition
# HTTP check applied to every host in the `admins` hostgroup, run through
# the `check_http` command. Checked every 3 time units, retried every 1
# time unit, with up to 3 attempts. Notifications go to the `admins`
# contact group every 30 time units for warning, unknown, critical and
# recovery states (w,u,c,r).
# NOTE(review): a later definition in this file repeats the same
# hostgroup_name / service_description pair (admins / HTTP) - confirm
# which one is intended.
define service{
use generic-service ; Name of service template

hostgroup_name admins
service_description HTTP
is_volatile 0
check_period 24x7
max_check_attempts 3
normal_check_interval 3
retry_check_interval 1
# NOTE(review): the trailing comma after `admins` looks unintended; the
# other definitions in this file write `contact_groups admins` - confirm.
contact_groups admins,
notification_interval 30
notification_period 24x7
notification_options w,u,c,r
check_command check_http
}

# Service definition
# LocalDisk check applied to every host in the `admins` hostgroup, run
# through the `check_local_disk` command with no arguments. Checked every
# 3 time units, retried every 1 time unit, with up to 3 attempts.
# Notifications go to the `admins` contact group every 30 time units for
# warning, unknown, critical and recovery states (w,u,c,r).
# NOTE(review): the command definition is not shown here; the name
# `check_local_disk` suggests the plugin runs on the Nagios server itself
# rather than on the monitored host - confirm.
define service{
use generic-service ; Name of service template

hostgroup_name admins
service_description LocalDisk
is_volatile 0
check_period 24x7
max_check_attempts 3

normal_check_interval 3
retry_check_interval 1
contact_groups admins
notification_interval 30
notification_period 24x7
notification_options w,u,c,r
check_command check_local_disk
}

# Service definition
# HTTP check for the `admins` hostgroup using `check_http`, with the same
# schedule and notification settings as the first HTTP definition in this
# file (3/1 check and retry intervals, 3 attempts, notify every 30 time
# units for w,u,c,r).
# NOTE(review): this repeats the hostgroup_name / service_description pair
# (admins / HTTP) already defined earlier in this file, so it looks like a
# duplicate - confirm whether one of the two should be removed or renamed.
define service{
use generic-service ; Name of service template

hostgroup_name admins
service_description HTTP
is_volatile 0
check_period 24x7
max_check_attempts 3
normal_check_interval 3
retry_check_interval 1
contact_groups admins
notification_interval 30
notification_period 24x7
notification_options w,u,c,r
check_command check_http
}

Maybe your client configuration would also be useful. :wink:

Here you go.

I've used the same config file for all servers and updated the relevant hostname and IP address in each one. Do let me know if you need additional information.

# Host definition for the monitored machine `clientnagios` at 10.10.2.19.
# Defaults are inherited from the `linux-server` template, which is not
# shown here. The service definitions that follow attach to this host via
# `host_name clientnagios`.
define host{
use linux-server ; Inherit default values from a template
host_name clientnagios; The name we're giving to this server
alias CentOS 5; A longer name for the server
address 10.10.2.19; IP address of the server
}

#define service{
# use generic-service
# host_name clientnagios
# service_description HTTP
# check_command check_http!check_http
#}
# PING service for host `clientnagios`, inheriting from `generic-service`.
# Checked every 5 time units, retried every 1 time unit, with up to 2
# attempts. Notifications are sent every 15 time units, for critical and
# recovery states only (c,r).
define service{
use generic-service
host_name clientnagios
service_description PING
# The two `!`-separated arguments are passed to the check_ping command.
# NOTE(review): presumably warning and critical thresholds written as
# round-trip-time,packet-loss% - confirm against the command definition.
check_command check_ping!100.0,20%!500.0,60%
# NOTE(review): `u` presumably sets the initial state to UNKNOWN - confirm.
initial_state u
max_check_attempts 2
check_interval 5
retry_interval 1
check_period 24x7
notification_interval 15
notification_period 24x7
notifications_enabled 1
notification_options c,r
}

# "Root Partition" service for host `clientnagios`, inheriting from
# `generic-service`. Checked every 10 time units, retried every 2 time
# units, with up to 3 attempts. The first notification is delayed by 15
# time units and then repeated every 30, for critical and recovery states
# only (c,r).
define service{
use generic-service
host_name clientnagios
service_description Root Partition
# Arguments passed to check_local_disk are 20%, 10% and the path `/`.
# NOTE(review): presumably warning and critical free-space thresholds for
# the root filesystem. The `check_local_` prefix suggests the plugin runs
# on the Nagios server itself, not on clientnagios via NRPE - confirm
# against the command definition.
check_command check_local_disk!20%!10%!/
initial_state u
max_check_attempts 3
check_interval 10
retry_interval 2
check_period 24x7
notification_interval 30
first_notification_delay 15
notification_period 24x7
notifications_enabled 1
notification_options c,r
}

# "Current Users" service for host `clientnagios`, inheriting from
# `generic-service`. Checked every 10 time units, retried every 2 time
# units, with up to 3 attempts. The first notification is delayed by 15
# time units and then repeated every 30, for critical and recovery states
# only (c,r).
define service{
use generic-service ; Name of service template to use
host_name clientnagios
service_description Current Users
# Arguments 3 and 5 are passed to check_local_users.
# NOTE(review): presumably warning at 3 and critical at 5 logged-in users.
# The `check_local_` prefix suggests the plugin runs on the Nagios server
# itself rather than on clientnagios through NRPE. If so, the user count
# reported for this host would be the monitoring server's own, which
# would match the false "6 users" alert described in this thread -
# confirm against the command definition.
check_command check_local_users!3!5
initial_state u
max_check_attempts 3
check_interval 10
retry_interval 2
check_period 24x7
notification_interval 30
first_notification_delay 15
notification_period 24x7
notifications_enabled 1
notification_options c,r
}

# "Total Processes" service for host `clientnagios`, inheriting from
# `generic-service`. Checked every 10 time units, retried every 2 time
# units, with up to 3 attempts. The first notification is delayed by 15
# time units and then repeated every 30, for critical and recovery states
# only (c,r).
define service{
use generic-service ; Name of service template to use
host_name clientnagios
service_description Total Processes
# Arguments 375, 400 and RSZDT are passed to check_local_procs.
# NOTE(review): presumably warning and critical process counts plus a
# process-state filter. The `check_local_` prefix suggests the plugin
# runs on the Nagios server itself, not on clientnagios via NRPE -
# confirm against the command definition.
check_command check_local_procs!375!400!RSZDT
initial_state u
max_check_attempts 3
check_interval 10
retry_interval 2
check_period 24x7
notification_interval 30
first_notification_delay 15
notification_period 24x7
notifications_enabled 1
notification_options c,r
}

# "Current Load" service for host `clientnagios`, inheriting from
# `generic-service`. Checked every 10 time units, retried every 2 time
# units, with up to 3 attempts. The first notification is delayed by 15
# time units and then repeated every 30, for critical and recovery states
# only (c,r).
define service{
use generic-service ; Name of service template to use
host_name clientnagios
service_description Current Load
# Two comma-separated triples are passed to check_local_load.
# NOTE(review): presumably warning and critical load averages for the
# 1, 5 and 15 minute windows. The `check_local_` prefix suggests the
# plugin runs on the Nagios server itself, not on clientnagios via NRPE -
# confirm against the command definition.
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
initial_state u
max_check_attempts 3
check_interval 10
retry_interval 2
check_period 24x7

notification_interval 30
first_notification_delay 15
notification_period 24x7
notifications_enabled 1
notification_options c,r
}

# "Swap Usage" service for host `clientnagios`, inheriting from
# `generic-service`. Checked every 10 time units, retried every 2 time
# units, with up to 3 attempts. The first notification is delayed by 15
# time units and then repeated every 30, for critical and recovery states
# only (c,r).
define service{
use generic-service ; Name of service template to use
host_name clientnagios
service_description Swap Usage
# Arguments 20 and 10 are passed to check_local_swap.
# NOTE(review): presumably warning and critical free-swap thresholds.
# The `check_local_` prefix suggests the plugin runs on the Nagios server
# itself, not on clientnagios via NRPE - confirm against the command
# definition.
check_command check_local_swap!20!10
initial_state u
max_check_attempts 3
check_interval 10
retry_interval 2
check_period 24x7
notification_interval 30
first_notification_delay 15
notification_period 24x7
notifications_enabled 1
notification_options c,r
}