開源監控解決方案:ICINGA(Nagios)監控Linux
阿新 • • 發佈:2018-08-06
[root@king02 ~]# useradd nagios
[root@king02 ~]# tar zxvf nagios-plugins-2.2.1.tar.gz
[root@king02 ~]# cd nagios-plugins-2.2.1
[root@king02 nagios-plugins-2.2.1]# ./configure --prefix=/usr/local/nagios --with-cgiurl=/nagios/cgi-bin --with-nagios-user=nagios --with-nagios-group=nagios
[root@king02 nagios-plugins-2.2.1]# make
[root@king02 nagios-plugins-2.2.1]# make install
[root@king02 ~]# yum install -y xinetd
[root@king02 ~]# tar zxvf nrpe-3.2.1.tar.gz
[root@king02 ~]# cd nrpe-3.2.1
[root@king02 nrpe-3.2.1]# ./configure --prefix=/usr/local/nagios --enable-ssl
[root@king02 nrpe-3.2.1]# make all
[root@king02 nrpe-3.2.1]# make install
[root@king02 nrpe-3.2.1]# make install-plugin
[root@king02 nrpe-3.2.1]# make install-daemon
[root@king02 nrpe-3.2.1]# make install-config
[root@king02 nrpe-3.2.1]# make install-inetd
[root@king02 ~]# vi /etc/xinetd.d/nrpe
# default: off
# description: NRPE (Nagios Remote Plugin Executor)
service nrpe
{
disable = no
socket_type = stream
port = 5666
wait = no
user = nagios
group = nagios
server = /usr/local/nagios/bin/nrpe
server_args = -c /usr/local/nagios/etc/nrpe.cfg --inetd
only_from = 192.168.1.201
log_on_success =
}
[root@king02 ~]# vi /etc/services
nrpe 5666/tcp # nagios
[root@king02 ~]# yum -y install perl-Time-HiRes
[root@king02 ~]# rpm -ivh perl-UNIVERSAL-require-0.13-1.el6.rf.noarch.rpm
[root@king02 ~]# tar zxvf Sys-Statistics-Linux-0.66.tar.gz
[root@king02 ~]# cd Sys-Statistics-Linux-0.66
[root@king02 Sys-Statistics-Linux-0.66]# perl Makefile.PL
[root@king02 Sys-Statistics-Linux-0.66]# make
[root@king02 Sys-Statistics-Linux-0.66]# make install
[root@king02 ~]# vi /usr/local/nagios/etc/nrpe.cfg
allowed_hosts=192.168.1.201
command[check_cpu]=/usr/local/nagios/libexec/check_linux_stats.pl -C -w 50 -c 80 -s 5
command[check_memory]=/usr/local/nagios/libexec/check_memory.py -w 20 -c 10
command[check_disk]=/usr/local/nagios/libexec/check_disk.pl -w 20 -c 10
command[check_network]=/usr/local/nagios/libexec/check_linux_stats.pl -N -w 1048576 -c 2097152 -p eth0
command[check_uptime]=/usr/local/nagios/libexec/check_linux_stats.pl -U -w 5
[root@king02 ~]# cd /usr/local/nagios/libexec
[root@king02 libexec]# chmod a+x check_linux_stats.pl
[root@king02 libexec]# chmod a+x check_memory.py
[root@king02 libexec]# chmod a+x check_disk.pl
[root@king02 libexec]# ./check_linux_stats.pl -C -w 50 -c 80 -s 5
CPU OK : idle 97.79% |idle=97.79%;50;80 user=0.00% system=0.00% iowait=2.21% steal=0.00%
[root@king02 ~]# /etc/init.d/xinetd start
Starting xinetd: [ OK ]
[root@king02 ~]# netstat -tunlp | grep 5666
tcp 0 0 :::5666 :::* LISTEN 2409/xinetd
[root@king01 ~]# cd /usr/local/icinga/etc/objects/
[root@king01 objects]# vi linux.cfg --主機
# define a host
define host{
        use                     linux-server
        host_name               sales_zx
        alias                   sales_zx
        icon_image              redhat.gif
        statusmap_image         redhat.gd2
        address                 192.168.1.202
        }
# define a hostgroup
define hostgroup{
        hostgroup_name          sales-servers
        alias                   sales-servers
        members                 sales_zx
        }
# define a servicegroup
define servicegroup {
        servicegroup_name       ping
        alias                   ping
        }
define servicegroup {
        servicegroup_name       cpu
        alias                   cpu
        }
define servicegroup {
        servicegroup_name       memory
        alias                   memory
        }
define servicegroup {
        servicegroup_name       disk
        alias                   disk
        }
define servicegroup {
        servicegroup_name       network
        alias                   network
        }
define servicegroup {
        servicegroup_name       uptime
        alias                   uptime
        }
# Define a service
define service{
        hostgroup_name          sales-servers
        use                     generic-service
        service_description     alive
        servicegroups           ping
        check_command           check_ping!100.0,20%!500.0,60%
        }
define service{
        hostgroup_name          sales-servers
        use                     generic-service
        service_description     os cpu usage
        servicegroups           cpu
        check_command           check_nrpe!check_cpu
        }
define service{
        hostgroup_name          sales-servers
        use                     generic-service
        service_description     os memory usage
        servicegroups           memory
        check_command           check_nrpe!check_memory
        }
define service{
        hostgroup_name          sales-servers
        use                     generic-service
        service_description     os disk usage
        servicegroups           disk
        check_command           check_nrpe!check_disk
        }
define service{
        hostgroup_name          sales-servers
        use                     generic-service
        service_description     os network usage
        servicegroups           network
        check_command           check_nrpe!check_network
        }
define service{
        hostgroup_name          sales-servers
        use                     generic-service
        service_description     os uptime
        servicegroups           uptime
        check_command           check_nrpe!check_uptime
        }
[root@king01 objects]# vi commands.cfg --命令
# 'check_nrpe' command definition
define command{
        command_name            check_nrpe
        command_line            $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$ -t 30
        }
[root@king01 objects]# vi templates.cfg --模板
define host{
        name                            linux-server    ; The name of this host template
        use                             generic-host    ; This template inherits other values from the generic-host template
        check_period                    24x7            ; By default, Linux hosts are checked round the clock
        check_interval                  1               ; Actively check the host every minute
        retry_interval                  1               ; Schedule host check retries at 1 minute intervals
        max_check_attempts              2               ; Check each Linux host 2 times (max)
        check_command                   check-host-alive ; Default command to check Linux hosts
        notification_period             workhours       ; Linux admins hate to be woken up, so we only notify during the day
                                                        ; Note that the notification_period variable is being overridden from
                                                        ; the value that is inherited from the generic-host template!
        notification_interval           120             ; Resend notifications every 2 hours
        notification_options            d,u,r           ; Only send notifications for specific host states
        contact_groups                  admins          ; Notifications get sent to the admins by default
        register                        0               ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL HOST, JUST A TEMPLATE!
        }
define service{
        name                            generic-service ; The 'name' of this service template
        active_checks_enabled           1               ; Active service checks are enabled
        passive_checks_enabled          1               ; Passive service checks are enabled/accepted
        parallelize_check               1               ; Active service checks should be parallelized (disabling this can lead to major performance problems)
        obsess_over_service             1               ; We should obsess over this service (if necessary)
        check_freshness                 0               ; Default is to NOT check service 'freshness'
        notifications_enabled           1               ; Service notifications are enabled
        event_handler_enabled           1               ; Service event handler is enabled
        flap_detection_enabled          1               ; Flap detection is enabled
        failure_prediction_enabled      1               ; Failure prediction is enabled
        process_perf_data               1               ; Process performance data
        retain_status_information       1               ; Retain status information across program restarts
        retain_nonstatus_information    1               ; Retain non-status information across program restarts
        is_volatile                     0               ; The service is not volatile
        check_period                    24x7            ; The service can be checked at any time of the day
        max_check_attempts              2               ; Re-check the service up to 2 times in order to determine its final (hard) state
        check_interval                  1               ; Check the service every minute under normal conditions
        retry_interval                  1               ; Re-check the service every minute until a hard state can be determined
        contact_groups                  admins          ; Notifications get sent out to everyone in the 'admins' group
        notification_options            w,u,c,r         ; Send notifications about warning, unknown, critical, and recovery events
        notification_interval           60              ; Re-notify about service problems every hour
        notification_period             24x7            ; Notifications can be sent out at any time
        register                        0               ; DONT REGISTER THIS DEFINITION - ITS NOT A REAL SERVICE, JUST A TEMPLATE!
        }
開源監控解決方案:ICINGA(Nagios)監控Linux