十年网站开发经验 + 多家企业客户 + 靠谱的建站团队
量身定制 + 运营维护+专业推广+无忧售后,网站问题一站解决
nagios简介: Nagios是一款开源的电脑系统和网络监视工具,能有效监控Windows、Linux和Unix的主机状态,交换机、路由器等网络设备,打印机等。在系统或服务状态异常时发出邮件或短信报警,第一时间通知网站运维人员,在状态恢复后发出正常的邮件或短信通知。
以下主机所用系统为红帽6.5
nagios的安装:
在server2.example.com这台主机中:
主机ip为172.25.254.2
关闭防火墙和selinux
nagios从官网下载所需要的源码包和插件包。
这里用的源码包是 nagios-cn-3.2.3.tar.bz2,插件包是 nagios-plugins-2.1.1.tar.gz
yum install gcc gd-devel #安装所需的编译环境
yum install httpd #要通过apache访问
tar jxf nagios-cn-3.2.3.tar.bz2 #解压源码包
cd nagios-cn-3.2.3
useradd -M -d /usr/local/nagios nagios #创建nagios用户
groupadd nagcmd #创建可以通过web提交外部命令的组
usermod -G nagcmd nagios
usermod -G nagcmd apache #将apache和nagios加入这个组
./configure --with-command-group=nagcmd #配置编译环境指定组为nagcmd
make all #将源码包编译成可执行的二进制文件
make install #安装
make install-init #安装启动脚本
make install-config #安装简单的配置文件
make install-webconf #安装简单的web配置文件
make install-commandmode
htpasswd -c /usr/local/nagios/etc/htpasswd.users nagiosadmin #创建密码文件并设定nagios登录的用户密码
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置正确性
/etc/init.d/nagios start #启动nagios
/etc/init.d/httpd start #启动apache
我们可以用web登录测试一下,如图表示成功:
站在用户的角度思考问题,与客户深入沟通,找到罗田网站设计与罗田网站推广的解决方案,凭借多年的经验,让设计与互联网技术结合,创造个性化、用户体验好的作品,建站类型包括:成都网站建设、网站设计、企业官网、英文网站、手机端网站、网站推广、域名与空间、雅安服务器托管、企业邮箱。业务覆盖罗田地区。
接下来安装插件
tar zxf nagios-plugins-2.1.1.tar.gz #解压插件包
cd nagios-plugins-2.1.1
./configure
--with-mysql和--with-openssl显示no,表明缺少之后所需的mysql-devel和openssl-devel开发包:
yum install mysql-devel openssl-devel -y
./configure #再次配置
make all
make install
cd /usr/local/nagios/libexec/ #nagios插件所在目录
chown nagios.nagios . -R #将这个目录下的文件都改成nagios所有组和所有用户
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置
/etc/init.d/nagios reload #重新加载nagios
如图显示插件成功:
接着是配置属于自己的nagios
cd /usr/local/nagios/etc/
在nagios.cfg中
cfg_file=/usr/local/nagios/etc/objects/hosts.cfg
#指定hosts.cfg文件用来指定被监控的主机地址以及相关属性信息
cfg_file=/usr/local/nagios/etc/objects/services.cfg
#指定services.cfg文件用于定义监控的服务和主机资源
#cfg_file=/usr/local/nagios/etc/objects/localhost.cfg
#取消示例
cd /usr/local/nagios/etc/objects/
cp localhost.cfg hosts.cfg -p
cp localhost.cfg services.cfg -p
通过示例来配置hosts.cfg和services.cfg文件
在hosts.cfg中因为此时只监控本机一台主机所以只保留:
define host{
use linux-server
host_name server2.example.com
alias Manager
address 172.25.254.2
icon_image server.gif
statusmap_image server.gd2
2d_coords 500,200
3d_coords 500,200,100
}
define hostgroup{
hostgroup_name linux-servers
alias Linux Servers
members *
}
在services.cfg中,因为只监控一台主机,所以只写一个服务组
define servicegroup{
servicegroup_name 系统负荷检查
alias 负荷检查
members server2.example.com,进程总数,server2.example.com,登录用户数,server2.example.com,根分区,server2.example.com,交换空间利用率
}
监控ping的情况
define service{
use local-service
host_name *
service_description PING
check_command check_ping!100.0,20%!500.0,60%
}
监控硬盘情况
define service{
use local-service
host_name server2.example.com
service_description 根分区
check_command check_local_disk!20%!10%!/
}
监控登录用户数
define service{
use local-service
host_name server2.example.com
service_description 登录用户数
check_command check_local_users!20!50
}
监控系统负荷
define service{
use local-service
host_name server2.example.com
service_description 系统负荷
check_command check_local_load!5.0,4.0,3.0!10.0,6.0,4.0
}
监控进程总数
define service{
use local-service
host_name server2.example.com
service_description 进程总数
check_command check_local_procs!250!400!RSZDT
}
监控交换空间利用率
define service{
use local-service
host_name server2.example.com
service_description 交换空间利用率
check_command check_local_swap!20!10
}
监控ssh情况
define service{
use local-service
host_name server2.example.com
service_description SSH
check_command check_tcp!22!1.0!10.0
notifications_enabled 0
}
监控apache
define service{
use local-service
host_name server2.example.com
service_description HTTP
check_command check_http
notifications_enabled 0
}
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置
/etc/init.d/nagios reload #重新加载nagios
如图显示本机的监控成功
在server3.example.com主机中:
ip为172.25.254.3
关闭防火墙和selinux
yum install mysql-server #安装mysql服务
/etc/init.d/mysqld start #启动mysql
mysql_secure_installation #设置mysql root用户密码
mysql -p #用root用户进入mysql
create database nagdb; #创建nagdb库
grant select on nagdb.* to nagios@172.25.254.2 identified by 'nagios';
#给远程用户nagios查看nagdb的权力并设定密码为nagios
在server2.example.com中
cd /usr/local/nagios/libexec
./check_mysql -H 172.25.254.3 -unagios -pnagios
出现如图画面,说明nagios可以监控在server3.example.com中创建的mysql库
cd /usr/local/nagios/etc/objects
在 commands.cfg 中添加监控mysql的命令
# 'check_mysql' command definition
define command{
command_name check_mysql
command_line $USER1$/check_mysql -H $HOSTADDRESS$ -u $ARG1$ -p $ARG2$
}
在services.cfg中添加监控mysql
##########################check_mysql
define service{
use local-service
host_name server3.example.com
service_description MYSQL
check_command check_mysql!nagios!nagios
notifications_enabled 0
}
在hosts.cfg中添加
define host{
use linux-server
host_name server3.example.com
alias Manager
parents server2.example.com
address 172.25.254.3
icon_p_w_picpath server.gif
statusmap_p_w_picpath server.gd2
2d_coords 400,100
3d_coords 400,100,100
}
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置
/etc/init.d/nagios reload #重新加载nagios
如图表示成功检测server3主机上的mysql
nrpe安装使nagios监控多台主机的情况
tar zxf nrpe-2.15.tar.gz #解压nrpe包
tar zxf nagios-plugins-2.1.1.tar.gz #解压插件包
yum install mysql-devel openssl-devel -y
useradd -M -d /usr/local/nagios nagios #创建nagios用户
yum install gcc
cd nagios-plugins-2.1.1
./configure
make all
make install
cd /usr/local/nagios/libexec/ #nagios插件所在目录
chown nagios.nagios . -R #将这个目录下的文件都改成nagios所有组和所有用户
yum install xinetd
cd /root/nrpe-2.15 #nrpe-2.15.tar.gz解压目录
./configure
make all
make install
make install-plugin
make install-daemon
make install-daemon-config
make install-xinetd
vim /etc/xinetd.d/nrpe #设定监控主机为172.25.254.2
only_from = 172.25.254.2
vim /etc/services
nrpe 5666/tcp #nrpe的5666端口
/etc/init.d/xinetd start #开启xinetd
cd /usr/local/nagios/etc/
vim nrpe.cfg
将command[check_hda1]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p /dev/hda1改为
command[check_disk]=/usr/local/nagios/libexec/check_disk -w 20% -c 10% -p / #监控根分区
/etc/init.d/xinetd restart #重启xinetd服务
cd /usr/local/nagios/libexec/
scp check_nrpe 172.25.254.2:/usr/local/nagios/libexec/ #将check_nrpe传到server2.example.com主机中
在server2.example.com中
cd /usr/local/nagios/libexec/
chown nagios.nagios check_nrpe #将check_nrpe所有人和所有组改为nagios
./check_nrpe -H 172.25.254.3 #检测nrpe是否可用,成功则显示nrpe版本号
cd /usr/local/nagios/etc/objects/
在 commands.cfg 中添加check_nrpe的命令
# 'check_nrpe' command definition
define command{
command_name check_nrpe
command_line $USER1$/check_nrpe -H $HOSTADDRESS$ -c $ARG1$
}
在services.cfg中添加
define service{
use local-service
host_name server3.example.com
service_description 根分区
check_command check_nrpe!check_disk
}
define service{
use local-service
host_name server3.example.com
service_description 登录用户数
check_command check_nrpe!check_users
}
监控根分区和登录人数
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置
/etc/init.d/nagios reload #重新加载nagios
在server3.example.com中重启xinetd服务
如图可见监控成功
nagios的110云报警
到OneAlert官网下载所用的软件包,这里用的是
alert-agent-4.1.3.1-linux-x64.tar.gz
在OneAlert网页添加nagios应用
获取应用key
在server2.example.com主机中:
tar zxf alert-agent-4.1.3.1-linux-x64.tar.gz
cp -R alert-agent /usr/local/nagios/libexec/ #将解压好的包放入nagios插件目录中
cp alert-agent/plugin/nagios-plugin/nagios /usr/local/nagios/libexec/
chmod +x /usr/local/nagios/libexec/nagios
cp alert-agent/plugin/nagios-plugin/110monitor.cfg /usr/local/nagios/etc/objects/ #将110配置文件放入nagios配置文件目录
在110monitor.cfg文件中添加Key:
修改/usr/local/nagios/etc/objects/contacts.cfg,新增110monitor到默认联系组
define contactgroup{
contactgroup_name admins
alias Nagios Administrators
members nagiosadmin,110monitor
}
修改/usr/local/nagios/etc/nagios.cfg,将110monitor.cfg新增到nagios.cfg中
cfg_file=/usr/local/nagios/etc/objects/110monitor.cfg
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检查配置
/etc/init.d/nagios reload #重新加载nagios
用ganglia监控系统性能:
Ganglia是UCBerkeley发起的一个开源集群监视项目,设计用于测量数以千计的节点。Ganglia的核心包含gmond、gmetad以及一个Web前端。主要是用来监控系统性能,如:cpu 、mem、硬盘利用率, I/O负载、网络流量情况等,通过曲线很容易见到每个节点的工作状态,对合理调整、分配系统资源,提高系统整体性能起到重要作用。
ganglia的安装:
在server2.example.com中
yum install rpm-build
yum install -y libart_lgpl-devel autoconf automake libtool pcre-devel expat-devel rrdtool-devel-1.3.8-6.el6.x86_64
yum install libconfuse-2.6-3.el6.x86_64.rpm libconfuse-devel-2.6-3.el6.x86_64.rpm
#安装软件包依赖性
rpmbuild -tb ganglia-3.4.0.tar.gz #将源码包编译成二进制rpm包
rpmbuild -tb ganglia-web-3.4.2.tar.gz
cd /root/rpmbuild/RPMS/x86_64
rpm -ivh * #安装此目录下生成的所有rpm包
其中ganglia-gmond-3.4.0-1.x86_64.rpm ganglia-gmond-modules-python-3.4.0-1.x86_64.rpm libganglia-3.4.0-1.x86_64.rpm三个包是客户端所需的
cd /root/rpmbuild/RPMS/noarch
yum install php-gd php -y
rpm -ivh ganglia-web-3.4.2-1.noarch.rpm #安装此目录下需要解决依赖性的包
vim /etc/ganglia/gmetad.conf #进入gmetad.conf中更改cluster名
cluster {
name = "hello cluster"
owner = "unspecified"
latlong = "unspecified"
url = "unspecified"
}
进入gmond.conf中更改为相同的名字
/etc/init.d/gmetad start
/etc/init.d/gmond start #启动ganglia服务端和客户端
此时可以登录 http://server2.example.com/gweb 测试一下
集成nagios报告ganglia指标
tar zxf ganglia-3.4.0.tar.gz
cp /root/ganglia-3.4.0/contrib/check_ganglia.py /usr/local/nagios/libexec/
vim /usr/local/nagios/libexec/check_ganglia.py
配置nagios
vim /usr/local/nagios/etc/objects/commands.cfg
define command {
command_name check_ganglia
command_line $USER1$/check_ganglia.py -h $HOSTNAME$ -m $ARG1$ -w $ARG2$ -c $ARG3$
}
vim /usr/local/nagios/etc/objects/templates.cfg
define service {
use generic-service
name ganglia-service
hostgroup_name ganglia-servers
service_groups ganglia-metrics
} #配置ganglia模版
vim /usr/local/nagios/etc/objects/hosts.cfg #添加主机和主机组
define host {
use linux-server
host_name server4.example.com
address 172.25.254.4
}
define hostgroup {
hostgroup_name ganglia-servers
alias ganglia-servers
members server4.example.com
}
vim /usr/local/nagios/etc/objects/services.cfg #设置监控的服务和资源
define servicegroup {
servicegroup_name ganglia-metrics
alias ganglia-metrics
}
define service{
use ganglia-service
service_description 根分区空闲百分比
check_command check_ganglia!disk_free_percent_rootfs!20!10
}
define service{
use ganglia-service
service_description 每分钟系统负载
check_command check_ganglia!load_one!4!5
}
define service{
use ganglia-service
service_description 内存空闲
check_command check_ganglia!mem_free!50000!30000
}
/usr/local/nagios/bin/nagios -v /usr/local/nagios/etc/nagios.cfg #检测配置
/etc/init.d/nagios reload #重新加载nagios