shell系统监控及日志分析
Posted mkmkmk
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了shell系统监控及日志分析相关的知识,希望对你有一定的参考价值。
shell系统监控及日志分析
系统监控所需的数据来源:CPU 使用情况在 /proc/stat(CPU 个数等硬件信息在 /proc/cpuinfo)
内存信息在 /proc/meminfo
网络流量信息在 /proc/net/dev
磁盘 IO 信息在 /proc/vmstat
脚本代码
1 #!/bin/bash
2 #监控linux主机系统信息
3 #导入工具模块
4 source utils
5
# Print "<total> <used>" jiffies taken from the aggregate "cpu" line of
# /proc/stat. "used" is user + nice + system + irq + softirq (fields 2,3,4,7,8).
function _cpuSample()
{
  awk '/^cpu / {
    total = 0
    for (i = 2; i <= NF; i++) total += $i
    used = $2 + $3 + $4 + $7 + $8
    # %.0f keeps large counters in plain integer form on every awk flavour.
    printf "%.0f %.0f\n", total, used
    exit
  }' /proc/stat
}

# Log the CPU topology and the CPU usage percentage of the local machine.
#
# Globals:   ip (read) - host label used in the log messages.
# Arguments: none
# Outputs:   four log lines written through logInfo.
# Notes:     blocks for 30 seconds between the two /proc/stat samples.
function cpuUsage()
{
  local phyCPUNums lgCPUNums cores
  local cpuTotalStart cpuUsedStart cpuTotalEnd cpuUsedEnd usedCPU totalCPU

  # Number of physical CPUs (distinct "physical id" values).
  phyCPUNums=$(grep "physical id" /proc/cpuinfo | sort | uniq | wc -l)
  # Number of logical CPUs.
  lgCPUNums=$(grep -c "^processor" /proc/cpuinfo)
  # Core count, fourth word of the "cpu cores : N" line.
  cores=$(grep "cores" /proc/cpuinfo | uniq | awk '{print $4}')
  logInfo "Host [${ip}] physical CPU nums is : ${phyCPUNums}"
  logInfo "Host [${ip}] logic CPU nums is : ${lgCPUNums}"
  logInfo "Host [${ip}] core nums is : ${cores}"

  # Take two samples 30 seconds apart; usage is the ratio of the deltas.
  # Total and used come from the same read so they describe the same instant.
  read -r cpuTotalStart cpuUsedStart < <(_cpuSample)
  sleep 30
  read -r cpuTotalEnd cpuUsedEnd < <(_cpuSample)
  usedCPU=$(( cpuUsedEnd - cpuUsedStart ))
  totalCPU=$(( cpuTotalEnd - cpuTotalStart ))
  logInfo "Host [${ip}] CPU usage is : $(usagePercent "${usedCPU}" "${totalCPU}") %"
}
31
# Log total memory (GB) and memory usage (%) of the local machine.
#
# Globals:   ip (read) - host label used in the log messages.
# Arguments: none
# Outputs:   log lines written through logInfo.
# Notes:     "used" is MemTotal - MemFree, so buffers and page cache count
#            as used memory.
function memUsage() {
  local totalMem freeMem usedMem

  logInfo "Begin to get mem usage of Host [${ip}]"
  # Total memory in kB.
  totalMem=$(awk '/^MemTotal:/ {print $2}' /proc/meminfo)
  # Free memory in kB.
  freeMem=$(awk '/^MemFree:/ {print $2}' /proc/meminfo)
  usedMem=$(( totalMem - freeMem ))
  logInfo "Host [${ip}] total mem is : $(kbToGb "${totalMem}") GB"
  # Compute the usage percentage and write it to the log.
  logInfo "Host [${ip}] mem usage is : $(usagePercent "${usedMem}" "${totalMem}") %"
  logInfo "End to get mem usage of Host [${ip}]"
}
47
# Print "<rx_bytes> <tx_bytes>" for exactly the interface named in $1, read
# from /proc/net/dev. Exits non-zero and prints nothing if it is not listed.
function _netBytes() {
  awk -v iface="$1" '
    {
      line = $0
      sub(/^[ \t]+/, "", line)
    }
    index(line, iface ":") == 1 {
      # Drop "name:" so the counters are fields 1..16 even when the first
      # counter is glued to the colon.
      sub(/^[^:]*:/, "", line)
      split(line, f)
      print f[1], f[9]
      found = 1
      exit
    }
    END { exit !found }
  ' /proc/net/dev
}

# Log the average receive/transmit rate of one network interface.
#
# Globals:   ip (read) - host label used in the log messages.
# Arguments: $1 - interface name as listed in /proc/net/dev (e.g. eth0).
# Outputs:   log lines written through logInfo; rates are in KB per second.
# Returns:   0 on success, 1 if the interface is not found.
# Notes:     blocks for 10 seconds between the two samples.
function netData() {
  local ethName=$1
  local interval=10
  local receiveByteStart sendByteStart receiveByteEnd sendByteEnd
  local inDataRate outDataRate

  logInfo "Begin to get net data of Host [${ip}]"
  if ! read -r receiveByteStart sendByteStart < <(_netBytes "${ethName}"); then
    logInfo "ERROR network interface [${ethName}] not found in /proc/net/dev"
    return 1
  fi
  sleep "${interval}"
  if ! read -r receiveByteEnd sendByteEnd < <(_netBytes "${ethName}"); then
    logInfo "ERROR network interface [${ethName}] not found in /proc/net/dev"
    return 1
  fi
  # The counters are bytes; divide by 1024 so the value matches the unit
  # printed in the log line.
  inDataRate=$(LC_ALL=C awk -v s="${receiveByteStart}" -v e="${receiveByteEnd}" \
    -v t="${interval}" 'BEGIN { printf "%.2f\n", (e - s) / t / 1024 }')
  outDataRate=$(LC_ALL=C awk -v s="${sendByteStart}" -v e="${sendByteEnd}" \
    -v t="${interval}" 'BEGIN { printf "%.2f\n", (e - s) / t / 1024 }')
  logInfo "Host [${ip}] in data is : ${inDataRate} kb / s"
  logInfo "Host [${ip}] out data is : ${outDataRate} kb / s"
  logInfo "End to get net data of Host [${ip}]"
}
63
# Append the human-readable `df -h` report to the log, framed by a begin and
# an end marker.
#
# Globals:   ip (read) - host label used in the marker lines.
# Arguments: none
function diskUsage() {
  local dfReport

  logInfo "Begin to get disk usage of Host [${ip}]"
  dfReport=$(df -h)
  # The report is multi-line, so it is written without a timestamp prefix.
  noTimeLogInfo "${dfReport}"
  logInfo "End to get disk usage of Host [${ip}]"
}
70
# Log the average disk read (page-in) rate over a 30 second window.
#
# Globals:   ip (read) - host label used in the log message.
# Arguments: none
# Outputs:   one log line written through logInfo, value in whole MB per second.
# Notes:     blocks for 30 seconds. The pgpgin counter of /proc/vmstat is
#            treated as kilobytes, hence the division by 1024.
function diskIOIn() {
  local interval=30
  local inIoStart inIoEnd inIo

  # Sample the inbound IO counter twice. The start value must use the same
  # variable name as the subtraction below, otherwise the delta degenerates
  # to the cumulative counter.
  inIoStart=$(awk '/^pgpgin / {print $2}' /proc/vmstat)
  sleep "${interval}"
  inIoEnd=$(awk '/^pgpgin / {print $2}' /proc/vmstat)
  inIo=$(( (inIoEnd - inIoStart) / (interval * 1024) ))
  logInfo "Host [${ip}] in IO is : ${inIo} MB / s"
}
81
# Log the average disk write (page-out) rate over a 60 second window.
#
# Globals:   ip (read) - host label used in the log message.
# Arguments: none
# Outputs:   one log line written through logInfo, value in whole MB per second.
# Notes:     blocks for 60 seconds. The pgpgout counter of /proc/vmstat is
#            treated as kilobytes, hence the division by 1024.
function diskIOout() {
  local interval=60
  local outIoStart outIoEnd outIo

  # Sample the outbound IO counter twice and average the difference.
  outIoStart=$(awk '/^pgpgout / {print $2}' /proc/vmstat)
  sleep "${interval}"
  outIoEnd=$(awk '/^pgpgout / {print $2}' /proc/vmstat)
  outIo=$(( (outIoEnd - outIoStart) / (interval * 1024) ))
  logInfo "Host [${ip}] out IO is : ${outIo} MB / s"
}
91
# Log how many login sessions `w` reports, followed by one
# "user  login-time" line per session.
#
# Arguments: none
# Outputs:   log lines written through logInfo / noTimeLogInfo.
function onlineUser() {
  local sessions user userCount

  # `w` prints two header lines (uptime summary, then column titles); both
  # are skipped so the title row is neither listed nor counted as a user.
  sessions=$(w | awk 'NR > 2 && NF')
  # NOTE(review): column 4 is LOGIN@ only when the FROM column is present
  # and non-empty - confirm against the `w` version in use.
  user=$(awk 'NF {print $1 "  " $4}' <<<"${sessions}")
  userCount=$(awk 'NF {n++} END {print n + 0}' <<<"${sessions}")
  logInfo "There are [${userCount}] users online now."
  noTimeLogInfo "UserName loginAt"
  noTimeLogInfo "${user}"
}
101
# Ping every host listed in the file "hostLists" (current directory) and, for
# each reachable one, run the whole set of metric collectors.
#
# Globals:   ip (written) - set to the host being checked; the collector
#            functions read it to label their log lines.
# Arguments: none
# Returns:   1 if the host list cannot be read, otherwise the status of the
#            last command executed.
# NOTE(review): the collectors read the local /proc files, so the logged
# metrics appear to describe this machine rather than the pinged host.
function isAlive() {
  local hostFile="hostLists"
  local -a hosts=()

  if [[ ! -r ${hostFile} ]]; then
    logInfo "ERROR host list file [${hostFile}] is missing or unreadable"
    return 1
  fi
  # Split on any whitespace (one or several hosts per line) without glob
  # expansion; read returns non-zero at end of file, which is expected.
  read -r -d '' -a hosts < "${hostFile}" || true

  for ip in "${hosts[@]}"; do
    # Options go before the host so non-GNU ping implementations accept them.
    if ping -c 3 "${ip}" > /dev/null; then
      logInfo "${ip} is reachable"
      # Logged-in users.
      onlineUser
      # CPU information.
      cpuUsage
      # Memory information.
      memUsage
      # Disk IO.
      diskIOIn
      diskIOout
      # Disk space usage.
      diskUsage
      # Average received / sent traffic per second.
      netData wlp3s0
    else
      logInfo "ERROR ${ip} is unreachable,try login in see more details.."
    fi
  done
}
127
# Entry point: run a full monitoring pass forever, pausing 60 seconds
# between passes.
while true; do
  isAlive
  sleep 60
done
日志打印及工具函数(即上面监控脚本通过 source utils 导入的文件)
1 #!/bin/bash
2 #日志打印
# Directory the script was started from; logInfo creates the log file here.
curr_path=$(pwd)
# Append one timestamped "[Info]" line to ${curr_path}/system_status.log.
#
# Globals:   curr_path (read)  - directory that holds the log file.
#            log_file (written) - path of the log file; left global on
#            purpose because noTimeLogInfo appends to the same file.
# Arguments: $1 - message text.
# Outputs:   "<YYYY-mm-dd HH:MM:SS> <user> [Info] <message>" appended to the log.
function logInfo()
{
  local curr_time
  local info=$1

  curr_time=$(date "+%Y-%m-%d %H:%M:%S")
  log_file=${curr_path}/system_status.log
  if [[ -e ${log_file} ]]; then
    # Only touch the permissions when the file is NOT writable; an already
    # writable file keeps whatever mode it has.
    if [[ ! -w ${log_file} ]]; then
      chmod 770 "${log_file}"
    fi
  else
    # Create the log file on first use.
    touch "${log_file}"
  fi
  printf '%s %s [Info] %s\n' "${curr_time}" "$(whoami)" "${info}" >> "${log_file}"
}
# Append a message to the log file verbatim, without timestamp or user prefix
# (used for multi-line reports and table rows).
#
# Globals:   log_file (read; initialised to ${curr_path}/system_status.log
#            when it is still unset, so the function also works before the
#            first logInfo call).
# Arguments: $1 - text to append; may span several lines.
function noTimeLogInfo(){
  local msg=$1

  : "${log_file:=${curr_path}/system_status.log}"
  # printf instead of echo so messages such as "-n" are written literally.
  printf '%s\n' "${msg}" >> "${log_file}"
}
29
# Convert a size in KB to GB with three decimal places.
#
# Arguments: $1 - size in KB (integer or decimal).
# Outputs:   the size in GB on stdout, rounded to three decimals and always
#            with a leading digit (e.g. "0.500"); empty or non-numeric input
#            is treated as 0.
function kbToGb(){
  local kbVal=$1

  # awk does the floating point division; LC_ALL=C pins "." as the decimal
  # separator regardless of the caller's locale.
  LC_ALL=C awk -v kb="${kbVal}" 'BEGIN { printf "%.3f\n", kb / 1024 / 1024 }'
}
# Express a used/total pair as a percentage with two decimal places.
#
# Arguments: $1 - amount used.
#            $2 - total amount.
# Outputs:   the percentage on stdout, rounded to two decimals and always
#            with a leading digit (e.g. "0.50"); "0.00" when the total is
#            zero or empty, instead of a division error.
function usagePercent(){
  local used=$1
  local total=$2

  # LC_ALL=C pins "." as the decimal separator regardless of locale.
  LC_ALL=C awk -v used="${used}" -v total="${total}" 'BEGIN {
    if (total + 0 == 0) {
      print "0.00"
      exit
    }
    printf "%.2f\n", used * 100 / total
  }'
}
以上是关于shell系统监控及日志分析的主要内容,如果未能解决你的问题,请参考以下文章