pgsql 运行状态 采集脚本

Posted 我的二狗子呢

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了pgsql 运行状态 采集脚本相关的知识,希望对你有一定的参考价值。

脚本来自德哥的github地址,https://github.com/digoal/pgsql_admin_script/blob/master/generate_report.sh

 

内容如下:

#!/bin/bash

# PostgreSQL / operating-system inspection report generator.
#
# Tested on CentOS 6.x
# author: digoal
# 2015-10
# Required privileges: OS: root, PG: superuser
#
# Usage (the script is meant to be sourced, so it deliberately does not
# enable "set -e"/"set -u": that would alter or terminate the calling shell,
# and many probes below are expected to fail on some hosts):
#   . ./generate_report.sh >/tmp/report.log 2>&1
#
# Build a table of contents from the report:
#   grep -E "^----->>>|^\|" /tmp/report.log | sed 's/^----->>>---->>>/ /' | sed '1 i 目录 ' | sed '$ a 正文 '
# NOTE(review): the second grep alternative is presumed to be "^\|" (section
# banners start with "|"); the copy this was taken from had lost the backslash.

# Adjust the variables below to match the current environment, and make sure
# that connecting to any database with these settings needs no password prompt.

export PGHOST=127.0.0.1
export PGPORT=5432
export PGDATABASE=postgres
export PGUSER=postgres
#export PGPASSWORD=postgres
export PGDATA=/var/lib/pgsql/11/data
export PGHOME=/usr/pgsql-11/

export PATH="$PGHOME/bin:$PATH:."
# Timestamp printed in the report header (YYYYmmddHHMM).
DATE=$(date +"%Y%m%d%H%M")
export DATE
export LD_LIBRARY_PATH="$PGHOME/lib:/lib64:/usr/lib64:/usr/local/lib64:/lib:/usr/lib:/usr/local/lib:$LD_LIBRARY_PATH"


# Remember the current directory.
# NOTE(review): bash itself rewrites PWD on every "cd", so this assignment
# does not preserve the starting directory across later "cd" calls.
PWD=$(pwd)

# Extract the log_directory setting from postgresql.conf: keep only active
# (non-comment) lines, strip trailing "#" comments, take the text after "=".
# The value keeps whatever quoting/whitespace the config file uses.
pg_log_dir=$(grep '^ *[a-z]' "$PGDATA/postgresql.conf" \
  | awk -F "#" '{print $1}' \
  | grep log_directory \
  | awk -F "=" '{print $2}')

# Ask the server whether it is in recovery ("t" on a standby, "f" otherwise).
is_standby=$(psql --pset=pager=off -q -A -t -c 'select pg_is_in_recovery()')


echo " ----- PostgreSQL 巡检报告 ----- "
echo " ===== $DATE ===== "


# Quoted comparison: an empty result (psql failed) no longer makes the test
# itself error out; it simply falls through to the "primary" branch.
if [[ "$is_standby" == 't' ]]; then
  echo " ===== 这是standby节点 ===== "
else
  echo " ===== 这是primary节点 ===== "
fi
echo ""
echo ""


# ---------------------------------------------------------------------------
# Operating-system section of the report.
# Each item prints a "----->>>---->>>" heading followed by the raw output of
# one probe command. Probes are best-effort: a missing tool or file just
# leaves its own error message in the report and the script carries on.
# ---------------------------------------------------------------------------
echo "|+++++++++++++++++++++++++++++++++++++++++++++++++++++++++|"
echo "| 操作系统信息 |"
echo "|+++++++++++++++++++++++++++++++++++++++++++++++++++++++++|"
echo ""

echo "----->>>---->>> 主机名: "
hostname -s
echo ""
echo "----->>>---->>> 以太链路信息: "
ip link show
echo ""
echo "----->>>---->>> IP地址信息: "
ip addr show
echo ""
echo "----->>>---->>> 路由信息: "
ip route show
echo ""
echo "----->>>---->>> 操作系统内核: "
uname -a
echo ""
echo "----->>>---->>> 内存(MB): "
free -m
echo ""
echo "----->>>---->>> CPU: "
lscpu
echo ""
echo "----->>>---->>> 块设备: "
lsblk
echo ""
echo "----->>>---->>> 拓扑: "
lstopo-no-graphics
echo ""
echo "----->>>---->>> 进程树: "
pstree -a -A -c -l -n -p -u -U -Z
echo ""

# Static configuration files.
echo "----->>>---->>> 操作系统配置文件 静态配置信息: "
echo "----->>>---->>> /etc/sysctl.conf "
grep "^[a-z]" /etc/sysctl.conf
echo ""
echo "----->>>---->>> /etc/security/limits.conf "
grep -v "^#" /etc/security/limits.conf | grep -v "^$"
echo ""
echo "----->>>---->>> /etc/security/limits.d/*.conf "
# Glob instead of parsing "ls" so file names with spaces survive intact.
for limits_file in /etc/security/limits.d/*; do
  [[ -e "$limits_file" ]] || continue   # directory empty or missing
  echo "$limits_file : "
  grep -v "^#" "$limits_file" | grep -v "^$"
done
echo ""
echo "----->>>---->>> /etc/sysconfig/iptables "
cat /etc/sysconfig/iptables
echo ""
echo "----->>>---->>> /etc/fstab "
cat /etc/fstab
echo ""
echo "----->>>---->>> /etc/rc.local "
cat /etc/rc.local
echo ""
echo "----->>>---->>> /etc/selinux/config "
cat /etc/selinux/config
echo ""
echo "----->>>---->>> /boot/grub/grub.conf "
cat /boot/grub/grub.conf
echo ""
echo "----->>>---->>> /var/spool/cron 用户cron配置 "
# One crontab file per user; same glob approach as for limits.d above.
for cron_file in /var/spool/cron/*; do
  [[ -e "$cron_file" ]] || continue
  echo "$cron_file : "
  cat "$cron_file"
done
echo ""
echo "----->>>---->>> chkconfig --list "
chkconfig --list
echo ""

# Live (runtime) configuration.
echo "----->>>---->>> iptables -L -v -n -t filter 动态配置信息: "
iptables -L -v -n -t filter
echo ""
echo "----->>>---->>> iptables -L -v -n -t nat 动态配置信息: "
iptables -L -v -n -t nat
echo ""
echo "----->>>---->>> iptables -L -v -n -t mangle 动态配置信息: "
iptables -L -v -n -t mangle
echo ""
echo "----->>>---->>> iptables -L -v -n -t raw 动态配置信息: "
iptables -L -v -n -t raw
echo ""
echo "----->>>---->>> sysctl -a 动态配置信息: "
sysctl -a
echo ""
echo "----->>>---->>> mount 动态配置信息: "
mount -l
echo ""
echo "----->>>---->>> selinux 动态配置信息: "
getsebool
sestatus
echo ""
echo "----->>>---->>> 建议禁用Transparent Huge Pages (THP): "
# Both the generic and the redhat_-prefixed sysfs paths are probed; whichever
# does not exist on this kernel just produces a "No such file" message.
cat /sys/kernel/mm/transparent_hugepage/enabled
cat /sys/kernel/mm/transparent_hugepage/defrag
cat /sys/kernel/mm/redhat_transparent_hugepage/enabled
cat /sys/kernel/mm/redhat_transparent_hugepage/defrag
echo ""
echo "----->>>---->>> 硬盘SMART信息(需要root): "
# Everything before "#" on each scan line is fed back to "smartctl -a".
smartctl --scan | awk -F "#" '{print $1}' | while read -r smart_dev; do
  echo -e " DEVICE $smart_dev"
  # Deliberately unquoted: the scan line can hold several words (presumably the
  # device path plus its "-d TYPE" option) that must become separate arguments.
  # shellcheck disable=SC2086
  smartctl -a $smart_dev
done
echo ""

# System logs.
echo "----->>>---->>> /var/log/boot.log "
cat /var/log/boot.log
echo ""
# Disabled: dump of /var/log/cron (requires root).
#echo "----->>>---->>> /var/log/cron(需要root) "
#cat /var/log/cron
#echo ""
echo "----->>>---->>> /var/log/dmesg "
cat /var/log/dmesg
echo ""
echo "----->>>---->>> /var/log/messages(需要root) "
tail -n 500 /var/log/messages
echo ""
echo "----->>>---->>> /var/log/secure(需要root) "
cat /var/log/secure
echo ""
echo "----->>>---->>> /var/log/wtmp "
who -a /var/log/wtmp
echo -e " "


# ---------------------------------------------------------------------------
# Database section of the report.
# Connection parameters come from the exported PG* environment variables.
# The SQL is passed in ASCII single quotes so that the shell leaves the
# dollar-quoted literals ($$...$$) untouched.
# ---------------------------------------------------------------------------
echo "|+++++++++++++++++++++++++++++++++++++++++++++++++++++++++|"
echo "| 数据库信息 |"
echo "|+++++++++++++++++++++++++++++++++++++++++++++++++++++++++|"
echo ""

echo "----->>>---->>> 数据库版本: "
psql --pset=pager=off -q -c 'select version()'

# Fetch the list of databases (everything except the templates) once and
# reuse it for every per-database query below. One name per array element,
# so database names containing spaces are handled correctly.
report_dbs=()
mapfile -t report_dbs < <(psql --pset=pager=off -t -A -q -c 'select datname from pg_database where datname not in ($$template0$$, $$template1$$)')

echo "----->>>---->>> 用户已安装的插件版本: "
for db in "${report_dbs[@]}"; do
  [[ -n "$db" ]] || continue
  psql -d "$db" --pset=pager=off -q -c 'select current_database(),* from pg_extension'
done

echo "----->>>---->>> 用户使用了多少种数据类型: "
for db in "${report_dbs[@]}"; do
  [[ -n "$db" ]] || continue
  # Column counts per data type, restricted to relations outside the pg_* and
  # information_schema namespaces.
  psql -d "$db" --pset=pager=off -q -c 'select current_database(),b.typname,count(*) from pg_attribute a,pg_type b where a.atttypid=b.oid and a.attrelid in (select oid from pg_class where relnamespace not in (select oid from pg_namespace where nspname ~ $$^pg_$$ or nspname=$$information_schema$$)) group by 1,2 order by 3 desc'
done

echo "----->>>---->>> 用户创建了多少对象: "
for db in "${report_dbs[@]}"; do
  [[ -n "$db" ]] || continue
  # Object counts grouped by owner role, schema and relkind.
  psql -d "$db" --pset=pager=off -q -c 'select current_database(),rolname,nspname,relkind,count(*) from pg_class a,pg_authid b,pg_namespace c where a.relnamespace=c.oid and a.relowner=b.oid and nspname !~ $$^pg_$$ and nspname<>$$information_schema$$ group by 1,2,3,4 order by 5 desc'
done

echo "----->>>---->>> 用户对象占用空间的柱状图: "
for db in "${report_dbs[@]}"; do
  [[ -n "$db" ]] || continue
  # Relation sizes split into 10 width_bucket() buckets between the smallest
  # and largest relation; one row per bucket with its count and size range.
  psql -d "$db" --pset=pager=off -q -c 'select current_database(),buk this_buk_no,cnt rels_in_this_buk,pg_size_pretty(min) buk_min,pg_size_pretty(max) buk_max from( select row_number() over (partition by buk order by tsize),tsize,buk,min(tsize) over (partition by buk),max(tsize) over (partition by buk),count(*) over (partition by buk) cnt from ( select pg_relation_size(a.oid) tsize, width_bucket(pg_relation_size(a.oid),tmin-1,tmax+1,10) buk from (select min(pg_relation_size(a.oid)) tmin,max(pg_relation_size(a.oid)) tmax from pg_class a,pg_namespace c where a.relnamespace=c.oid and nspname !~ $$^pg_$$ and nspname<>$$information_schema$$) t, pg_class a,pg_namespace c where a.relnamespace=c.oid and nspname !~ $$^pg_$$ and nspname<>$$information_schema$$ ) t)t where row_number=1;'
done

echo "----->>>---->>> 当前用户的操作系统定时任务: "
echo "I am $(whoami)"
crontab -l
echo "建议: "
echo " 仔细检查定时任务的必要性, 以及定时任务的成功与否的评判标准, 以及监控措施. "
echo " 请以启动数据库的OS用户执行本脚本. "
echo -e " "


#######################################
# Report section shared by primary and standby nodes: checksums and active
# settings of pg_hba.conf / postgresql.conf, followed by tuning advice.
# Globals:
#   PGDATA      (read) - data directory holding the configuration files
#   pg_log_dir  (read) - raw log_directory value taken from postgresql.conf
# Arguments:
#   None
# Outputs:
#   Writes the report text to stdout; failing probes leave their own
#   diagnostics on stderr.
# Side effects:
#   Changes the current directory of the calling shell to the log directory.
#######################################
common() {
  # Enter the log working directory. Best-effort: a failing cd prints its
  # own error and the checks below still run because they use absolute paths.
  cd "$PGDATA"
  # pg_log_dir still carries the config file's own quoting (e.g. 'log'),
  # which is why it goes through eval; it comes from the local
  # postgresql.conf, not from untrusted input. Skip the step when the setting
  # was not found: a bare "cd" would otherwise jump to $HOME.
  if [[ -n "${pg_log_dir//[[:space:]]/}" ]]; then
    eval "cd $pg_log_dir"
  fi

  echo "----->>>---->>> 获取pg_hba.conf md5值: "
  md5sum "$PGDATA/pg_hba.conf"
  echo "建议: "
  echo " 主备md5值一致(判断主备配置文件是否内容一致的一种手段, 或者使用diff)."
  echo -e " "

  echo "----->>>---->>> 获取pg_hba.conf配置: "
  # Active (non-comment, non-blank) lines only.
  grep '^ *[a-z]' "$PGDATA/pg_hba.conf"
  echo "建议: "
  echo " 主备配置尽量保持一致, 注意trust和password认证方法的危害(password方法 验证时网络传输密码明文, 建议改为md5), 建议除了unix socket可以使用trust以外, 其他都使用md5或者LDAP认证方法."
  echo " 建议先设置白名单(超级用户允许的来源IP, 可以访问的数据库), 再设置黑名单(不允许超级用户登陆, reject), 再设置白名单(普通应用), 参考pg_hba.conf中的描述. "
  echo -e " "

  echo "----->>>---->>> 获取postgresql.conf md5值: "
  md5sum "$PGDATA/postgresql.conf"
  echo "建议: "
  echo " 主备md5值一致(判断主备配置文件是否内容一致的一种手段, 或者使用diff)."
  echo -e " "

  echo "----->>>---->>> 获取postgresql.conf配置: "
  # Active settings with their trailing "#" comments stripped.
  grep '^ *[a-z]' "$PGDATA/postgresql.conf" | awk -F "#" '{print $1}'
  echo ""
  echo "建议: "
  echo " 主备配置尽量保持一致, 配置合理的参数值."
  echo -e " 建议修改的参数列表如下 ( 假设操作系统内存为128GB, 数据库独占操作系统, 数据库版本9.4.x, 其他版本可能略有不同, 未来再更新进来 ) : "

  # Suggested settings, printed verbatim. String values use ASCII single
  # quotes so the lines are valid postgresql.conf syntax when copied.
  # NOTE(review): the blog URL on the shared_preload_libraries line looks
  # mangled by e-mail obfuscation in the copy this was taken from; left as is.
  echo "
listen_addresses = '0.0.0.0' # 监听所有IPV4地址
port = 1921 # 监听非默认端口
max_connections = 4000 # 最大允许连接数
superuser_reserved_connections = 20 # 为超级用户保留的连接
unix_socket_directories = '.' # unix socket文件目录最好放在$PGDATA中, 确保安全
unix_socket_permissions = 0700 # 确保权限安全
tcp_keepalives_idle = 30 # 间歇性发送TCP心跳包, 防止连接被网络设备中断.
tcp_keepalives_interval = 10
tcp_keepalives_count = 10
shared_buffers = 16GB # 数据库自己管理的共享内存大小, 如果用大页, 建议设置为: 内存 - 100*work_mem - autovacuum_max_workers*(autovacuum_work_mem or autovacuum_work_mem) - max_connections*1MB
huge_pages = try # 尽量使用大页, 需要操作系统支持, 配置vm.nr_hugepages*2MB大于shared_buffers.
maintenance_work_mem = 512MB # 可以加速创建索引, 回收垃圾(假设没有设置autovacuum_work_mem)
autovacuum_work_mem = 512MB # 可以加速回收垃圾
shared_preload_libraries = 'auth_delay,passwordcheck,pg_stat_statements,auto_explain' # 建议防止暴力破解, 密码复杂度检测, 开启pg_stat_statements, 开启auto_explain, 参考 http://blog.163.com/[email protected]/blog/static/16387704020149852941586
bgwriter_delay = 10ms # bgwriter process间隔多久调用write接口(注意不是fsync)将shared buffer中的dirty page写到文件系统.
bgwriter_lru_maxpages = 1000 # 一个周期最多写多少脏页
max_worker_processes = 20 # 如果要使用worker process, 最多可以允许fork 多少个worker进程.
"
}

# Run the common() report section defined above.
common

 






























































































































































































以上是关于pgsql 运行状态 采集脚本的主要内容,如果未能解决你的问题,请参考以下文章

电梯物联网数据采集设备(电梯物联网产品)

使用shell脚本监控网站运行状态

如何获取在 shell 脚本中运行的 R 脚本的退出状态

无线电流互感器监测电机设备的运行(OEE,开关机率)工作状态

Linux用shell脚本监控网站运行状态并发告警邮件

获取flink job运行状态脚本