alwayson的一些初步监控

Posted M哥

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了alwayson的一些初步监控相关的知识,希望对你有一定的参考价值。

最近自己写了一些 AlwaysOn 相关的监控脚本(可以封装成存储过程来调用),做个笔记如下:

-- Always On enabled state.
-- Reports whether the Always On availability groups feature is enabled on
-- this instance. The property name must be passed to SERVERPROPERTY as a
-- quoted string literal; as a bare identifier the statement does not compile.
DECLARE @isenabled sql_variant;

SELECT @isenabled = SERVERPROPERTY('IsHadrEnabled');

-- SERVERPROPERTY returns sql_variant; the comparison with 1 is the "enabled" case.
IF @isenabled = 1
BEGIN
    -- N'' keeps the non-ASCII message text intact regardless of the code page.
    PRINT N'is enabled(alwayson启用状态)';
END
-- Cluster state.
-- Raises an alert when any cluster member reports a member_state other
-- than 1 (1 is the UP state per the DMV documentation).
IF EXISTS (
    SELECT 1
    FROM sys.dm_hadr_cluster_members AS cm
    WHERE cm.member_state <> 1
)
BEGIN
    -- The message must be a quoted literal; N'' preserves the non-ASCII text.
    PRINT N'alwayson集群有拖机机器,请查看';
END
-- Replica synchronization health.
-- Raises an alert when any availability replica reports a
-- synchronization_health other than 2 (2 is HEALTHY per the DMV documentation).
IF EXISTS (
    SELECT 1
    FROM sys.dm_hadr_availability_replica_states AS rs
    WHERE rs.synchronization_health <> 2
)
BEGIN
    -- The message must be a quoted literal; N'' preserves the non-ASCII text.
    PRINT N'alwayson集群同步异常,请查看';
END
-- Database synchronization state.
-- Raises an alert when any availability database is in a state other than
-- SYNCHRONIZING or SYNCHRONIZED. The original list named SYNCHRONIZED twice
-- (and unquoted), so a database that was still SYNCHRONIZING - the normal
-- state for asynchronous-commit replicas - was reported as a failure.
-- These two descriptions correspond to synchronization_state codes 1 and 2.
IF EXISTS (
    SELECT 1
    FROM sys.dm_hadr_database_replica_states AS drs
    WHERE drs.synchronization_state_desc NOT IN ('SYNCHRONIZING', 'SYNCHRONIZED')
)
BEGIN
    -- The message must be a quoted literal; N'' preserves the non-ASCII text.
    PRINT N'alwayson有数据库同步异常,请查看';
END
-- TCP listener state.
-- Raises an alert when any TCP listener of the instance is not ONLINE.
-- 'ONLINE' must be a quoted string literal; unquoted it is parsed as a
-- column name and the statement fails to bind.
IF EXISTS (
    SELECT 1
    FROM sys.dm_tcp_listener_states AS ls
    WHERE ls.state_desc <> 'ONLINE'
)
BEGIN
    -- N'' preserves the non-ASCII message text.
    PRINT N'alwayson侦听器异常,请查看';
END
 -- Primary/secondary lag: unsent log.
 -- Raises an alert when any availability database has more than 300 MB of
 -- log that has not yet been sent to a secondary.
 --
 -- log_send_queue_size is reported in kilobytes, so the 300 MB limit named in
 -- the alert text is 300 * 1024; comparing against a bare 300 fired at 300 KB.
 -- EXISTS ignores the select list, so the descriptive columns (and their
 -- unquoted LOCAL/REMOTE/DISCONNECTED literals) are replaced by SELECT 1.
 -- The joins are kept so the same rows qualify as before.
 IF EXISTS (
     SELECT 1
     FROM sys.availability_groups AS ag
     INNER JOIN sys.availability_replicas AS ar
         ON ag.group_id = ar.group_id
     INNER JOIN sys.dm_hadr_availability_replica_states AS ar_state
         ON ar.replica_id = ar_state.replica_id
     INNER JOIN sys.dm_hadr_database_replica_states AS dr_state
         ON ag.group_id = dr_state.group_id
         AND dr_state.replica_id = ar_state.replica_id
     WHERE dr_state.log_send_queue_size > 300 * 1024  -- KB => 300 MB
 )
 BEGIN
     -- The message must be a quoted literal; N'' preserves the non-ASCII text.
     PRINT N'alwayson主体有超过300M的日志没有同步,请尽快查看';
 END

 -- Primary/secondary lag: log waiting to be redone.
 -- Raises an alert when any availability database has a redo queue larger
 -- than the threshold.
 --
 -- EXISTS ignores the select list, so the descriptive columns (and their
 -- unquoted LOCAL/REMOTE/DISCONNECTED literals, which do not compile) are
 -- replaced by SELECT 1. The joins are kept so the same rows qualify as before.
 -- NOTE(review): redo_queue_size is documented in kilobytes, so 300 means
 -- 300 KB; confirm whether 300 MB (300 * 1024) was intended, as in the
 -- log-send check.
 IF EXISTS (
     SELECT 1
     FROM sys.availability_groups AS ag
     INNER JOIN sys.availability_replicas AS ar
         ON ag.group_id = ar.group_id
     INNER JOIN sys.dm_hadr_availability_replica_states AS ar_state
         ON ar.replica_id = ar_state.replica_id
     INNER JOIN sys.dm_hadr_database_replica_states AS dr_state
         ON ag.group_id = dr_state.group_id
         AND dr_state.replica_id = ar_state.replica_id
     WHERE dr_state.redo_queue_size > 300
 )
 BEGIN
     -- The message must be a quoted literal; N'' preserves the non-ASCII text.
     PRINT N'alwayson副本有较多日志未重做,请登录查看';
 END
 -- Primary/secondary role switch (failover) monitoring.
 -- Looks up the replica that currently holds the PRIMARY role (role = 1) and
 -- compares it with the server that is expected to be primary.
 --
 -- @MASTER is sysname (nvarchar(128)); the previous VARCHAR(20) could
 -- truncate longer server names and make the comparison below fail.
 -- The variable is now spelled @MASTER everywhere: the original mixed
 -- @MASTER and @master, which fails on case-sensitive server collations.
 -- NOTE(review): with several availability groups this query returns more
 -- than one row and the variable keeps only one of them.
 DECLARE @MASTER sysname;
 -- Server expected to hold the primary role; change this for your environment.
 DECLARE @expected_primary sysname = N'ALWAYSON241';

 SELECT @MASTER = cs.replica_server_name
 FROM sys.dm_hadr_availability_replica_cluster_states AS cs
 INNER JOIN sys.dm_hadr_availability_replica_states AS rs
     ON cs.replica_id = rs.replica_id
 WHERE rs.role = 1;  -- 1 = PRIMARY

 PRINT @MASTER;

 IF @MASTER IS NULL
 BEGIN
     -- No replica reports the PRIMARY role.
     PRINT N'主从同步异常';
 END
 ELSE IF @MASTER <> @expected_primary
 BEGIN
     -- The primary role has moved to a different server.
     PRINT N'alwayson进行了切换,现在主库是' + @MASTER;
 END
-- Availability database state.
-- Raises an alert when any availability database has a database_state other
-- than 0 or a synchronization_state outside (1, 2). Per the DMV
-- documentation 0 is ONLINE, 1 is SYNCHRONIZING and 2 is SYNCHRONIZED.
IF EXISTS (
    SELECT 1
    FROM sys.dm_hadr_database_replica_states AS drs
    WHERE drs.database_state <> 0
       OR drs.synchronization_state NOT IN (1, 2)
)
BEGIN
    -- The message must be a quoted literal; N'' preserves the non-ASCII text.
    PRINT N'alwayson有数据库异常';
END


-----------发邮件预警
----declare @name nvarchar(222)
----declare @mailbody nvarchar(4000);
----declare @maillabel nvarchar(100);
----set @maillabel=N'作业失败'
--EXEC msdb.dbo.sp_send_dbmail @profile_name   =   N'sendmail', 
--                                                @recipients   =   N'[email protected]', 
--                                                @subject   =   N'alwayson预警', 
--                                                @body   =   N'alwayson故障'

与 AlwaysOn 相关的系统视图主要涉及以下一些(通过这些系统视图和动态管理视图,我们可以很直观地了解可用性组的状态):

-- Check whether the Always On availability groups manager has started.
-- The property name must be a quoted string literal; unquoted it is parsed
-- as a column reference and the statement fails.
SELECT SERVERPROPERTY('HadrManagerStatus') AS hadr_manager_status;

-- Check whether Always On availability groups is enabled on this instance.
SELECT SERVERPROPERTY('IsHadrEnabled') AS is_hadr_enabled;
-- Cluster information for this server.
-- Ad-hoc inspection queries; every column of each view is returned.
SELECT c.*
FROM sys.dm_hadr_cluster AS c;

SELECT cm.*
FROM sys.dm_hadr_cluster_members AS cm;

SELECT cn.*
FROM sys.dm_hadr_availability_replica_cluster_nodes AS cn;

SELECT m.*
FROM sys.dm_hadr_name_id_map AS m;

-- Availability groups: definition, cluster-level metadata and current state.
SELECT ag.*
FROM sys.availability_groups AS ag;

SELECT agc.*
FROM sys.availability_groups_cluster AS agc;

SELECT ags.*
FROM sys.dm_hadr_availability_group_states AS ags;

-- Availability replicas.
SELECT ar.*
FROM sys.availability_replicas AS ar;

-- Read-only routing lists.
SELECT rl.*
FROM sys.availability_read_only_routing_lists AS rl;

-- Availability replica state.
SELECT rcs.*
FROM sys.dm_hadr_availability_replica_cluster_states AS rcs;

-- Author's note: synchronization_health must be 2 to count as normal;
-- this value should be monitored.
SELECT rs.*
FROM sys.dm_hadr_availability_replica_states AS rs;

-- Availability databases.
SELECT adc.*
FROM sys.availability_databases_cluster AS adc;

-- Catalog rows of the databases that belong to an availability group.
SELECT d.*
FROM sys.databases AS d
WHERE EXISTS (
    SELECT 1
    FROM sys.availability_databases_cluster AS adc
    WHERE adc.database_name = d.name
);

-- Author's note: this view holds rows for the latest automatic page-repair
-- attempts on a given primary/secondary database, at most 100 rows per
-- database; if it contains rows, review them manually.
SELECT apr.*
FROM sys.dm_hadr_auto_page_repair AS apr;
-- Availability databases in any state other than SYNCHRONIZING or SYNCHRONIZED.
-- The state names must be quoted string literals, and the original list
-- named SYNCHRONIZED twice, so databases that were still SYNCHRONIZING were
-- returned as if they were unhealthy.
SELECT drs.*
FROM sys.dm_hadr_database_replica_states AS drs
WHERE drs.synchronization_state_desc NOT IN ('SYNCHRONIZING', 'SYNCHRONIZED');

 -- Availability group listener monitoring.
 -- 'ONLINE' must be a quoted string literal in every predicate below;
 -- unquoted it is parsed as a column name and the statements fail to bind.

 -- Listener IP addresses that are currently online.
 SELECT ip.*
 FROM sys.availability_group_listener_ip_addresses AS ip
 WHERE ip.state_desc = 'ONLINE';

 -- All configured listeners.
 SELECT l.*
 FROM sys.availability_group_listeners AS l;

 -- Online listener endpoints; wrap this in NOT EXISTS to detect the case
 -- where no listener address is online.
 SELECT
     l.dns_name,
     l.port,
     ip.ip_address
 FROM sys.availability_group_listener_ip_addresses AS ip
 INNER JOIN sys.availability_group_listeners AS l
     ON ip.listener_id = l.listener_id
 WHERE ip.state_desc = 'ONLINE';

 -- TCP listeners of the instance that are not online.
 SELECT ls.*
 FROM sys.dm_tcp_listener_states AS ls
 WHERE ls.state_desc <> 'ONLINE';

 

以上是关于alwayson的一些初步监控的主要内容,如果未能解决你的问题,请参考以下文章

Zabbix 监控 AlwaysOn

使用 Always On 监控的主要指标

SQL Server Alwayson架构下 服务器 各虚拟IP漂移监控告警的功能实现 -2(虚拟IP视角)

SQL Server 2012 AlwaysOn 高可用 主切换到辅(从)的时候,怎么在主的实例上做备份

SQL Server 在Alwayson上使用内存表"踩坑"

SQL Server 2017 AlwaysOn AG 自动初始化