Python之行 --Linux常用服务监管

Posted FindSoul

tags:

篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了Python之行 --Linux常用服务监管相关的知识,希望对你有一定的参考价值。

Python之行--服务监管

背景

随着公司系统平台的服务不断增多和复杂化,定位问题的效率逐渐下降,实现各个服务的统一监管显得越来越重要。作为运维人员,在管理越来越头疼的时候,我决定开发自己的监控程序!

动作

首先,公司系统使用 Java 开发,以微服务体系实现快速搭建,服务器使用最常用的 CentOS 系统(CentOS 7)。由于有多个 xxx.jar,启动命令冗长繁琐,在不断和后台人员优化统一启动命令后,统一改造成了系统服务,例如:systemctl start xxx.service。类似这样的服务有 9 个之多,还不包括所依赖的其他服务,如 mysql、redis、kafka……这些服务也统一做成了系统服务!

开发(上代码)

技术图片
# coding:utf-8
# author:Liu Xiaofei
# date:2020-5-4
# mood:restless

import time
import logging
import subprocess

from copy import deepcopy
from functools import wraps


class ParsesTools(object):
    """Stateless helpers: run shell commands and parse ``netstat`` output."""

    @staticmethod
    def parse_netstat_response(origin):
        """
        Parse the output of ``netstat -tnlp`` into one dict per socket line.

        :param origin: raw netstat output; text, or bytes as read from a
            subprocess pipe on Python 3
        :return: generator of dicts with the keys ``type``, ``process``,
            ``service`` and ``port`` (all strings)
        """
        # On Python 3 a subprocess pipe yields bytes; on Python 2 ``bytes`` is
        # ``str`` so this branch is never taken there.
        if isinstance(origin, bytes) and not isinstance(origin, str):
            origin = origin.decode("utf-8", "replace")

        # The first two lines are the banner and the column headers.
        for line in origin.strip().splitlines()[2:]:
            fields = line.split()
            if len(fields) < 4:
                # Not a socket row (no local-address column); nothing to parse.
                continue

            # netstat prints "-" instead of "PID/Program" when the owner is
            # not visible to the current user; partition() copes with both.
            pid_program = fields[6] if len(fields) > 6 else "-"
            process, _, service = pid_program.partition("/")

            yield {
                "type": fields[0],
                "process": process,
                # "nginx: master" is split on whitespace, leaving "nginx:".
                "service": service.replace(":", ""),
                # The port is whatever follows the LAST colon; this is correct
                # for "0.0.0.0:80", ":::8080" and "::1:25" alike.
                "port": fields[3].rsplit(":", 1)[-1],
            }

    @staticmethod
    def execute_result(command):
        """
        Run a shell command and return everything it wrote to stdout.

        :param command: shell command line
        :return: the raw stdout content of the command
        """
        process = subprocess.Popen(command, stdin=subprocess.PIPE, stdout=subprocess.PIPE,
                                   shell=True)
        # communicate() reads stdout to EOF, closes the pipes and waits for
        # the child, so no zombie process or open descriptor is left behind.
        response_info, _ = process.communicate()
        return response_info

    @staticmethod
    def service_run(command):
        """
        Decorator factory: run ``command`` and tell the wrapped function whether it succeeded.

        The wrapped function receives one extra keyword argument whose key is
        its own name with ``_`` replaced by ``-`` and whose value is ``True``
        when the command exited with status 0, ``False`` otherwise.

        :param command: shell command line to execute before the function
        :return: the decorator
        """
        def execute_status(func):
            status_key = func.__name__.replace("_", "-")

            @wraps(func)
            def inner(*args, **kwargs):
                try:
                    subprocess.check_call(command, shell=True)
                except (subprocess.CalledProcessError, OSError) as e:
                    # Non-zero exit status or the shell could not be spawned.
                    logging.error(e)
                    status = False
                else:
                    status = True
                kwargs[status_key] = status
                return func(*args, **kwargs)

            return inner

        return execute_status

class ExecuteLinuxCommands(object):
    """Shell command lines used by the supervisor, grouped by kind."""

    # Lists listening TCP sockets together with the owning PID/program.
    CENTOS7_NETSTAT = "netstat -tnlp"

    # Infrastructure services the Java services depend on.
    CENTOS7_MYSQL = "systemctl start mysqld.service"
    CENTOS7_REDIS = "systemctl start redis-server.service"
    CENTOS7_INFLUXDB = "systemctl start influxdb"
    CENTOS7_GRAFANA = "systemctl start grafana-server"
    CENTOS7_NGINX = "systemctl start nginx.service"
    CENTOS7_KAFKA = "systemctl start kafka.service"
    # NOTE(review): this restarts kafka rather than starting a zookeeper unit.
    # Presumably zookeeper is brought up by kafka.service - confirm.
    CENTOS7_ZOOKEEPER = "systemctl restart kafka.service"

    # Java micro-services.
    JAVA_GATEWAYAPI = "systemctl start gateway-api.service"
    JAVA_CONFIGMANGER = "systemctl start config-manger.service"
    JAVA_REALTIME = "systemctl start realtime.service"
    JAVA_REALTIME_DATAVIEW = "systemctl start realtime-dataview.service"
    JAVA_ALERT_STRATEGY = "systemctl start alert-strategy.service"
    JAVA_ALERT_PUSHER = "systemctl start alert-pusher.service"
    JAVA_ALERT_ENGINE = "systemctl start alert-engine.service"
    # Was a copy of the alert-engine command, so the login service was never
    # started; the unit name follows the "<name>.service" pattern used above.
    JAVA_LOGIN = "systemctl start login.service"
    JAVA_GATEWAY = "systemctl start gateway.service"

class YiLianSystemServe(object):
    """
    One ``<name>_service`` method per supervised service.

    Calling such a method runs the service's start command (through the
    ``ParsesTools.service_run`` decorator) and stores the outcome in
    ``serve_recorder`` / ``serve_counter``.
    """
    pt = ParsesTools
    els = ExecuteLinuxCommands

    def __init__(self):
        # Number of services recorded as started successfully.
        self.serve_counter = 0
        # Maps a service key to True (started) or False (failed).
        self.serve_recorder = {}

    @staticmethod
    def comm(*args, **kwargs):
        """
        Check the start status handed over by the decorator and report it.

        :param args: ignored
        :param kwargs: expected to hold one ``{service-key: bool}`` entry as
            injected by ``ParsesTools.service_run``
        :return: ``(result, count)`` - a copy of ``kwargs`` and 1 if the
            service started, otherwise 0
        """
        result = dict(kwargs)
        count = 0
        if not result:
            # Nothing was reported; there is no status to evaluate.
            return result, count

        serve_name = next(iter(kwargs)).replace("_", "-")
        if result.get(serve_name):
            count += 1
        else:
            print("{}服务启动失败,请检查启动命令是否正确以及跟踪错误日志!!!".format(serve_name))
        return result, count

    def record(self, *args):
        """
        Store a status dict and add to the success counter.

        :param args: ``(status_dict, success_count)``
        :return: None
        """
        self.serve_recorder.update(args[0])
        self.serve_counter += args[1]

    def _check_and_record(self, *args, **kwargs):
        """Evaluate the decorator-supplied status and record it on the instance."""
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    # Each method below only needs to forward the status injected by the
    # decorator; the start command itself is executed by the decorator.

    @pt.service_run(els.CENTOS7_MYSQL)
    def mysql_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.CENTOS7_REDIS)
    def redis_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.CENTOS7_NGINX)
    def nginx_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.CENTOS7_INFLUXDB)
    def influx_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.CENTOS7_GRAFANA)
    def grafana_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.CENTOS7_KAFKA)
    def kafka_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.CENTOS7_ZOOKEEPER)
    def zookeeper_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_GATEWAYAPI)
    def gateway_api_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_CONFIGMANGER)
    def config_manger_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_ALERT_STRATEGY)
    def alert_strategy_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_ALERT_PUSHER)
    def alert_pusher_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_ALERT_ENGINE)
    def alert_engine_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_REALTIME_DATAVIEW)
    def realtime_dataview_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_REALTIME)
    def realtime_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_LOGIN)
    def login_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

    @pt.service_run(els.JAVA_GATEWAY)
    def gateway_service(self, *args, **kwargs):
        self._check_and_record(*args, **kwargs)

class TotalServicesRun(YiLianSystemServe):
    """
    Supervise all services: detect which ports are listening, start the
    services whose port is missing, and report the overall status.
    """

    # Listening port -> Java service name.
    YiLianJavaServices = {
        "8089": "gateway",
        "8090": "config-manger",
        "8080": "gateway-api",
        "6077": "alert-strategy",
        "6088": "alert-pusher",
        "6099": "alert-engine",
        "8099": "realtime-dataview",
        "8100": "realtime",
        "8101": "login"
    }
    # Listening port -> infrastructure service name.
    YiLianDependOnServices = {
        "3306": "mysql",
        "6379": "redis",
        "80": "nginx",
        "8088": "influx",
        "3000": "grafana",
        "9092": "kafka",
        "2181": "zookeeper"
    }

    def __init__(self):
        self.elc = ExecuteLinuxCommands
        self.pt = ParsesTools
        # Ports currently listening, filled in by run().
        self.ports = []
        # Failed services of the most recent group that did not fully start.
        self.error_info = None
        # Accumulated status of every service group processed so far.
        self.total_status = dict()
        # Name the class explicitly: super(self.__class__, ...) recurses
        # forever as soon as this class is subclassed.
        super(TotalServicesRun, self).__init__()

    def methods(self, dictionary):
        """
        Check each service of a group: record the ones whose port is already
        listening and try to start the others.

        :param dictionary: mapping of port -> service name
        :return: None
        """
        self.serve_recorder = {}
        self.serve_counter = 0

        for port, name in dictionary.items():
            if port in self.ports:
                print("{}服务启动成功!".format(name))
                self.serve_recorder[name] = True
                self.serve_counter += 1
                continue

            # Method names use underscores where service names use hyphens.
            method_stem = name.replace("-", "_")
            print("{}服务未启动!!!".format(method_stem))
            starter = getattr(self, "{}_service".format(method_stem), None)
            if starter is None:
                # No start method exists for this service: record it as failed
                # (same key format the start decorator uses) instead of
                # aborting the whole run with an AttributeError.
                logging.error("no start method defined for service %s", name)
                self.serve_recorder["{}-service".format(name)] = False
                continue
            starter()

    def statistical_services(self, services, before=False):
        """
        Process one service group and print a summary for it.

        :param services: mapping of port -> service name for the group
        :param before: True when the group holds the dependency services
        :return: True if every service of the group is running, else False
        """
        self.methods(services)
        self.total_status.update(self.serve_recorder)
        services_name = "依赖" if before else "Java"
        if self.serve_counter < len(services):
            self.error_info = {k: v for k, v in self.serve_recorder.items() if v is False}

            print("易联系统{}服务启动失败,共{}个服务未启动{}失败服务详情(json格式):{}".format(
                services_name, len(self.error_info), "\n", self.error_info))
            return False
        print("易联系统{}服务已全部启动正常{}启动详情(字典格式):{}".format(
            services_name, "\n", self.serve_recorder))
        return True

    def run(self):
        """
        Run one supervision pass.

        The Java services are only processed when all dependency services
        are running.

        :return: dict with the status of every service processed
        """
        origin_info = self.pt.execute_result(self.elc.CENTOS7_NETSTAT)

        self.ports = [item.get("port") for item in self.pt.parse_netstat_response(origin_info)]

        if self.statistical_services(self.YiLianDependOnServices, before=True):
            self.statistical_services(self.YiLianJavaServices)

        print("易联系统服务启动状态总览(字典格式):{}".format(self.total_status))
        return self.total_status

# Script entry point: run one supervision pass every five minutes, forever.
# The guard must compare against the string "__main__"; the bare name
# __main__ is undefined and raises NameError.
if __name__ == "__main__":
    while True:
        start = time.time()
        # A fresh supervisor per pass so no status leaks between passes.
        ts = TotalServicesRun()
        ts.run()
        diff_time = time.time() - start
        print("程序执行耗时{}s".format(diff_time))
        time.sleep(300)
ServicesSupervision

代码开发已完成,只用了些常用模块,逻辑相对简单,功能已满足当前公司需要。各位大佬,欢迎可劲指正!有更好的写法烦请留言告知我,帮助小弟更好的优化,共同进步!O(∩_∩)O谢谢

 

以上是关于Python之行 --Linux常用服务监管的主要内容,如果未能解决你的问题,请参考以下文章

北海之行-小纪

西安商洛之行

c3p0数据库连接池 原创: Java之行 Java之行 5月8日 连接池概述 实际开发中“获得连接”或“释放资源”是非常消耗系统资源的两个过程

InnoDB事务锁之行锁-insert加锁-隐式锁

MySQL之行锁

电车的高速之行