Python之行 -- Linux常用服务监管
Posted FindSoul
tags:
篇首语:本文由小常识网(cha138.com)小编为大家整理,主要介绍了"Python之行 -- Linux常用服务监管"相关的知识,希望对你有一定的参考价值。
Python之行--服务监管
背景
随着公司系统平台的服务不断增多、日趋复杂,定位问题的效率逐渐下降,实现各个服务的统一监管显得越来越重要。作为运维人员,在管理越来越头疼的时候,我决定开发自己的监控程序!
动作
首先,公司系统使用 Java 开发,以微服务体系实现快速搭建,服务器使用最常用的 CentOS 系统(CentOS 7)。因此有多个 xxx.jar,启动命令冗长繁琐;在不断和后台人员优化、统一启动命令后,统一改造成了系统服务,例如:systemctl start xxx.service。类似这样的服务有 9 个之多,还不包括所依赖的其他服务,如 mysql、redis、kafka……这些服务也统一做成了系统服务!
开发(上代码)
# coding:utf-8
# author:Liu Xiaofei
# date:2020-5-4
# mood:restless
import time
import logging
import subprocess
from copy import deepcopy
from functools import wraps


class ParsesTools(object):
    """Helpers for running shell commands and parsing their output."""

    @staticmethod
    def parse_netstat_response(origin):
        """Parse the text printed by ``netstat -tnlp``.

        The first two lines of the output are treated as headers and skipped.
        Lines with fewer than seven whitespace-separated columns are ignored.

        :param origin: raw command output as text
        :return: generator of dicts with the keys ``type``, ``process``,
            ``service`` and ``port`` (all strings)
        """
        for line in origin.strip().splitlines()[2:]:
            fields = line.split()
            if len(fields) < 7:
                continue
            # Column 7 is "PID/Program"; it is a bare "-" when the owner of
            # the socket is not visible, in which case the service is empty.
            pid, _, program = fields[6].partition("/")
            yield {
                "type": fields[0],
                "process": pid,
                "service": program.replace(":", ""),
                # The port is whatever follows the last colon; this handles
                # "0.0.0.0:3306", ":::8080" and "::1:25" alike.
                "port": fields[3].rsplit(":", 1)[-1],
            }

    @staticmethod
    def execute_result(command):
        """Run a shell command and return everything it wrote to stdout.

        :param command: shell command line
        :return: captured stdout as text
        """
        proc = subprocess.Popen(
            command,
            stdin=subprocess.PIPE,
            stdout=subprocess.PIPE,
            shell=True,
            universal_newlines=True,
        )
        # communicate() closes stdin, drains stdout and waits for the child,
        # so no pipe or zombie process is left behind.
        output, _ = proc.communicate()
        return output

    @staticmethod
    def service_run(command):
        """Build a decorator that runs ``command`` before the wrapped function.

        The outcome of the command (True on exit status 0, False otherwise)
        is passed to the wrapped function as a keyword argument whose name is
        the function's name with underscores replaced by hyphens.

        :param command: shell command line to execute
        :return: decorator
        """
        def execute_status(func):
            status_key = func.__name__.replace("_", "-")

            @wraps(func)
            def inner(*args, **kwargs):
                try:
                    subprocess.check_call(command, shell=True)
                except (subprocess.CalledProcessError, OSError) as e:
                    logging.error(e)
                    status = False
                else:
                    status = True
                kwargs[status_key] = status
                return func(*args, **kwargs)
            return inner
        return execute_status


class ExecuteLinuxCommands(object):
    """Shell command lines used by the monitor."""

    CENTOS7_NETSTAT = "netstat -tnlp"
    CENTOS7_MYSQL = "systemctl start mysqld.service"
    CENTOS7_REDIS = "systemctl start redis-server.service"
    CENTOS7_INFLUXDB = "systemctl start influxdb"
    CENTOS7_GRAFANA = "systemctl start grafana-server"
    CENTOS7_NGINX = "systemctl start nginx.service"
    CENTOS7_KAFKA = "systemctl start kafka.service"
    # NOTE(review): this used to be "systemctl restart kafka.service", which
    # looks like a copy-paste slip. The unit name below follows the pattern of
    # the other entries -- confirm it matches the installed unit.
    CENTOS7_ZOOKEEPER = "systemctl start zookeeper.service"
    JAVA_GATEWAYAPI = "systemctl start gateway-api.service"
    JAVA_CONFIGMANGER = "systemctl start config-manger.service"
    JAVA_REALTIME = "systemctl start realtime.service"
    JAVA_REALTIME_DATAVIEW = "systemctl start realtime-dataview.service"
    JAVA_ALERT_STRATEGY = "systemctl start alert-strategy.service"
    JAVA_ALERT_PUSHER = "systemctl start alert-pusher.service"
    JAVA_ALERT_ENGINE = "systemctl start alert-engine.service"
    # NOTE(review): this used to duplicate the alert-engine command; the unit
    # name below follows the pattern of the other entries -- confirm it.
    JAVA_LOGIN = "systemctl start login.service"
    JAVA_GATEWAY = "systemctl start gateway.service"


class YiLianSystemServe(object):
    """One start-and-record method per managed service.

    Every ``<name>_service`` method is wrapped by ``ParsesTools.service_run``,
    which runs the matching start command and hands the outcome to the method
    as a keyword argument; the method then stores it through ``record``.
    """

    pt = ParsesTools
    els = ExecuteLinuxCommands

    def __init__(self):
        self.serve_counter = 0
        self.serve_recorder = {}

    @staticmethod
    def comm(*args, **kwargs):
        """Summarise start results and report the failures.

        :param args: ignored
        :param kwargs: mapping of service key to its boolean start status
        :return: tuple ``(result, count)`` where ``result`` is a copy of
            ``kwargs`` and ``count`` is the number of truthy statuses
        """
        result = dict(kwargs)
        count = 0
        for serve_name, status in kwargs.items():
            if status:
                count += 1
            else:
                print("{}服务启动失败,请检查启动命令是否正确以及跟踪错误日志!!!".format(
                    serve_name.replace("_", "-")))
        return result, count

    def record(self, *args):
        """Merge a ``comm`` result into the recorder and the counter.

        :param args: ``(result_dict, success_count)``
        :return: None
        """
        self.serve_recorder.update(args[0])
        self.serve_counter += args[1]

    @pt.service_run(els.CENTOS7_MYSQL)
    def mysql_service(self, *args, **kwargs):
        # Arguments must be unpacked so that comm() sees the status keyword.
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.CENTOS7_REDIS)
    def redis_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.CENTOS7_INFLUXDB)
    def influx_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.CENTOS7_GRAFANA)
    def grafana_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.CENTOS7_NGINX)
    def nginx_service(self, *args, **kwargs):
        # Needed because TotalServicesRun.YiLianDependOnServices lists
        # "nginx" and resolves "<name>_service" through getattr().
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.CENTOS7_KAFKA)
    def kafka_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.CENTOS7_ZOOKEEPER)
    def zookeeper_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_GATEWAYAPI)
    def gateway_api_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_CONFIGMANGER)
    def config_manger_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_ALERT_STRATEGY)
    def alert_strategy_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_ALERT_PUSHER)
    def alert_pusher_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_ALERT_ENGINE)
    def alert_engine_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_REALTIME_DATAVIEW)
    def realtime_dataview_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_REALTIME)
    def realtime_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_LOGIN)
    def login_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)

    @pt.service_run(els.JAVA_GATEWAY)
    def gateway_service(self, *args, **kwargs):
        result, count = self.comm(*args, **kwargs)
        self.record(result, count)


class TotalServicesRun(YiLianSystemServe):
    """Check which service ports are listening and start what is missing."""

    # Listening port -> service name, for the Java services.
    YiLianJavaServices = {
        "8089": "gateway",
        "8090": "config-manger",
        "8080": "gateway-api",
        "6077": "alert-strategy",
        "6088": "alert-pusher",
        "6099": "alert-engine",
        "8099": "realtime-dataview",
        "8100": "realtime",
        "8101": "login",
    }
    # Listening port -> service name, for the services checked first.
    YiLianDependOnServices = {
        "3306": "mysql",
        "6379": "redis",
        "80": "nginx",
        "8088": "influx",
        "3000": "grafana",
        "9092": "kafka",
        "2181": "zookeeper",
    }

    def __init__(self):
        # Name the class explicitly: super(self.__class__, self) recurses
        # forever as soon as this class is subclassed.
        super(TotalServicesRun, self).__init__()
        self.elc = ExecuteLinuxCommands
        self.pt = ParsesTools
        self.ports = []
        self.error_info = None
        self.total_status = dict()

    def methods(self, dictionary):
        """Record the services already listening and start the others.

        A service whose port is in ``self.ports`` is recorded under its plain
        name. Any other service is started through its ``<name>_service``
        method, which records the result under the key ``<name>-service``.

        :param dictionary: mapping of port (str) to service name
        :return: None
        """
        self.serve_recorder = {}
        self.serve_counter = 0
        for port, name in dictionary.items():
            if port in self.ports:
                print("{}服务启动成功!".format(name))
                self.serve_recorder[name] = True
                self.serve_counter += 1
                continue
            method_key = name.replace("-", "_")
            print("{}服务未启动!!!".format(method_key))
            getattr(self, "{}_service".format(method_key))()

    def statistical_services(self, services, before=False):
        """Check one group of services and print a summary.

        :param services: mapping of port (str) to service name
        :param before: True when the group is the dependency group; only
            changes the wording of the printed summary
        :return: True when every service of the group is up, else False
        """
        self.methods(services)
        self.total_status.update(self.serve_recorder)
        services_name = "依赖" if before else "Java"
        if self.serve_counter < len(services):
            self.error_info = {
                k: v for k, v in self.serve_recorder.items() if v is False
            }
            print("易联系统{}服务启动失败,共{}个服务未启动{}失败服务详情(json格式):{}".format(
                services_name, len(self.error_info), " ", self.error_info))
            return False
        print("易联系统{}服务已全部启动正常{}启动详情(字典格式):{}".format(
            services_name, " ", self.serve_recorder))
        return True

    def run(self):
        """Run one monitoring pass.

        The Java group is only checked when every dependency service is up.

        :return: dict of every recorded service status
        """
        netstat_output = self.pt.execute_result(self.elc.CENTOS7_NETSTAT)
        self.ports = [
            entry.get("port")
            for entry in self.pt.parse_netstat_response(netstat_output)
        ]
        if self.statistical_services(self.YiLianDependOnServices, before=True):
            self.statistical_services(self.YiLianJavaServices)
        print("易联系统服务启动状态总览(字典格式):{}".format(self.total_status))
        return self.total_status


if __name__ == "__main__":
    while True:
        start = time.time()
        ts = TotalServicesRun()
        ts.run()
        diff_time = time.time() - start
        print("程序执行耗时{}s".format(diff_time))
        # One pass every five minutes.
        time.sleep(300)
代码开发已完成,只用了些常用模块,逻辑相对简单,功能已满足当前公司需要。各位大佬,欢迎可劲指正!有更好的写法烦请留言告知我,帮助小弟更好的优化,共同进步!O(∩_∩)O谢谢
以上是关于"Python之行 -- Linux常用服务监管"的主要内容,如果未能解决你的问题,请参考以下文章
c3p0数据库连接池 原创: Java之行 Java之行 5月8日 连接池概述 实际开发中“获得连接”或“释放资源”是非常消耗系统资源的两个过程