0基础上手python3编程,远程JVM内存占用监控、企业微信告警

0基础上手python3编程,远程JVM内存占用监控、企业微信告警

王忘杰
2023-11-08 / 0 评论 / 150 阅读 / 正在检测是否收录...

前言
0、添加注释
1、采用json配置文件添加主机,可批量监控jvm主机并分别报警
2、日志以每天为单位通过web展示,如XX/jvmlog/20231108app02.txt
3、添加redis重复报警控制位,不会重复报警
4、json配置文件采用base64编码存储密码,配置文件中不出现明文密码(注意:base64只是编码而非加密,仍需限制配置文件的访问权限)
5、已开源

基础知识
查看JVM内存占用
jps 获取jvm的ID
jmap -heap ID 查看jvm详细信息

-------------------------------
Attaching to process ID 15005, please wait...
Debugger attached successfully.
Server compiler detected.
JVM version is 8.1.086 11.0.14+000

using thread-local object allocation.
Concurrent Mark-Sweep GC

Heap Configuration:
   MinHeapFreeRatio         = 40
   MaxHeapFreeRatio         = 70
   MaxHeapSize              = 34359738368 (32768.0MB)
   NewSize                  = 11453595648 (10923.0MB)
   MaxNewSize               = 11453595648 (10923.0MB)
   OldSize                  = 22906142720 (21845.0MB)
   NewRatio                 = 2
   SurvivorRatio            = 6
   MetaspaceSize            = 21807104 (20.796875MB)
   CompressedClassSpaceSize = 268435456 (256.0MB)
   MaxMetaspaceSize         = 10737418240 (10240.0MB)
   G1HeapRegionSize         = 0 (0.0MB)

Heap Usage:
New Generation (Eden + 1 Survivor Space):
   capacity = 10021896192 (9557.625MB)
   used     = 1244541496 (1186.8872604370117MB)
   free     = 8777354696 (8370.737739562988MB)
   12.418223778784078% used
Eden Space:
   capacity = 8590196736 (8192.25MB)
   used     = 1003974176 (957.4643859863281MB)
   free     = 7586222560 (7234.785614013672MB)
   11.687441008103123% used
From Space:
   capacity = 1431699456 (1365.375MB)
   used     = 240567320 (229.4228744506836MB)
   free     = 1191132136 (1135.9521255493164MB)
   16.802920402869805% used
To Space:
   capacity = 1431699456 (1365.375MB)
   used     = 0 (0.0MB)
   free     = 1431699456 (1365.375MB)
   0.0% used
concurrent mark-sweep generation:
   capacity = 22906142720 (21845.0MB)
   used     = 13349348192 (12730.93051147461MB)
   free     = 9556794528 (9114.06948852539MB)
   58.278464231973494% used

JVM的内存占用可按(New Generation)used + (concurrent mark-sweep generation)used计算,该值可近似视为JVM堆占用的物理内存;将其除以JVM配置的内存量(MaxHeapSize,本例为32G)即可得到占用百分比

架构图
1、执行jvmcheck程序
2、登录远程主机
3、通过jmap命令获取、计算内存占用百分比
4、检查故障标记位、记录日志并进行告警
lopcreqg.png

预览
企业微信机器人告警
lopde35d.png

web页面日志查看
lopcvchq.png

程序组成
程序由jvmcheck.py和jvmcheck.json组成,需要配置redis数据库

程序代码
jvmcheck.py

import paramiko,linecache,json,requests,time,base64,redis
from io import StringIO

def get_config(path="/root/jvmcheck.json"):
    """Load the monitor configuration from a JSON file.

    Args:
        path: Location of the JSON configuration file. Defaults to the
            absolute path the script has always used, so callers that pass
            no argument behave exactly as before.

    Returns:
        The parsed JSON document.

    Raises:
        OSError: If the file cannot be opened.
        json.JSONDecodeError: If the file does not contain valid JSON.
    """
    # The context manager closes the handle deterministically instead of
    # leaving it to the garbage collector.
    with open(path, encoding='utf-8') as config_file:
        return json.load(config_file)

class redis_operate():
    """Namespace for the Redis helpers that hold the per-host alert flags.

    Both helpers go through the module-level ``redis_pool`` client, which
    must be created before either of them is called. They are static
    methods, so they work whether invoked on the class (as the rest of the
    script does) or on an instance.
    """

    @staticmethod
    def redis_set(text1, text2):
        """Write value ``text2`` to Redis under key ``text1``."""
        redis_pool.set(text1, text2)

    @staticmethod
    def redis_get(text):
        """Read key ``text`` from Redis and return whatever the client yields."""
        return redis_pool.get(text)

# Heap size the usage ratio is measured against (32 GiB).
# NOTE(review): presumably mirrors the monitored JVMs' configured heap; the
# same figure shows up as MaxHeapSize in `jmap -heap` output -- confirm per host.
_JVM_HEAP_BYTES = 34359738368
# Usage ratio above which an alert is sent (80%).
_ALERT_THRESHOLD = 0.8


def _fetch_jmap_heap(host):
    """Run ``jps`` and ``jmap -heap`` on ``host`` over SSH; return jmap's stdout.

    Args:
        host: One entry of the configuration's ``data`` list, providing
            ``ip``, ``port``, ``username`` and a base64-encoded ``password``.

    Returns:
        The decoded standard output of ``jmap -heap <pid>``.

    Raises:
        RuntimeError: If no ``jps`` line contains ``"Start"``.
        Exception: Whatever paramiko or the socket layer raises on failure.
    """
    # Create a transport bound to the host's IP and port.
    trans = paramiko.Transport((host['ip'], int(host['port'])))
    try:
        # Authenticate; the stored password is base64-encoded.
        trans.connect(username=host['username'], password=base64.b64decode(host['password']))
        # Attach the already-connected transport to an SSHClient.
        ssh = paramiko.SSHClient()
        ssh._transport = trans
        _, stdout, _ = ssh.exec_command('jps')
        startid = None
        for line in stdout.read().decode().splitlines():
            if "Start" in line:     # line of the running JVM process; last match wins
                startid = line.split()
        if not startid:
            raise RuntimeError("jps listed no process containing 'Start'")
        _, stdout, _ = ssh.exec_command('jmap -heap ' + startid[0])
        return stdout.read().decode()
    finally:
        # Always release the connection, including on the error paths.
        trans.close()


def _used_bytes(jmap_lines, section):
    """Return the byte count on the ``used`` line of one ``jmap -heap`` section.

    The section is located by its heading text rather than by a fixed line
    number, so extra or missing lines earlier in the output do not shift
    the value that is read.

    Args:
        jmap_lines: The jmap output split into lines.
        section: Text the section's heading line starts with.

    Raises:
        ValueError: If the section or its ``used`` line is not present.
    """
    inside = False
    for line in jmap_lines:
        stripped = line.strip()
        if not inside:
            inside = stripped.startswith(section)
            continue
        fields = stripped.split()
        # "used     = 1244541496 (1186.88MB)" -> fields[2] is the byte count.
        if len(fields) >= 3 and fields[0] == "used":
            return int(fields[2])
        if stripped.endswith(":"):
            break   # next section heading reached without a "used" line
    raise ValueError("no 'used' figure for section %r in jmap output" % section)


def getjvm():
    """Check one host's JVM heap usage and send alert/recovery notices.

    Reads the module-level ``sshdata`` (current host) and ``sshconfig``.
    Usage is the sum of the ``used`` figures of the "New Generation" and
    "concurrent mark-sweep generation" sections divided by
    ``_JVM_HEAP_BYTES``. A Redis key named after the host holds the last
    notified state: ``"NO"`` after an alert, ``"YES"`` after a recovery
    notice, so each state change is reported once.

    Never raises for ordinary errors: failures are printed so the caller's
    loop can continue with the next host.
    """
    try:
        jmaporiginal = _fetch_jmap_heap(sshdata)
    except Exception as exc:
        # Include the cause so SSH failures and "no JVM found" are distinguishable.
        print(sshdata['ip'] + "主机连接失败: " + repr(exc))
        return

    try:
        jmap = jmaporiginal.splitlines()
        NewGeneration = _used_bytes(jmap, "New Generation")
        concurrentmarksweepgeneration = _used_bytes(jmap, "concurrent mark-sweep generation")
        used = NewGeneration + concurrentmarksweepgeneration    # total heap bytes in use
        Usagerate = used / _JVM_HEAP_BYTES
        print(Usagerate)
        hostname = sshdata['hostname']
        percent = '%.2f%%' % (Usagerate * 100)
        if Usagerate > _ALERT_THRESHOLD:
            if redis_operate.redis_get(hostname) != "NO":    # no alert sent yet for this host
                post_weixin(hostname + " " + sshdata['ip'] + "\nJVM内存使用率为" + percent
                            + ",请立即处理。\n点击可查看日志")
                log_path = sshconfig['path'] + time.strftime('%Y%m%d', time.localtime()) + hostname + ".txt"
                # Append the raw jmap output to the per-day, per-host log and
                # close the file as soon as the record is written.
                with open(log_path, 'a') as log_file:
                    log_file.write("-------------------------------\n"
                                   + time.strftime('%Y-%m-%d %H:%M:%S', time.localtime())
                                   + "\n" + jmaporiginal + "\n")
                print("发送告警")
                redis_operate.redis_set(hostname, "NO")   # remember that the alert went out
            else:
                print("告警已存在")
        else:
            if redis_operate.redis_get(hostname) != "YES":
                post_weixin(hostname + " " + sshdata['ip'] + "\nJVM内存使用率为" + percent
                            + ",已恢复正常。\n点击可查看日志")
                redis_operate.redis_set(hostname, "YES")
            else:
                print("告警已解除")
    except Exception as exc:
        # Parsing, Redis, webhook or log-file problems are not connection
        # failures; report them as such, with the cause.
        print(sshdata['ip'] + "JVM检查失败: " + repr(exc))

def post_weixin(stats, timeout=10):
    """Post a "news" message to the configured WeCom robot webhook.

    Reads the module-level ``sshconfig`` (webhook URL, title, log base URL,
    picture URL) and ``sshdata`` (current host name). The article link is
    the log base URL followed by today's date (``YYYYMMDD``), the host name
    and ``.txt``. The response body and status code are printed.

    Args:
        stats: Text used as the article description.
        timeout: Seconds to wait for the webhook before giving up. Without
            a timeout ``requests`` may wait indefinitely, which would stall
            the whole monitoring run.

    Raises:
        requests.exceptions.RequestException: If the request fails or times out.
    """
    weixin = sshconfig['weixin']
    log_link = weixin['url2'] + time.strftime('%Y%m%d', time.localtime()) + sshdata['hostname'] + ".txt"
    body = {
        "msgtype": "news",
        "news": {
            "articles": [
                {
                    "title": weixin['title'],
                    "description": stats,
                    "url": log_link,
                    "picurl": weixin['picurl'],
                }
            ]
        },
    }
    response = requests.post(weixin['url'], json=body, timeout=timeout)
    print(response.text)
    print(response.status_code)

# Parse the JSON configuration once; the functions above read it through the
# module-level name `sshconfig`.
sshconfig = get_config()

# Shared Redis client used by redis_operate. It sits on an explicit
# connection pool whose settings all come from the "redis" config section.
redis_pool = redis.Redis(
    connection_pool=redis.ConnectionPool(
        host=sshconfig['redis']['host'],
        port=sshconfig['redis']['port'],
        password=sshconfig['redis']['password'],
        decode_responses=sshconfig['redis']['decode'],
    )
)

# Check every host listed in the configuration, in order. The loop variable
# is deliberately the module-level name `sshdata`: getjvm() and post_weixin()
# read the current host from it.
for sshdata in sshconfig['data']:
    getjvm()

print("\n程序执行完成")

jvmcheck.json

{
  "weixin" : {
    "url" : "https://qyapi.weixin.qq.com/cgi-bin/webhook/send?key=填写自己的",
    "title": "java虚拟机内存监控",
    "url2": "http://90apt.com/",
    "picurl": "自定义图片连接"

  },
  "path" : "./",
  "redis" : {
    "host": "IP地址",
    "port": "端口",
    "password": "密码",
    "decode": "True"
  },
  "data" : [
    { "ip" : "IP地址" , "username" : "用户名" , "password" : "base64转换后的密码" , "port" : "端口","hostname" :  "主机名"},
    { "ip" : "IP地址" , "username" : "用户名" , "password" : "base64转换后的密码" , "port" : "端口","hostname" :  "主机名"}
  ]
}

定时运行
配置定时任务,每五分钟执行一次,并记录日志

crontab -e
*/5 * * * * python3.11 /root/jvmcheck/jvmcheck.py >> /root/jvmcheck/jvmcheck.log 2>&1

总结
好好好

2

评论

博主关闭了所有页面的评论