feat: 添加RAID监控功能,定时检查磁盘状态并发送告警消息
This commit is contained in:
parent
95ccd1f8f3
commit
23662cc150
@ -6,7 +6,10 @@ import (
|
||||
"os/exec"
|
||||
"strconv"
|
||||
"strings"
|
||||
"time"
|
||||
|
||||
"git.lxtend.com/qqbot/action"
|
||||
"git.lxtend.com/qqbot/config"
|
||||
"git.lxtend.com/qqbot/constants"
|
||||
"git.lxtend.com/qqbot/handler"
|
||||
"git.lxtend.com/qqbot/model"
|
||||
@ -14,6 +17,104 @@ import (
|
||||
|
||||
func init() {
|
||||
handler.RegisterHandler("!raid", RaidHandler, constants.LEVEL_USER)
|
||||
// 启动定时检测任务
|
||||
go startRaidMonitor()
|
||||
}
|
||||
|
||||
// Tunables for the periodic disk status check.
const (
	// checkIntervalSeconds is the time between two checks, in seconds.
	checkIntervalSeconds = 30

	// temperatureThreshold is the drive temperature, in degrees Celsius,
	// above which an alert is raised.
	temperatureThreshold = 60
)
|
||||
|
||||
// 启动RAID监控定时任务
|
||||
func startRaidMonitor() {
|
||||
ticker := time.NewTicker(time.Duration(checkIntervalSeconds) * time.Second)
|
||||
// 首次启动时立即检查一次
|
||||
checkRaidStatus()
|
||||
|
||||
for range ticker.C {
|
||||
checkRaidStatus()
|
||||
}
|
||||
}
|
||||
|
||||
// 检查RAID状态
|
||||
func checkRaidStatus() {
|
||||
log.Println("开始检查RAID状态...")
|
||||
diskInfoList := GetDiskInfo()
|
||||
|
||||
if len(diskInfoList) == 0 {
|
||||
log.Println("未获取到阵列信息,跳过检查")
|
||||
return
|
||||
}
|
||||
|
||||
var alertMessages []string
|
||||
|
||||
for _, disk := range diskInfoList {
|
||||
// 检查固件状态
|
||||
if !strings.Contains(disk.FirmwareState, "Online") {
|
||||
alertMessages = append(alertMessages,
|
||||
fmt.Sprintf("槽位 %d 的磁盘状态异常: %s", disk.SlotNumber, disk.FirmwareState))
|
||||
}
|
||||
|
||||
// 检查温度
|
||||
tempStr := disk.DriveTemperature
|
||||
if strings.Contains(tempStr, "C") {
|
||||
// 提取温度数值
|
||||
tempParts := strings.Split(tempStr, "C")
|
||||
tempValue, err := strconv.Atoi(strings.TrimSpace(tempParts[0]))
|
||||
if err == nil && tempValue > temperatureThreshold {
|
||||
alertMessages = append(alertMessages,
|
||||
fmt.Sprintf("槽位 %d 的磁盘温度过高: %s", disk.SlotNumber, tempStr))
|
||||
}
|
||||
}
|
||||
|
||||
// 检查SMART告警
|
||||
if disk.SmartAlert {
|
||||
alertMessages = append(alertMessages,
|
||||
fmt.Sprintf("槽位 %d 的磁盘触发了S.M.A.R.T告警", disk.SlotNumber))
|
||||
}
|
||||
}
|
||||
|
||||
// 如果有告警消息,发送到管理群
|
||||
if len(alertMessages) > 0 {
|
||||
sb := strings.Builder{}
|
||||
sb.WriteString("⚠️ RAID磁盘阵列异常告警 ⚠️\n")
|
||||
sb.WriteString(fmt.Sprintf("检测时间: %s\n\n", time.Now().Format("2006-01-02 15:04:05")))
|
||||
|
||||
for _, msg := range alertMessages {
|
||||
sb.WriteString("- " + msg + "\n")
|
||||
}
|
||||
|
||||
// 发送告警消息到管理群
|
||||
sendAlertMessage(sb.String())
|
||||
} else {
|
||||
log.Println("RAID状态检查完成,未发现异常")
|
||||
}
|
||||
}
|
||||
|
||||
// 发送告警消息
|
||||
func sendAlertMessage(alertMsg string) {
|
||||
reportGroupID := config.ConfigManager.GetConfig().Management.ReportGroup
|
||||
|
||||
if reportGroupID == 0 {
|
||||
log.Println("未配置管理群,告警消息无法发送:", alertMsg)
|
||||
return
|
||||
}
|
||||
|
||||
action.ActionManager.SendMsg(&model.Reply{
|
||||
ReplyMsg: alertMsg,
|
||||
ReferOriginMsg: false,
|
||||
FromMsg: model.Message{
|
||||
GroupInfo: model.GroupInfo{
|
||||
GroupId: reportGroupID,
|
||||
},
|
||||
},
|
||||
})
|
||||
|
||||
log.Println("已发送RAID告警消息到管理群")
|
||||
}
|
||||
|
||||
func RaidHandler(msg model.Message) (reply *model.Reply) {
|
||||
|
Loading…
x
Reference in New Issue
Block a user