qq_bot/handler/raid/raid.go

271 lines
7.4 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package raid
import (
"fmt"
"log"
"os/exec"
"regexp"
"strconv"
"strings"
"time"
"git.lxtend.com/qqbot/action"
"git.lxtend.com/qqbot/config"
"git.lxtend.com/qqbot/constants"
"git.lxtend.com/qqbot/handler"
"git.lxtend.com/qqbot/message"
"git.lxtend.com/qqbot/model"
)
// init hooks this package into the bot when it is loaded: it registers
// RaidHandler under the "!raid" trigger with constants.LEVEL_USER and then
// launches the background disk monitor.
func init() {
	const trigger = "!raid"
	handler.RegisterHandler(trigger, RaidHandler, constants.LEVEL_USER)

	// Start the periodic check in its own goroutine; startRaidMonitor never returns.
	go startRaidMonitor()
}
// Threshold and timing configuration for the periodic disk status check.
const (
// Temperature threshold in degrees Celsius; a reading strictly above this value triggers an alert.
temperatureThreshold = 60
// Interval between two consecutive checks, in seconds.
checkIntervalSeconds = 30
)
// startRaidMonitor runs the periodic RAID check. It performs one check
// immediately and then one more every checkIntervalSeconds seconds. It never
// returns, so it must be run in its own goroutine (init does this).
func startRaidMonitor() {
	interval := checkIntervalSeconds * time.Second
	ticker := time.NewTicker(interval)

	for {
		checkRaidStatus()
		// Block until the next tick before checking again.
		<-ticker.C
	}
}
// raidAlertTempPattern extracts the Celsius reading from a stored drive
// temperature. The optional whitespace lets it accept both "38C ..." and
// "38 C", the latter being the form ParseDiskInfoList stores when its own
// pattern matches. It is compiled once instead of once per disk per check.
var raidAlertTempPattern = regexp.MustCompile(`(\d+)\s*C`)

// checkRaidStatus reads the current disk list and reports problems to the
// management group via sendAlertMessage.
//
// A disk produces an alert line when:
//   - its firmware state contains "bad",
//   - its temperature reading is strictly above temperatureThreshold, or
//   - it has the S.M.A.R.T alert flag set.
//
// All alert lines of one run are combined into a single message. When no disk
// information is available the function returns silently; when nothing is
// wrong it only writes a log line.
func checkRaidStatus() {
	disks := GetDiskInfo()
	if len(disks) == 0 {
		return
	}

	// NOTE(review): alerts print the raw SlotNumber while DiskInfo.String
	// prints SlotNumber+1 — confirm which numbering is intended.
	var alerts []string
	for _, disk := range disks {
		// Firmware state.
		if strings.Contains(disk.FirmwareState, "bad") {
			alerts = append(alerts,
				fmt.Sprintf("槽位 %d 的磁盘状态异常: %s", disk.SlotNumber, disk.FirmwareState))
		}

		// Temperature: readings that cannot be parsed are skipped, not alerted on.
		if m := raidAlertTempPattern.FindStringSubmatch(disk.DriveTemperature); m != nil {
			if celsius, err := strconv.Atoi(m[1]); err == nil && celsius > temperatureThreshold {
				alerts = append(alerts,
					fmt.Sprintf("槽位 %d 的磁盘温度过高: %s", disk.SlotNumber, disk.DriveTemperature))
			}
		}

		// S.M.A.R.T flag.
		if disk.SmartAlert {
			alerts = append(alerts,
				fmt.Sprintf("槽位 %d 的磁盘触发了S.M.A.R.T告警", disk.SlotNumber))
		}
	}

	if len(alerts) == 0 {
		log.Println("RAID状态检查完成未发现异常")
		return
	}

	// Build one message: header, timestamp, then one bullet per alert.
	var sb strings.Builder
	sb.WriteString("⚠️ RAID磁盘阵列异常告警 ⚠️\n")
	fmt.Fprintf(&sb, "检测时间: %s\n\n", time.Now().Format("2006-01-02 15:04:05"))
	for _, alert := range alerts {
		sb.WriteString("- " + alert + "\n")
	}

	sendAlertMessage([]string{sb.String()})
}
// sendAlertMessage forwards alertMsgs to the configured management report
// group. Each entry becomes one forward node attributed to the bot's own
// account. Nothing is sent (only a log line is written) when no report group
// is configured or when the login account info cannot be fetched.
func sendAlertMessage(alertMsgs []string) {
	reportGroupID := config.ConfigManager.GetConfig().Management.ReportGroup
	if reportGroupID == 0 {
		log.Println("未配置管理群,告警消息无法发送:", alertMsgs)
		return
	}

	self, err := action.GetLoginAccountInfo()
	if err != nil {
		log.Println("获取登录账号信息失败:", err)
		return
	}
	senderID := strconv.FormatInt(int64(self.Data.UserID), 10)
	senderName := self.Data.Nickname

	// One forward node per alert text, all sent under the bot's identity.
	nodes := make([]message.NodeMessage, 0, len(alertMsgs))
	for _, text := range alertMsgs {
		body := message.NewTextMessage().ParseMessage(text)
		node := message.NewNodeMessage().ParseMessage(senderID, senderName, []any{body})
		nodes = append(nodes, *node)
	}

	// Only the group id is filled in on the origin message; it selects the target group.
	target := model.Message{
		GroupInfo: model.GroupInfo{GroupId: reportGroupID},
	}
	action.ActionManager.SendForward(&model.Reply{
		ReplyMsg:       nodes,
		ReferOriginMsg: false,
		FromMsg:        target,
	})
	log.Println("已发送RAID告警消息到管理群")
}
// RaidHandler answers the "!raid" trigger with the current disk list.
//
// When no disk information is available it returns a plain text reply.
// Otherwise it sends a forward message itself (a title node followed by one
// node per disk) and returns nil. It also returns nil, after logging, when
// the bot's login account info cannot be fetched.
func RaidHandler(msg model.Message) (reply *model.Reply) {
	disks := GetDiskInfo()
	if len(disks) == 0 {
		return &model.Reply{
			ReplyMsg:       "未获取到阵列信息",
			ReferOriginMsg: false,
			FromMsg:        msg,
		}
	}

	self, err := action.GetLoginAccountInfo()
	if err != nil {
		log.Println("获取登录账号信息失败:", err)
		return nil
	}
	senderID := strconv.FormatInt(int64(self.Data.UserID), 10)
	senderName := self.Data.Nickname

	// newNode wraps one text in a forward node attributed to the bot itself.
	newNode := func(text string) message.NodeMessage {
		body := message.NewTextMessage().ParseMessage(text)
		return *message.NewNodeMessage().ParseMessage(senderID, senderName, []any{body})
	}

	nodes := make([]message.NodeMessage, 0, len(disks)+1)
	nodes = append(nodes, newNode("阵列信息:"))
	for _, disk := range disks {
		nodes = append(nodes, newNode(disk.String()))
	}

	action.ActionManager.SendForward(&model.Reply{
		ReplyMsg:       nodes,
		ReferOriginMsg: false,
		FromMsg:        msg,
	})
	return nil
}
// celsiusReadingPattern matches a Celsius reading such as "38 C" or "38C";
// the first capture group holds the digits. It is compiled once at package
// scope and shared by ParseDiskInfoList and DiskInfo.String.
var celsiusReadingPattern = regexp.MustCompile(`(\d+)\s*C`)

// DiskInfo holds the per-drive fields that ParseDiskInfoList extracts from a
// physical-drive listing (GetDiskInfo feeds it the output of
// `MegaCli64 -PDList -aALL`).
type DiskInfo struct {
	// SlotNumber is the value of the "Slot Number:" line, or -1 when that
	// value is not an integer.
	SlotNumber int
	// DriveTemperature is the Celsius part of the "Drive Temperature" line
	// (for example "38C" or "38 C"), or the trimmed raw value when no Celsius
	// reading is recognised.
	DriveTemperature string
	// Type is every whitespace-separated field of "Inquiry Data:" except the
	// last, joined with single spaces.
	Type string
	// Sn is the last whitespace-separated field of "Inquiry Data:".
	Sn string
	// FirmwareState is the trimmed value of the "Firmware state:" line.
	FirmwareState string
	// SmartAlert is true when the S.M.A.R.T alert line carries any value
	// other than "No".
	SmartAlert bool
}

// String renders the disk as a multi-line, human-readable summary. The slot
// is shown as SlotNumber+1. The temperature is shown as its digits followed
// by "℃"; a value without a recognisable Celsius reading is printed as stored,
// still followed by "℃".
func (d *DiskInfo) String() string {
	// Strip the stored unit so it is not printed twice (e.g. "38 C℃").
	temperature := d.DriveTemperature
	if m := celsiusReadingPattern.FindStringSubmatch(temperature); m != nil {
		temperature = m[1]
	}

	var sb strings.Builder
	fmt.Fprintf(&sb, "槽位: %d\n", d.SlotNumber+1)
	fmt.Fprintf(&sb, "温度: %s℃\n", temperature)
	fmt.Fprintf(&sb, "型号: %s\n", d.Type)
	fmt.Fprintf(&sb, "SN: %s\n", d.Sn)
	fmt.Fprintf(&sb, "状态: %s\n", d.FirmwareState)
	fmt.Fprintf(&sb, "S.M.A.R.T报警: %t", d.SmartAlert)
	return sb.String()
}

// ParseDiskInfoList turns the lines of a physical-drive listing into one
// DiskInfo per "Slot Number:" line, in input order. Lines before the first
// "Slot Number:" line and lines with unknown prefixes are ignored. It returns
// nil when the input contains no "Slot Number:" line.
func ParseDiskInfoList(lines []string) []*DiskInfo {
	var (
		disks   []*DiskInfo
		current *DiskInfo
	)

	for _, raw := range lines {
		line := strings.TrimSpace(raw)

		// A slot line closes the previous disk and opens a new one.
		if strings.HasPrefix(line, "Slot Number:") {
			if current != nil {
				disks = append(disks, current)
			}
			current = &DiskInfo{SlotNumber: parseDiskSlot(line)}
			continue
		}
		if current == nil {
			continue
		}

		switch {
		case strings.HasPrefix(line, "Firmware state:"):
			current.FirmwareState = strings.TrimSpace(strings.TrimPrefix(line, "Firmware state:"))

		case strings.HasPrefix(line, "Inquiry Data:"):
			// The last field is taken as the serial number, the rest as the model.
			fields := strings.Fields(strings.TrimPrefix(line, "Inquiry Data:"))
			if n := len(fields); n >= 2 {
				current.Type = strings.Join(fields[:n-1], " ")
				current.Sn = fields[n-1]
			}

		case strings.HasPrefix(line, "Drive Temperature"):
			if value, ok := diskFieldValue(line); ok {
				// Keep only the Celsius reading when there is one; this drops
				// any trailing text such as a Fahrenheit value in parentheses.
				if reading := celsiusReadingPattern.FindString(value); reading != "" {
					current.DriveTemperature = reading
				} else {
					current.DriveTemperature = value
				}
			}

		case strings.HasPrefix(line, "Drive has flagged a S.M.A.R.T alert"):
			if value, ok := diskFieldValue(line); ok {
				current.SmartAlert = value != "No"
			}
		}
	}

	// Flush the last disk, which has no following slot line to close it.
	if current != nil {
		disks = append(disks, current)
	}
	return disks
}

// parseDiskSlot returns the integer after "Slot Number:", or -1 (after
// logging) when it cannot be parsed.
func parseDiskSlot(line string) int {
	slot, err := strconv.Atoi(strings.TrimSpace(strings.TrimPrefix(line, "Slot Number:")))
	if err != nil {
		log.Printf("解析槽位号失败: %v", err)
		return -1
	}
	return slot
}

// diskFieldValue returns the trimmed text between the first and second ':' of
// line (or up to the end when there is only one ':'). ok is false when line
// contains no ':'.
func diskFieldValue(line string) (value string, ok bool) {
	parts := strings.SplitN(line, ":", 3)
	if len(parts) < 2 {
		return "", false
	}
	return strings.TrimSpace(parts[1]), true
}
// GetDiskInfo runs `MegaCli64 -PDList -aALL` and parses its standard output
// with ParseDiskInfoList. It returns nil, after logging, when the command
// cannot be started or exits with a non-zero status.
func GetDiskInfo() []*DiskInfo {
	outputBytes, err := exec.Command("MegaCli64", "-PDList", "-aALL").Output()
	if err != nil {
		// There is deliberately no grep fallback here: a grep started with no
		// file operand and a nil Stdin reads the null device, matches nothing
		// and exits 1, so it could never produce disk information.
		log.Printf("运行MegaCli64失败: %v", err)
		return nil
	}

	return ParseDiskInfoList(strings.Split(string(outputBytes), "\n"))
}