283 lines
7.8 KiB
Go
283 lines
7.8 KiB
Go
package raid
|
||
|
||
import (
|
||
"fmt"
|
||
"log"
|
||
"os/exec"
|
||
"strconv"
|
||
"strings"
|
||
"time"
|
||
|
||
"git.lxtend.com/qqbot/action"
|
||
"git.lxtend.com/qqbot/config"
|
||
"git.lxtend.com/qqbot/constants"
|
||
"git.lxtend.com/qqbot/handler"
|
||
"git.lxtend.com/qqbot/model"
|
||
)
|
||
|
||
func init() {
|
||
handler.RegisterHandler("!raid", RaidHandler, constants.LEVEL_USER)
|
||
// 启动定时检测任务
|
||
go startRaidMonitor()
|
||
}
|
||
|
||
// Thresholds and timing used by the periodic disk health check.
const (
	// temperatureThreshold is the drive temperature, in degrees Celsius,
	// above which an alert is raised.
	temperatureThreshold = 60
	// checkIntervalSeconds is the number of seconds between two checks.
	checkIntervalSeconds = 30
)
|
||
|
||
// 启动RAID监控定时任务
|
||
func startRaidMonitor() {
|
||
ticker := time.NewTicker(time.Duration(checkIntervalSeconds) * time.Second)
|
||
// 首次启动时立即检查一次
|
||
checkRaidStatus()
|
||
|
||
for range ticker.C {
|
||
checkRaidStatus()
|
||
}
|
||
}
|
||
|
||
// 检查RAID状态
|
||
func checkRaidStatus() {
|
||
log.Println("开始检查RAID状态...")
|
||
diskInfoList := GetDiskInfo()
|
||
|
||
if len(diskInfoList) == 0 {
|
||
log.Println("未获取到阵列信息,跳过检查")
|
||
return
|
||
}
|
||
|
||
var alertMessages []string
|
||
|
||
for _, disk := range diskInfoList {
|
||
// 检查固件状态
|
||
if strings.Contains(disk.FirmwareState, "bad") {
|
||
alertMessages = append(alertMessages,
|
||
fmt.Sprintf("槽位 %d 的磁盘状态异常: %s", disk.SlotNumber, disk.FirmwareState))
|
||
}
|
||
|
||
// 检查温度
|
||
tempStr := disk.DriveTemperature
|
||
if strings.Contains(tempStr, "C") {
|
||
// 提取温度数值
|
||
tempParts := strings.Split(tempStr, "C")
|
||
tempValue, err := strconv.Atoi(strings.TrimSpace(tempParts[0]))
|
||
if err == nil && tempValue > temperatureThreshold {
|
||
alertMessages = append(alertMessages,
|
||
fmt.Sprintf("槽位 %d 的磁盘温度过高: %s", disk.SlotNumber, tempStr))
|
||
}
|
||
}
|
||
|
||
// 检查SMART告警
|
||
if disk.SmartAlert {
|
||
alertMessages = append(alertMessages,
|
||
fmt.Sprintf("槽位 %d 的磁盘触发了S.M.A.R.T告警", disk.SlotNumber))
|
||
}
|
||
}
|
||
|
||
// 如果有告警消息,发送到管理群
|
||
if len(alertMessages) > 0 {
|
||
sb := strings.Builder{}
|
||
sb.WriteString("⚠️ RAID磁盘阵列异常告警 ⚠️\n")
|
||
sb.WriteString(fmt.Sprintf("检测时间: %s\n\n", time.Now().Format("2006-01-02 15:04:05")))
|
||
|
||
for _, msg := range alertMessages {
|
||
sb.WriteString("- " + msg + "\n")
|
||
}
|
||
|
||
// 发送告警消息到管理群
|
||
sendAlertMessage(sb.String())
|
||
} else {
|
||
log.Println("RAID状态检查完成,未发现异常")
|
||
}
|
||
}
|
||
|
||
// 发送告警消息
|
||
func sendAlertMessage(alertMsg string) {
|
||
reportGroupID := config.ConfigManager.GetConfig().Management.ReportGroup
|
||
|
||
if reportGroupID == 0 {
|
||
log.Println("未配置管理群,告警消息无法发送:", alertMsg)
|
||
return
|
||
}
|
||
|
||
action.ActionManager.SendMsg(&model.Reply{
|
||
ReplyMsg: alertMsg,
|
||
ReferOriginMsg: false,
|
||
FromMsg: model.Message{
|
||
GroupInfo: model.GroupInfo{
|
||
GroupId: reportGroupID,
|
||
},
|
||
},
|
||
})
|
||
|
||
log.Println("已发送RAID告警消息到管理群")
|
||
}
|
||
|
||
func RaidHandler(msg model.Message) (reply *model.Reply) {
|
||
diskInfoList := GetDiskInfo()
|
||
|
||
if len(diskInfoList) == 0 {
|
||
return &model.Reply{
|
||
FromMsg: msg,
|
||
ReplyMsg: "未获取到阵列信息",
|
||
ReferOriginMsg: false,
|
||
}
|
||
}
|
||
|
||
sb := strings.Builder{}
|
||
sb.WriteString("阵列信息:\n")
|
||
for _, diskInfo := range diskInfoList {
|
||
sb.WriteString(diskInfo.String())
|
||
}
|
||
|
||
return &model.Reply{
|
||
FromMsg: msg,
|
||
ReplyMsg: sb.String(),
|
||
ReferOriginMsg: false,
|
||
}
|
||
}
|
||
|
||
// MegaCli64 -PDList -aALL | grep -E "Slot Number|Drive Temperature|Inquiry Data|Firmware state|S.M.A.R.T alert" | cat
|
||
// Slot Number: 0
|
||
// Firmware state: Online, Spun Up
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 010356A0A001FWWB
|
||
// Drive Temperature :56C (132.80 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
// Slot Number: 1
|
||
// Firmware state: Online, Spun Up
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 0103X6S0A0LRFWWB
|
||
// Drive Temperature :54C (129.20 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
// Slot Number: 2
|
||
// Firmware state: Online, Spun Up
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 010356F0A00AFWWB
|
||
// Drive Temperature :51C (123.80 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
// Slot Number: 3
|
||
// Firmware state: Online, Spun Up
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 0103X6Q0A0RZFWWB
|
||
// Drive Temperature :46C (114.80 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
// Slot Number: 4
|
||
// Firmware state: Unconfigured(bad)
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 0103X6R0A09VFWWB
|
||
// Drive Temperature :49C (120.20 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
// Slot Number: 5
|
||
// Firmware state: Online, Spun Up
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 0103X6R0A0JZFWWB
|
||
// Drive Temperature :53C (127.40 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
// Slot Number: 6
|
||
// Firmware state: Online, Spun Up
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 010356F0A004FWWB
|
||
// Drive Temperature :56C (132.80 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
// Slot Number: 7
|
||
// Firmware state: Online, Spun Up
|
||
// Inquiry Data: TOSHIBA MG04SCA60EE 01034680A01WFWWB
|
||
// Drive Temperature :58C (136.40 F)
|
||
// Drive has flagged a S.M.A.R.T alert : No
|
||
|
||
// DiskInfo holds the fields extracted for one physical drive from
// `MegaCli64 -PDList` output.
type DiskInfo struct {
	// SlotNumber is the value of the "Slot Number:" line, or -1 when that
	// value is not an integer.
	SlotNumber int
	// DriveTemperature is the raw temperature text, e.g. "56C (132.80 F)".
	DriveTemperature string
	// Type is the "Inquiry Data" text without its last field.
	Type string
	// Sn is the last whitespace-separated field of "Inquiry Data".
	Sn string
	// FirmwareState is the text after "Firmware state:", e.g. "Online, Spun Up".
	FirmwareState string
	// SmartAlert is true when the S.M.A.R.T alert line carries a value other
	// than "No".
	SmartAlert bool
}

// String renders the disk as six labelled lines (slot, temperature, model,
// serial number, firmware state, S.M.A.R.T alert), each terminated by "\n".
func (d *DiskInfo) String() string {
	var out strings.Builder
	fmt.Fprintf(&out, "槽位: %d\n", d.SlotNumber)
	fmt.Fprintf(&out, "温度: %s\n", d.DriveTemperature)
	fmt.Fprintf(&out, "型号: %s\n", d.Type)
	fmt.Fprintf(&out, "SN: %s\n", d.Sn)
	fmt.Fprintf(&out, "状态: %s\n", d.FirmwareState)
	fmt.Fprintf(&out, "S.M.A.R.T报警: %t\n", d.SmartAlert)
	return out.String()
}
|
||
|
||
func ParseDiskInfoList(lines []string) []*DiskInfo {
|
||
var diskInfoList []*DiskInfo
|
||
var currentDisk *DiskInfo
|
||
|
||
for _, line := range lines {
|
||
line = strings.TrimSpace(line)
|
||
|
||
if strings.HasPrefix(line, "Slot Number:") {
|
||
// 新的磁盘信息开始
|
||
if currentDisk != nil {
|
||
diskInfoList = append(diskInfoList, currentDisk)
|
||
}
|
||
|
||
slotStr := strings.TrimSpace(strings.TrimPrefix(line, "Slot Number:"))
|
||
slotNum, err := strconv.Atoi(slotStr)
|
||
if err != nil {
|
||
log.Printf("解析槽位号失败: %v", err)
|
||
slotNum = -1
|
||
}
|
||
|
||
currentDisk = &DiskInfo{
|
||
SlotNumber: slotNum,
|
||
}
|
||
} else if currentDisk != nil {
|
||
// 处理当前磁盘的其他信息
|
||
if strings.HasPrefix(line, "Firmware state:") {
|
||
currentDisk.FirmwareState = strings.TrimSpace(strings.TrimPrefix(line, "Firmware state:"))
|
||
} else if strings.HasPrefix(line, "Inquiry Data:") {
|
||
inquiryData := strings.TrimSpace(strings.TrimPrefix(line, "Inquiry Data:"))
|
||
parts := strings.Fields(inquiryData)
|
||
if len(parts) >= 2 {
|
||
currentDisk.Type = strings.Join(parts[:len(parts)-1], " ")
|
||
currentDisk.Sn = parts[len(parts)-1]
|
||
}
|
||
} else if strings.HasPrefix(line, "Drive Temperature") {
|
||
tempParts := strings.Split(line, ":")
|
||
if len(tempParts) >= 2 {
|
||
currentDisk.DriveTemperature = strings.TrimSpace(tempParts[1])
|
||
}
|
||
} else if strings.HasPrefix(line, "Drive has flagged a S.M.A.R.T alert") {
|
||
smartParts := strings.Split(line, ":")
|
||
if len(smartParts) >= 2 {
|
||
currentDisk.SmartAlert = strings.TrimSpace(smartParts[1]) != "No"
|
||
}
|
||
}
|
||
}
|
||
}
|
||
|
||
// 添加最后一个磁盘
|
||
if currentDisk != nil {
|
||
diskInfoList = append(diskInfoList, currentDisk)
|
||
}
|
||
|
||
return diskInfoList
|
||
}
|
||
|
||
func GetDiskInfo() []*DiskInfo {
|
||
// 首先尝试运行MegaCli64命令
|
||
cmd := exec.Command("MegaCli64", "-PDList", "-aALL")
|
||
outputBytes, err := cmd.Output()
|
||
if err != nil {
|
||
log.Printf("运行MegaCli64失败: %v", err)
|
||
|
||
// 尝试运行grep命令模拟示例输出
|
||
grepCmd := exec.Command("grep", "-E", "Slot Number|Drive Temperature|Inquiry Data|Firmware state|S.M.A.R.T alert")
|
||
outputBytes, err = grepCmd.Output()
|
||
if err != nil {
|
||
log.Printf("获取磁盘信息失败: %v", err)
|
||
return nil
|
||
}
|
||
}
|
||
|
||
output := string(outputBytes)
|
||
lines := strings.Split(output, "\n")
|
||
|
||
return ParseDiskInfoList(lines)
|
||
}
|