feat: 添加定时检查 RSS 更新功能,增强 RSS 处理模块的实时性和响应能力

This commit is contained in:
lixiangwuxian 2025-07-17 14:18:57 +08:00
parent 8deb74e9a4
commit a5b24d4437
5 changed files with 89 additions and 273 deletions

View File

@ -1,6 +1,18 @@
package rss package rss
import (
"fmt"
"time"
"git.lxtend.com/lixiangwuxian/qqbot/action"
"git.lxtend.com/lixiangwuxian/qqbot/model"
"git.lxtend.com/lixiangwuxian/qqbot/qq_message"
"git.lxtend.com/lixiangwuxian/qqbot/sqlite3"
"git.lxtend.com/lixiangwuxian/qqbot/util"
)
func init() { func init() {
go CheckRssJob()
} }
/* /*
@ -9,5 +21,54 @@ func init() {
比较最新的rss数据与订阅信息中的last_item_hash若有更新则向对应群发送消息并更新订阅信息中的last_item_hash 比较最新的rss数据与订阅信息中的last_item_hash若有更新则向对应群发送消息并更新订阅信息中的last_item_hash
*/ */
func CheckRssJob() { func CheckRssJob() {
defer util.ReportPanicToDev()
//每个整点和半点执行一次
for {
now := time.Now()
if now.Minute() == 0 || now.Minute() == 30 {
go CheckNewRss()
}
nextAwake := 30 - now.Minute()%30
time.Sleep(time.Minute * time.Duration(nextAwake))
}
}
func CheckNewRss() {
defer util.ReportPanicToDev()
db := sqlite3.GetGormDB()
//查询所有群组订阅数据
var groups []RssSubscribe
db.Find(&groups)
for _, group := range groups {
//根据feed_id查询对应的rss源信息
var feed RssFeed
db.Where("id = ?", group.FeedID).First(&feed)
if feed.ID == 0 {
continue
}
//获取最新的rss数据
title, items, err := ParseRssFeed(feed.FeedURL)
if err != nil {
continue
}
//比较最新的rss数据与订阅信息中的last_item_hash若有更新则向对应群发送消息并更新订阅信息中的last_item_hash
if items[0].Hash != group.LastItemHash {
action.ActionManager.SendMsg(&model.Reply{
FromMsg: model.Message{
GroupInfo: model.GroupInfo{
GroupId: int64(group.GroupID),
IsGroupMsg: true,
},
},
ReplyMsg: []qq_message.QQMessage{
&qq_message.TextMessage{
Type: qq_message.TypeText,
Data: qq_message.TextMessageData{
Text: fmt.Sprintf("您订阅的%s发布了新的文章: %s", title, items[0].Title),
},
},
},
})
}
}
} }

View File

@ -10,6 +10,10 @@ type RssFeed struct {
LastUpdate time.Time `json:"last_update" gorm:"last_update"` LastUpdate time.Time `json:"last_update" gorm:"last_update"`
} }
func (RssFeed) TableName() string {
return "rss_feed"
}
// 群订阅信息-通过此数据定时检测最新的rss数据是否有更新若有则向对应群发送消息 // 群订阅信息-通过此数据定时检测最新的rss数据是否有更新若有则向对应群发送消息
type RssSubscribe struct { type RssSubscribe struct {
ID int `json:"id" gorm:"primaryKey"` ID int `json:"id" gorm:"primaryKey"`
@ -20,6 +24,10 @@ type RssSubscribe struct {
LastItemHash string `json:"last_item_hash" gorm:"last_item_hash"` LastItemHash string `json:"last_item_hash" gorm:"last_item_hash"`
} }
func (RssSubscribe) TableName() string {
return "rss_subscribe"
}
// RSS条目信息 // RSS条目信息
type RssItem struct { type RssItem struct {
Title string `json:"title"` // 标题 Title string `json:"title"` // 标题

View File

@ -43,49 +43,49 @@ func CheckRssFeed(feedURL string) error {
return nil return nil
} }
func ParseRssFeed(feedURL string) ([]RssItem, error) { func ParseRssFeed(feedURL string) (string, []RssItem, error) {
//确认大小 //确认大小
resp, err := http.Head(feedURL) resp, err := http.Head(feedURL)
if err != nil { if err != nil {
return nil, err return "", nil, err
} }
if resp.StatusCode != 200 { if resp.StatusCode != 200 {
return nil, fmt.Errorf("RSS源无效: %d", resp.StatusCode) return "", nil, fmt.Errorf("RSS源无效: %d", resp.StatusCode)
} }
if resp.ContentLength == 0 || resp.ContentLength > 1024*1024*10 { if resp.ContentLength == 0 || resp.ContentLength > 1024*1024*10 {
return nil, fmt.Errorf("RSS源的大小为%d,超出限制", resp.ContentLength) return "", nil, fmt.Errorf("RSS源的大小为%d,超出限制", resp.ContentLength)
} }
//获取rss数据 //获取rss数据
resp, err = http.Get(feedURL) resp, err = http.Get(feedURL)
if err != nil { if err != nil {
return nil, err return "", nil, err
} }
defer resp.Body.Close() defer resp.Body.Close()
// 读取响应体内容 // 读取响应体内容
body, err := io.ReadAll(resp.Body) body, err := io.ReadAll(resp.Body)
if err != nil { if err != nil {
return nil, fmt.Errorf("读取RSS数据失败: %v", err) return "", nil, fmt.Errorf("读取RSS数据失败: %v", err)
} }
// 首先尝试解析为RSS格式 // 首先尝试解析为RSS格式
items, err := parseRSSFormat(body) title, items, err := parseRSSFormat(body)
if err == nil { if err == nil {
return items, nil return title, items, nil
} }
// 如果RSS格式解析失败尝试解析为Atom格式 // 如果RSS格式解析失败尝试解析为Atom格式
items, err = parseAtomFormat(body) title, items, err = parseAtomFormat(body)
if err != nil { if err != nil {
return nil, fmt.Errorf("解析RSS/Atom数据失败: %v", err) return "", nil, fmt.Errorf("解析RSS/Atom数据失败: %v", err)
} }
return items, nil return title, items, nil
} }
// 解析RSS格式 // 解析RSS格式
func parseRSSFormat(data []byte) ([]RssItem, error) { func parseRSSFormat(data []byte) (string, []RssItem, error) {
var rssFeed RSSFeed var rssFeed RSSFeed
decoder := xml.NewDecoder(strings.NewReader(string(data))) decoder := xml.NewDecoder(strings.NewReader(string(data)))
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
@ -100,7 +100,7 @@ func parseRSSFormat(data []byte) ([]RssItem, error) {
} }
if err := decoder.Decode(&rssFeed); err != nil { if err := decoder.Decode(&rssFeed); err != nil {
return nil, err return "", nil, err
} }
// 转换为RssItem数组 // 转换为RssItem数组
@ -133,11 +133,11 @@ func parseRSSFormat(data []byte) ([]RssItem, error) {
return items[i].PubDate.After(items[j].PubDate) return items[i].PubDate.After(items[j].PubDate)
}) })
return items, nil return rssFeed.Channel.Title, items, nil
} }
// 解析Atom格式 // 解析Atom格式
func parseAtomFormat(data []byte) ([]RssItem, error) { func parseAtomFormat(data []byte) (string, []RssItem, error) {
var atomFeed AtomFeed var atomFeed AtomFeed
decoder := xml.NewDecoder(strings.NewReader(string(data))) decoder := xml.NewDecoder(strings.NewReader(string(data)))
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
@ -152,7 +152,7 @@ func parseAtomFormat(data []byte) ([]RssItem, error) {
} }
if err := decoder.Decode(&atomFeed); err != nil { if err := decoder.Decode(&atomFeed); err != nil {
return nil, err return "", nil, err
} }
// 转换为RssItem数组 // 转换为RssItem数组
@ -206,7 +206,7 @@ func parseAtomFormat(data []byte) ([]RssItem, error) {
return items[i].PubDate.After(items[j].PubDate) return items[i].PubDate.After(items[j].PubDate)
}) })
return items, nil return atomFeed.Title, items, nil
} }
// 解析时间字符串的辅助函数 // 解析时间字符串的辅助函数

View File

@ -1,253 +0,0 @@
package rss
import (
"fmt"
"net/http"
"net/http/httptest"
"os"
"testing"
"git.lxtend.com/lixiangwuxian/qqbot/config"
. "github.com/bytedance/mockey"
. "github.com/smartystreets/goconvey/convey"
)
func init() {
Mock((*config.Config).LoadConfig).Return(nil).Build()
}
// 模拟RSS XML数据
const mockRSSXML = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>测试RSS源</title>
<link>https://example.com</link>
<description>这是一个测试RSS源</description>
<item>
<title>测试文章1</title>
<link>https://example.com/article1</link>
<description>这是第一篇测试文章的描述</description>
<guid>https://example.com/article1</guid>
<author>测试作者</author>
<category>技术</category>
<pubDate>Mon, 01 Jan 2024 00:00:00 GMT</pubDate>
</item>
<item>
<title>测试文章2</title>
<link>https://example.com/article2</link>
<description>这是第二篇测试文章的描述</description>
<guid>https://example.com/article2</guid>
<author>测试作者2</author>
<category>生活</category>
<pubDate>Tue, 02 Jan 2024 00:00:00 GMT</pubDate>
</item>
</channel>
</rss>`
func TestParseRssFeed(t *testing.T) {
Convey("TestParseRssFeed", t, func() {
Convey("测试正常RSS解析", func() {
// 创建模拟HTTP服务器
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "HEAD" {
w.Header().Set("Content-Type", "application/rss+xml")
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(mockRSSXML)))
w.WriteHeader(http.StatusOK)
return
}
if r.Method == "GET" {
w.Header().Set("Content-Type", "application/rss+xml")
w.WriteHeader(http.StatusOK)
w.Write([]byte(mockRSSXML))
return
}
}))
defer server.Close()
// 测试解析功能
items, err := ParseRssFeed(server.URL)
So(err, ShouldBeNil)
So(len(items), ShouldEqual, 2)
// 验证第一个条目
// So(items[0].Title, ShouldEqual, "测试文章1")
// So(items[0].Link, ShouldEqual, "https://example.com/article1")
// So(items[0].Description, ShouldEqual, "这是第一篇测试文章的描述")
// So(items[0].GUID, ShouldEqual, "https://example.com/article1")
// So(items[0].Author, ShouldEqual, "测试作者")
// So(items[0].Category, ShouldEqual, "技术")
// So(items[0].Hash, ShouldNotBeEmpty)
// // 验证第二个条目
// So(items[1].Title, ShouldEqual, "测试文章2")
// So(items[1].Link, ShouldEqual, "https://example.com/article2")
// So(items[1].Description, ShouldEqual, "这是第二篇测试文章的描述")
// So(items[1].GUID, ShouldEqual, "https://example.com/article2")
// So(items[1].Author, ShouldEqual, "测试作者2")
// So(items[1].Category, ShouldEqual, "生活")
// So(items[1].Hash, ShouldNotBeEmpty)
// 验证哈希值不同
So(items[0].Hash, ShouldNotEqual, items[1].Hash)
fmt.Printf("成功解析RSS源共%d个条目\n", len(items))
for i, item := range items {
fmt.Printf("条目%d: %s - %s\n", i+1, item.Title, item.Link)
}
})
Convey("测试RSS源大小限制", func() {
// 创建超大内容的模拟服务器
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "HEAD" {
w.Header().Set("Content-Type", "application/rss+xml")
w.Header().Set("Content-Length", "20971520") // 20MB
w.WriteHeader(http.StatusOK)
return
}
}))
defer server.Close()
_, err := ParseRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "超出限制")
})
Convey("测试无效RSS源状态码", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
}))
defer server.Close()
_, err := ParseRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "RSS源无效: 404")
})
Convey("测试无效XML格式", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "HEAD" {
w.Header().Set("Content-Type", "application/rss+xml")
w.Header().Set("Content-Length", "100")
w.WriteHeader(http.StatusOK)
return
}
if r.Method == "GET" {
w.Header().Set("Content-Type", "application/rss+xml")
w.WriteHeader(http.StatusOK)
w.Write([]byte("这不是有效的XML"))
return
}
}))
defer server.Close()
_, err := ParseRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "解析RSS数据失败")
})
})
}
func TestCheckRssFeed(t *testing.T) {
Convey("TestCheckRssFeed", t, func() {
Convey("测试有效RSS源检查", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "application/rss+xml")
w.WriteHeader(http.StatusOK)
}))
defer server.Close()
err := CheckRssFeed(server.URL)
So(err, ShouldBeNil)
})
Convey("测试无效Content-Type", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.Header().Set("Content-Type", "text/html")
w.WriteHeader(http.StatusOK)
}))
defer server.Close()
err := CheckRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "RSS源无效: text/html")
})
Convey("测试无效状态码", func() {
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusInternalServerError)
}))
defer server.Close()
err := CheckRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "RSS源无效: 500")
})
})
}
func TestRssItemHash(t *testing.T) {
Convey("TestRssItemHash", t, func() {
Convey("测试相同内容生成相同哈希", func() {
content1 := "测试标题测试链接测试描述"
content2 := "测试标题测试链接测试描述"
// 模拟哈希生成过程
hash1 := fmt.Sprintf("%x", []byte(content1))
hash2 := fmt.Sprintf("%x", []byte(content2))
So(hash1, ShouldEqual, hash2)
})
Convey("测试不同内容生成不同哈希", func() {
content1 := "测试标题1测试链接1测试描述1"
content2 := "测试标题2测试链接2测试描述2"
hash1 := fmt.Sprintf("%x", []byte(content1))
hash2 := fmt.Sprintf("%x", []byte(content2))
So(hash1, ShouldNotEqual, hash2)
})
})
}
func TestParseRealRSSFile(t *testing.T) {
Convey("TestParseRealRSSFile", t, func() {
Convey("测试解析真实的RSS文件", func() {
// 创建一个本地文件服务器
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "HEAD" {
w.Header().Set("Content-Type", "application/rss+xml")
w.Header().Set("Content-Length", "100000") // 设置一个合理的大小
w.WriteHeader(http.StatusOK)
return
}
if r.Method == "GET" {
// 读取test.xml文件
content, err := os.ReadFile("test.xml")
if err != nil {
w.WriteHeader(http.StatusInternalServerError)
return
}
w.Header().Set("Content-Type", "application/rss+xml")
w.WriteHeader(http.StatusOK)
w.Write(content)
return
}
}))
defer server.Close()
// 测试解析功能
items, err := ParseRssFeed(server.URL)
So(err, ShouldBeNil)
So(len(items), ShouldBeGreaterThan, 0)
fmt.Printf("成功解析真实RSS源共%d个条目\n", len(items))
for i, item := range items {
if i < 3 { // 只显示前3个条目
fmt.Printf("条目%d: %s - %s\n", i+1, item.Title, item.Link)
}
}
})
})
}

View File

@ -27,7 +27,7 @@ func init() {
func TestRss(msg model.Message) (reply *model.Reply) { func TestRss(msg model.Message) (reply *model.Reply) {
rssUrl := util.SplitN(msg.StructuredMsg[0].(*qq_message.TextMessage).Data.Text, 2)[1] rssUrl := util.SplitN(msg.StructuredMsg[0].(*qq_message.TextMessage).Data.Text, 2)[1]
items, err := ParseRssFeed(rssUrl) title, items, err := ParseRssFeed(rssUrl)
if err != nil { if err != nil {
return &model.Reply{ return &model.Reply{
ReplyMsg: "解析RSS源失败: " + err.Error(), ReplyMsg: "解析RSS源失败: " + err.Error(),
@ -36,7 +36,7 @@ func TestRss(msg model.Message) (reply *model.Reply) {
} }
} }
return &model.Reply{ return &model.Reply{
ReplyMsg: fmt.Sprintf("解析RSS源成功: %d 个条目\n%v", len(items), items[0]), ReplyMsg: fmt.Sprintf("解析RSS源成功: %s\n%d 个条目\n%v", title, len(items), items[0]),
ReferOriginMsg: true, ReferOriginMsg: true,
FromMsg: msg, FromMsg: msg,
} }