feat: 添加 RSS 处理模块,包括 RSS 源订阅、解析和定时检查功能,支持群组消息推送

This commit is contained in:
lixiangwuxian 2025-07-15 20:19:02 +08:00
parent 5ce7f7bba4
commit 2baa9de563
5 changed files with 583 additions and 0 deletions

13
handler/rss/job.go Normal file
View File

@ -0,0 +1,13 @@
package rss
// init is currently an empty placeholder: it performs no setup or registration.
func init() {
}
/*
CheckRssJob is meant to run periodically, detect whether a feed has new RSS
data and, if so, send a message to the subscribed group.

Intended flow: load every subscription, look up the RSS source referenced by
its feed_id, fetch the latest RSS data from that source's URL, compare it with
the subscription's last_item_hash, and on a change notify the group and store
the new last_item_hash.

NOTE: the body is still empty, so none of the above is implemented yet.
*/
func CheckRssJob() {
}

55
handler/rss/model.go Normal file
View File

@ -0,0 +1,55 @@
package rss
import "time"
// RssFeed is the persisted record of one RSS source. A feed that no longer
// has any subscription referencing it should be removed.
//
// Column names are spelled out with the GORM `column:` tag key; a bare
// `gorm:"feed_url"` is not a recognised GORM setting and only appeared to work
// because the default naming strategy derives the same names.
type RssFeed struct {
	ID         int       `json:"id" gorm:"primaryKey"`
	FeedURL    string    `json:"feed_url" gorm:"column:feed_url"`       // absolute URL of the feed
	Creator    string    `json:"creator" gorm:"column:creator"`         // ID of the user who first added the feed, as a decimal string
	LastUpdate time.Time `json:"last_update" gorm:"column:last_update"` // set to the creation time when the row is inserted
}
// RssSubscribe links one group to one RSS feed. The periodic job is meant to
// use these rows to detect new feed data and notify the group.
//
// Column names use the GORM `column:` tag key (a bare `gorm:"feed_id"` is not
// a recognised setting). CreateAt carries `autoCreateTime` because GORM only
// fills a creation timestamp automatically for a field named CreatedAt.
type RssSubscribe struct {
	ID           int       `json:"id" gorm:"primaryKey"`
	FeedID       int       `json:"feed_id" gorm:"column:feed_id"`                  // RssFeed.ID this subscription points at
	GroupID      int       `json:"group_id" gorm:"column:group_id"`                // group that receives the updates
	Creator      int       `json:"creator" gorm:"column:creator"`                  // ID of the subscribing user
	CreateAt     time.Time `json:"create_at" gorm:"column:create_at;autoCreateTime"` // when the subscription was created
	LastItemHash string    `json:"last_item_hash" gorm:"column:last_item_hash"`    // hash of the last item already pushed to the group
}
// RssItem is one feed entry in the form the rest of the package works with:
// the publication date is a time.Time and Hash carries a content fingerprint.
type RssItem struct {
Title string `json:"title"` // title
Link string `json:"link"` // link
Description string `json:"description"` // description
PubDate time.Time `json:"pub_date"` // publication time
GUID string `json:"guid"` // globally unique identifier
Author string `json:"author"` // author
Category string `json:"category"` // category
Hash string `json:"hash"` // content hash, used to detect updates
}
// RSSFeed is the XML decoding target for an RSS document: a root <rss>
// element containing a single <channel> with its metadata and <item> entries.
type RSSFeed struct {
// NOTE(review): encoding/xml conventionally declares XMLName as xml.Name;
// here it is a string — confirm that is intentional.
XMLName string `xml:"rss"`
Channel struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
Items []RSSItem `xml:"item"`
} `xml:"channel"`
}
// RSSItem is the XML decoding target for one <item> element of an RSS
// document. Every field is kept as the raw string found in the XML; PubDate
// in particular is not parsed here.
type RSSItem struct {
Title string `xml:"title"`
Link string `xml:"link"`
Description string `xml:"description"`
PubDate string `xml:"pubDate"`
GUID string `xml:"guid"`
Author string `xml:"author"`
Category string `xml:"category"`
}

111
handler/rss/parse.go Normal file
View File

@ -0,0 +1,111 @@
package rss
import (
	"crypto/md5"
	"encoding/xml"
	"fmt"
	"io"
	"mime"
	"net/http"
	"sort"
	"time"
)
// CheckRssFeed probes feedURL with a HEAD request and reports whether the
// response headers look like an RSS source.
//
// It returns nil when the server answers 200 and the Content-Type media type
// is application/rss+xml; parameters such as "; charset=utf-8" and letter case
// are ignored. Otherwise it returns the transport error, or an error naming
// the unexpected status code or the raw Content-Type header.
func CheckRssFeed(feedURL string) error {
	// Bound the probe so an unresponsive host cannot block the caller forever.
	client := &http.Client{Timeout: 30 * time.Second}

	resp, err := client.Head(feedURL)
	if err != nil {
		return err
	}
	// A HEAD response has no payload, but the body still has to be closed so
	// the underlying connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("RSS源无效: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	// The parse error is deliberately ignored: on a malformed parameter the
	// media type is still returned, and on any other failure it is empty and
	// therefore rejected by the comparison below.
	mediaType, _, _ := mime.ParseMediaType(contentType)
	if mediaType != "application/rss+xml" {
		return fmt.Errorf("RSS源无效: %s", contentType)
	}
	return nil
}
// rssPubDateLayouts lists the layouts tried, in order, when parsing an item's
// <pubDate>. The first six are the original set; the remaining ones accept the
// one-digit day of month and the optional weekday that RFC 822 dates allow.
var rssPubDateLayouts = []string{
	time.RFC1123,          // "Mon, 02 Jan 2006 15:04:05 MST"
	time.RFC1123Z,         // "Mon, 02 Jan 2006 15:04:05 -0700"
	time.RFC822,           // "02 Jan 06 15:04 MST"
	time.RFC822Z,          // "02 Jan 06 15:04 -0700"
	time.RFC3339,          // "2006-01-02T15:04:05Z07:00"
	"2006-01-02 15:04:05", // plain date-time without zone
	"Mon, 2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
}

// parseRssPubDate returns raw parsed with the first matching layout, or the
// zero time when raw is empty or matches none of them.
func parseRssPubDate(raw string) time.Time {
	for _, layout := range rssPubDateLayouts {
		if parsed, err := time.Parse(layout, raw); err == nil {
			return parsed
		}
	}
	return time.Time{}
}

// ParseRssFeed downloads the RSS document at feedURL and returns its items
// sorted by publication time in ascending order (oldest first; items whose
// date could not be parsed carry the zero time and therefore come first).
//
// A HEAD request is made first so that a non-200 status or an advertised size
// of zero or more than 10 MiB is rejected before downloading. Because the
// advertised size may be absent, the GET body itself is also capped at the
// same limit. Each returned item carries Hash, the hex MD5 of its title, link
// and description concatenated.
//
// An error is returned for transport failures, non-200 responses, oversized
// feeds, and documents that cannot be decoded as RSS XML.
func ParseRssFeed(feedURL string) ([]RssItem, error) {
	const maxFeedBytes = 1024 * 1024 * 10

	// Bound both requests so an unresponsive host cannot block the caller.
	client := &http.Client{Timeout: 30 * time.Second}

	// Check status and advertised size before downloading anything.
	headResp, err := client.Head(feedURL)
	if err != nil {
		return nil, err
	}
	headResp.Body.Close() // no payload on HEAD; close to release the connection
	if headResp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("RSS源无效: %d", headResp.StatusCode)
	}
	if headResp.ContentLength == 0 || headResp.ContentLength > maxFeedBytes {
		return nil, fmt.Errorf("RSS源的大小为%d,超出限制", headResp.ContentLength)
	}

	// Fetch the document itself.
	resp, err := client.Get(feedURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("RSS源无效: %d", resp.StatusCode)
	}

	// The HEAD size can be unknown (-1) or simply wrong, so enforce the limit
	// on the bytes actually read. One extra byte is allowed so that hitting
	// the cap can be told apart from a body of exactly the maximum size.
	body := &io.LimitedReader{R: resp.Body, N: maxFeedBytes + 1}

	var rssFeed RSSFeed
	decoder := xml.NewDecoder(body)
	// Without a CharsetReader the decoder rejects any non-UTF-8 declaration.
	// The bytes are passed through unchanged for every charset, so feeds
	// declared as e.g. GB2312/GBK/GB18030 decode without error but their text
	// is NOT converted to UTF-8; a real conversion needs an encoding package
	// this file does not import.
	decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
		return input, nil
	}
	decodeErr := decoder.Decode(&rssFeed)
	if body.N <= 0 {
		return nil, fmt.Errorf("RSS源的大小超过%d字节,超出限制", maxFeedBytes)
	}
	if decodeErr != nil {
		return nil, fmt.Errorf("解析RSS数据失败: %w", decodeErr)
	}

	// Convert the raw XML items into RssItem values.
	var items []RssItem
	for _, item := range rssFeed.Channel.Items {
		// Content fingerprint used to detect updates.
		digest := md5.Sum([]byte(item.Title + item.Link + item.Description))
		items = append(items, RssItem{
			Title:       item.Title,
			Link:        item.Link,
			Description: item.Description,
			PubDate:     parseRssPubDate(item.PubDate),
			GUID:        item.GUID,
			Author:      item.Author,
			Category:    item.Category,
			Hash:        fmt.Sprintf("%x", digest),
		})
	}

	// Ascending by publication time; stable so items with equal (or missing)
	// dates keep the order they had in the feed.
	sort.SliceStable(items, func(i, j int) bool {
		return items[i].PubDate.Before(items[j].PubDate)
	})
	return items, nil
}

211
handler/rss/parse_test.go Normal file
View File

@ -0,0 +1,211 @@
package rss
import (
"fmt"
"net/http"
"net/http/httptest"
"testing"
"git.lxtend.com/lixiangwuxian/qqbot/config"
. "github.com/bytedance/mockey"
. "github.com/smartystreets/goconvey/convey"
)
// init installs a mock for (*config.Config).LoadConfig that returns nil; as a
// package init function it runs before any test in this package.
func init() {
Mock((*config.Config).LoadConfig).Return(nil).Build()
}
// mockRSSXML is a minimal RSS 2.0 document with one channel and two items,
// served by the mock HTTP servers in the tests below.
const mockRSSXML = `<?xml version="1.0" encoding="UTF-8"?>
<rss version="2.0">
<channel>
<title>测试RSS源</title>
<link>https://example.com</link>
<description>这是一个测试RSS源</description>
<item>
<title>测试文章1</title>
<link>https://example.com/article1</link>
<description>这是第一篇测试文章的描述</description>
<pubDate>Mon, 01 Jan 2024 12:00:00 +0800</pubDate>
<guid>https://example.com/article1</guid>
<author>测试作者</author>
<category>技术</category>
</item>
<item>
<title>测试文章2</title>
<link>https://example.com/article2</link>
<description>这是第二篇测试文章的描述</description>
<pubDate>Tue, 02 Jan 2024 14:30:00 +0800</pubDate>
<guid>https://example.com/article2</guid>
<author>测试作者2</author>
<category>生活</category>
</item>
</channel>
</rss>`
// TestParseRssFeed runs ParseRssFeed against local httptest servers and
// covers four cases: a well-formed feed, an advertised size above the limit,
// a non-200 status, and a body that is not valid XML.
func TestParseRssFeed(t *testing.T) {
Convey("TestParseRssFeed", t, func() {
Convey("测试正常RSS解析", func() {
// Start a mock HTTP server that answers both the HEAD probe and the GET.
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "HEAD" {
w.Header().Set("Content-Type", "application/rss+xml")
w.Header().Set("Content-Length", fmt.Sprintf("%d", len(mockRSSXML)))
w.WriteHeader(http.StatusOK)
return
}
if r.Method == "GET" {
w.Header().Set("Content-Type", "application/rss+xml")
w.WriteHeader(http.StatusOK)
w.Write([]byte(mockRSSXML))
return
}
}))
defer server.Close()
// Exercise the parser.
items, err := ParseRssFeed(server.URL)
So(err, ShouldBeNil)
So(len(items), ShouldEqual, 2)
// Verify the first item (the one with the earlier pubDate).
So(items[0].Title, ShouldEqual, "测试文章1")
So(items[0].Link, ShouldEqual, "https://example.com/article1")
So(items[0].Description, ShouldEqual, "这是第一篇测试文章的描述")
So(items[0].GUID, ShouldEqual, "https://example.com/article1")
So(items[0].Author, ShouldEqual, "测试作者")
So(items[0].Category, ShouldEqual, "技术")
So(items[0].Hash, ShouldNotBeEmpty)
// Verify the second item.
So(items[1].Title, ShouldEqual, "测试文章2")
So(items[1].Link, ShouldEqual, "https://example.com/article2")
So(items[1].Description, ShouldEqual, "这是第二篇测试文章的描述")
So(items[1].GUID, ShouldEqual, "https://example.com/article2")
So(items[1].Author, ShouldEqual, "测试作者2")
So(items[1].Category, ShouldEqual, "生活")
So(items[1].Hash, ShouldNotBeEmpty)
// The two items must have different hashes.
So(items[0].Hash, ShouldNotEqual, items[1].Hash)
fmt.Printf("成功解析RSS源共%d个条目\n", len(items))
for i, item := range items {
fmt.Printf("条目%d: %s - %s\n", i+1, item.Title, item.Link)
}
})
Convey("测试RSS源大小限制", func() {
// Mock server that only answers HEAD and advertises an oversized body.
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "HEAD" {
w.Header().Set("Content-Type", "application/rss+xml")
w.Header().Set("Content-Length", "20971520") // 20MB
w.WriteHeader(http.StatusOK)
return
}
}))
defer server.Close()
_, err := ParseRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "超出限制")
})
Convey("测试无效RSS源状态码", func() {
// Every request, including the HEAD probe, is answered with 404.
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
w.WriteHeader(http.StatusNotFound)
}))
defer server.Close()
_, err := ParseRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "RSS源无效: 404")
})
Convey("测试无效XML格式", func() {
// HEAD looks fine, but the GET body is plain text rather than XML.
server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
if r.Method == "HEAD" {
w.Header().Set("Content-Type", "application/rss+xml")
w.Header().Set("Content-Length", "100")
w.WriteHeader(http.StatusOK)
return
}
if r.Method == "GET" {
w.Header().Set("Content-Type", "application/rss+xml")
w.WriteHeader(http.StatusOK)
w.Write([]byte("这不是有效的XML"))
return
}
}))
defer server.Close()
_, err := ParseRssFeed(server.URL)
So(err, ShouldNotBeNil)
So(err.Error(), ShouldContainSubstring, "解析RSS数据失败")
})
})
}
// TestCheckRssFeed runs CheckRssFeed against local httptest servers: an
// accepted RSS Content-Type, a rejected Content-Type, and a non-200 status.
func TestCheckRssFeed(t *testing.T) {
	// serve starts a server that answers every request with the given status
	// and, when contentType is non-empty, that Content-Type header.
	serve := func(contentType string, status int) *httptest.Server {
		return httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
			if contentType != "" {
				w.Header().Set("Content-Type", contentType)
			}
			w.WriteHeader(status)
		}))
	}

	Convey("TestCheckRssFeed", t, func() {
		Convey("测试有效RSS源检查", func() {
			srv := serve("application/rss+xml", http.StatusOK)
			defer srv.Close()

			checkErr := CheckRssFeed(srv.URL)
			So(checkErr, ShouldBeNil)
		})

		Convey("测试无效Content-Type", func() {
			srv := serve("text/html", http.StatusOK)
			defer srv.Close()

			checkErr := CheckRssFeed(srv.URL)
			So(checkErr, ShouldNotBeNil)
			So(checkErr.Error(), ShouldContainSubstring, "RSS源无效: text/html")
		})

		Convey("测试无效状态码", func() {
			srv := serve("", http.StatusInternalServerError)
			defer srv.Close()

			checkErr := CheckRssFeed(srv.URL)
			So(checkErr, ShouldNotBeNil)
			So(checkErr.Error(), ShouldContainSubstring, "RSS源无效: 500")
		})
	})
}
// TestRssItemHash checks that equal strings yield equal fingerprints and
// different strings yield different ones.
//
// NOTE(review): the fingerprint computed here is only the hex encoding of the
// raw bytes; this test does not call ParseRssFeed or md5, so it does not
// exercise the hashing used in production — consider replacing it.
func TestRssItemHash(t *testing.T) {
	// hexOf mirrors the stand-in "hash" used by this test.
	hexOf := func(s string) string {
		return fmt.Sprintf("%x", []byte(s))
	}

	Convey("TestRssItemHash", t, func() {
		Convey("测试相同内容生成相同哈希", func() {
			first := hexOf("测试标题测试链接测试描述")
			second := hexOf("测试标题测试链接测试描述")
			So(first, ShouldEqual, second)
		})

		Convey("测试不同内容生成不同哈希", func() {
			first := hexOf("测试标题1测试链接1测试描述1")
			second := hexOf("测试标题2测试链接2测试描述2")
			So(first, ShouldNotEqual, second)
		})
	})
}

193
handler/rss/rss.go Normal file
View File

@ -0,0 +1,193 @@
package rss
import (
"fmt"
"regexp"
"strings"
"time"
"git.lxtend.com/lixiangwuxian/qqbot/constants"
"git.lxtend.com/lixiangwuxian/qqbot/handler"
"git.lxtend.com/lixiangwuxian/qqbot/model"
"git.lxtend.com/lixiangwuxian/qqbot/qq_message"
"git.lxtend.com/lixiangwuxian/qqbot/sqlite3"
"git.lxtend.com/lixiangwuxian/qqbot/util"
"gorm.io/gorm"
)
// init migrates the RssFeed and RssSubscribe tables and registers the chat
// commands of this package.
func init() {
	// The value returned by AutoMigrate is not inspected.
	sqlite3.GetGormDB().AutoMigrate(&RssFeed{}, &RssSubscribe{})

	// Commands that must be addressed to the bot with an @-mention.
	handler.RegisterAtHandler("订阅", Subscribe, constants.LEVEL_USER)
	handler.RegisterAtHandler("我的订阅", MySubscribed, constants.LEVEL_USER)
	handler.RegisterAtHandler("退订", Unsubscribe, constants.LEVEL_USER)

	// Debug command: parses a caller-supplied URL on demand.
	// NOTE(review): it is registered at LEVEL_USER — confirm that letting any
	// user make the bot fetch an arbitrary URL is intended.
	handler.RegisterHandler("test_rss", TestRss, constants.LEVEL_USER)
}
// TestRss handles the "test_rss" debug command: it takes the second token of
// the first message segment as a feed URL, parses that feed and replies with
// the number of items and their contents, or with the parse error.
//
// A message without a leading text segment or without a URL token gets a
// usage hint instead of causing an index or type-assertion panic.
func TestRss(msg model.Message) (reply *model.Reply) {
	respond := func(text string) *model.Reply {
		return &model.Reply{
			ReplyMsg:       text,
			ReferOriginMsg: true,
			FromMsg:        msg,
		}
	}
	const usage = "请在命令后提供要测试的RSS源地址"

	if len(msg.StructuredMsg) == 0 {
		return respond(usage)
	}
	text, ok := msg.StructuredMsg[0].(*qq_message.TextMessage)
	if !ok {
		return respond(usage)
	}
	// parts[0] is the command word, parts[1] the URL.
	parts := util.SplitN(text.Data.Text, 2)
	if len(parts) < 2 {
		return respond(usage)
	}

	items, err := ParseRssFeed(parts[1])
	if err != nil {
		return respond("解析RSS源失败: " + err.Error())
	}
	return respond(fmt.Sprintf("解析RSS源成功: %d 个条目\n%v", len(items), items))
}
// rssLinkPattern matches an RSS link inside free text: an optional http(s)
// scheme, a dotted host name ending in a top-level domain, an optional path,
// and a mandatory ".xml" ending. Compiled once instead of per message segment.
var rssLinkPattern = regexp.MustCompile(`(?i)(?:https?://)?(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?:/[^\s]*)?\.xml\b`)

// Subscribe handles the "订阅" command. It extracts every RSS link from the
// text segments of msg and subscribes the current group to each of them on
// behalf of the sending user.
//
// The reply states how many feeds were subscribed. Links that were found but
// could not be subscribed (invalid feed, already subscribed, database error)
// are listed with their reason instead of being reported as "no link found".
func Subscribe(msg model.Message) (reply *model.Reply) {
	respond := func(text string) *model.Reply {
		return &model.Reply{
			ReplyMsg:       text,
			ReferOriginMsg: true,
			FromMsg:        msg,
		}
	}

	var subscribedFeeds []string
	var failures []string
	for _, data := range msg.StructuredMsg {
		if data.GetMessageType() != "text" {
			continue
		}
		text, ok := data.(*qq_message.TextMessage)
		if !ok {
			continue
		}
		for _, url := range rssLinkPattern.FindAllString(text.Data.Text, -1) {
			if err := SubscribeToFeed(url, msg.UserId, msg.GroupInfo.GroupId); err != nil {
				failures = append(failures, fmt.Sprintf("%s: %v", url, err))
				continue
			}
			subscribedFeeds = append(subscribedFeeds, url)
		}
	}

	failureReport := fmt.Sprintf("订阅失败 %d 个:\n%s", len(failures), strings.Join(failures, "\n"))
	switch {
	case len(subscribedFeeds) > 0 && len(failures) == 0:
		return respond(fmt.Sprintf("成功订阅 %d 个RSS源", len(subscribedFeeds)))
	case len(subscribedFeeds) > 0:
		return respond(fmt.Sprintf("成功订阅 %d 个RSS源", len(subscribedFeeds)) + "\n" + failureReport)
	case len(failures) > 0:
		return respond(failureReport)
	default:
		return respond("未找到有效的RSS链接需要以.xml结尾")
	}
}
// SubscribeToFeed subscribes group groupID to the RSS source at feedURL on
// behalf of user userID.
//
// feedURL gets an "https://" prefix when it has no http(s) scheme (checked
// case-insensitively). The source is validated with CheckRssFeed, its RssFeed
// row is reused or created, and an RssSubscribe row recording the group, the
// creator and the creation time is inserted.
//
// It returns an error when the source is invalid, when the group is already
// subscribed to it, or when any database operation fails.
func SubscribeToFeed(feedURL string, userID int64, groupID int64) error {
	db := sqlite3.GetGormDB()

	// Make sure the URL carries a scheme.
	lowered := strings.ToLower(feedURL)
	if !strings.HasPrefix(lowered, "http://") && !strings.HasPrefix(lowered, "https://") {
		feedURL = "https://" + feedURL
	}

	// Validate the source before touching the database.
	if err := CheckRssFeed(feedURL); err != nil {
		return fmt.Errorf("RSS源无效: %w", err)
	}

	// Reuse the feed row if it exists; create it only when it is really
	// missing, not on an arbitrary database error.
	var feed RssFeed
	switch err := db.Where("feed_url = ?", feedURL).First(&feed).Error; {
	case err == nil:
		// already known
	case err == gorm.ErrRecordNotFound:
		feed = RssFeed{
			FeedURL:    feedURL,
			Creator:    fmt.Sprintf("%d", userID),
			LastUpdate: time.Now(),
		}
		if err := db.Create(&feed).Error; err != nil {
			return fmt.Errorf("创建RSS源失败: %w", err)
		}
	default:
		return fmt.Errorf("查询RSS源失败: %w", err)
	}

	// Refuse a duplicate subscription for the same group.
	var existing RssSubscribe
	err := db.Where("feed_id = ? AND group_id = ?", feed.ID, groupID).First(&existing).Error
	if err == nil {
		return fmt.Errorf("该群已订阅过此RSS源")
	}
	if err != gorm.ErrRecordNotFound {
		return fmt.Errorf("查询订阅关系失败: %w", err)
	}

	// Record who subscribed and when; the subscription listing filters on the
	// creator column and prints the creation time.
	newSubscribe := RssSubscribe{
		FeedID:   feed.ID,
		GroupID:  int(groupID),
		Creator:  int(userID),
		CreateAt: time.Now(),
	}
	if err := db.Create(&newSubscribe).Error; err != nil {
		return fmt.Errorf("创建订阅关系失败: %w", err)
	}
	return nil
}
// MySubscribed handles the "我的订阅" command. It replies with a table of the
// subscriptions whose creator is the sending user: the feed ID (the value the
// "退订" command expects), the feed URL and the subscription time.
//
// Database errors are reported in the reply, and a user without subscriptions
// gets a short notice instead of an empty table.
func MySubscribed(msg model.Message) (reply *model.Reply) {
	respond := func(text string) *model.Reply {
		return &model.Reply{
			ReplyMsg:       text,
			ReferOriginMsg: true,
			FromMsg:        msg,
		}
	}
	db := sqlite3.GetGormDB()

	var subs []RssSubscribe
	if err := db.Where("creator = ?", msg.UserId).Find(&subs).Error; err != nil {
		return respond("查询订阅失败: " + err.Error())
	}
	if len(subs) == 0 {
		return respond("你还没有订阅任何RSS源")
	}

	// Load the referenced feeds to show their URLs. The subscription rows
	// themselves are kept as queried, so only this user's rows are listed.
	feedIDs := make([]int, 0, len(subs))
	for _, sub := range subs {
		feedIDs = append(feedIDs, sub.FeedID)
	}
	var feeds []RssFeed
	if err := db.Where("id IN ?", feedIDs).Find(&feeds).Error; err != nil {
		return respond("查询订阅失败: " + err.Error())
	}
	urlByID := make(map[int]string, len(feeds))
	for _, feed := range feeds {
		urlByID[feed.ID] = feed.FeedURL
	}

	var table strings.Builder
	table.WriteString("| ID | 订阅源 | 创建时间 |\n")
	table.WriteString("| --- | --- | --- |\n")
	for _, sub := range subs {
		fmt.Fprintf(&table, "| %d | %s | %s |\n",
			sub.FeedID, urlByID[sub.FeedID], sub.CreateAt.Format("2006-01-02 15:04:05"))
	}
	return respond("你的订阅列表:\n" + table.String())
}
// Unsubscribe handles the "退订" command. The second message segment must be
// text of the form "<command> <feed id>"; the current group's subscription to
// that feed is deleted.
//
// The reply is a usage hint when the feed ID is missing, an error message when
// the delete fails or when the group has no such subscription, and a success
// message otherwise. After the delete attempt, a feed that no subscription
// references any more is removed as well.
func Unsubscribe(msg model.Message) (reply *model.Reply) {
	respond := func(text string) *model.Reply {
		return &model.Reply{
			ReplyMsg:       text,
			ReferOriginMsg: true,
			FromMsg:        msg,
		}
	}
	const usage = "请输入要取消订阅的RSS源ID"

	// Reject the message unless segment 1 is text. The checks are combined
	// with "or" so a non-text segment is never type-asserted and a text
	// segment is always length-checked before indexing.
	if len(msg.StructuredMsg) < 2 || msg.StructuredMsg[1].GetMessageType() != qq_message.TypeText {
		return respond(usage)
	}
	text, ok := msg.StructuredMsg[1].(*qq_message.TextMessage)
	if !ok {
		return respond(usage)
	}
	parts := util.SplitN(text.Data.Text, 2)
	if len(parts) != 2 {
		return respond(usage)
	}
	feedId := parts[1]

	db := sqlite3.GetGormDB()
	result := db.Where("feed_id = ?", feedId).Where("group_id = ?", msg.GroupInfo.GroupId).Delete(&RssSubscribe{})
	if result.Error != nil {
		return respond("取消订阅失败,报错:" + result.Error.Error() + "\n请检查是否存在此订阅")
	}

	// Drop the feed itself once nothing subscribes to it any more.
	if db.Where("feed_id = ?", feedId).First(&RssSubscribe{}).Error == gorm.ErrRecordNotFound {
		db.Where("id = ?", feedId).Delete(&RssFeed{})
	}

	// A delete that matched no row is not an error for the database, but for
	// the user it means the subscription did not exist.
	if result.RowsAffected == 0 {
		return respond("取消订阅失败,本群没有ID为" + feedId + "的订阅")
	}
	return respond("取消订阅成功")
}