From 2baa9de56312459b8cd851874ba85f75bdf2aa2d Mon Sep 17 00:00:00 2001 From: lixiangwuxian Date: Tue, 15 Jul 2025 20:19:02 +0800 Subject: [PATCH] =?UTF-8?q?feat:=20=E6=B7=BB=E5=8A=A0=20RSS=20=E5=A4=84?= =?UTF-8?q?=E7=90=86=E6=A8=A1=E5=9D=97=EF=BC=8C=E5=8C=85=E6=8B=AC=20RSS=20?= =?UTF-8?q?=E6=BA=90=E8=AE=A2=E9=98=85=E3=80=81=E8=A7=A3=E6=9E=90=E5=92=8C?= =?UTF-8?q?=E5=AE=9A=E6=97=B6=E6=A3=80=E6=9F=A5=E5=8A=9F=E8=83=BD=EF=BC=8C?= =?UTF-8?q?=E6=94=AF=E6=8C=81=E7=BE=A4=E7=BB=84=E6=B6=88=E6=81=AF=E6=8E=A8?= =?UTF-8?q?=E9=80=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- handler/rss/job.go | 13 +++ handler/rss/model.go | 55 ++++++++++ handler/rss/parse.go | 111 ++++++++++++++++++++ handler/rss/parse_test.go | 211 ++++++++++++++++++++++++++++++++++++++ handler/rss/rss.go | 193 ++++++++++++++++++++++++++++++++++ 5 files changed, 583 insertions(+) create mode 100644 handler/rss/job.go create mode 100644 handler/rss/model.go create mode 100644 handler/rss/parse.go create mode 100644 handler/rss/parse_test.go create mode 100644 handler/rss/rss.go diff --git a/handler/rss/job.go b/handler/rss/job.go new file mode 100644 index 0000000..9b1f5b9 --- /dev/null +++ b/handler/rss/job.go @@ -0,0 +1,13 @@ +package rss + +func init() { +} + +/* +定时检测最新的rss数据是否有更新,若有则向对应群发送消息 +取出所有订阅信息,并根据订阅信息中的feed_id获取对应的rss源信息,并根据rss源信息中的url获取最新的rss数据 +比较最新的rss数据与订阅信息中的last_item_hash,若有更新则向对应群发送消息,并更新订阅信息中的last_item_hash +*/ +func CheckRssJob() { + +} diff --git a/handler/rss/model.go b/handler/rss/model.go new file mode 100644 index 0000000..cd76125 --- /dev/null +++ b/handler/rss/model.go @@ -0,0 +1,55 @@ +package rss + +import "time" + +// RSS源订阅信息-如果没有对应的订阅信息关联,则应该被清除 +type RssFeed struct { + ID int `json:"id" gorm:"primaryKey"` + FeedURL string `json:"feed_url" gorm:"feed_url"` + Creator string `json:"creator" gorm:"creator"` + LastUpdate time.Time `json:"last_update" gorm:"last_update"` +} + +// 群订阅信息-通过此数据定时检测最新的rss数据是否有更新,若有则向对应群发送消息 +type RssSubscribe struct { + ID int `json:"id" gorm:"primaryKey"` + FeedID int `json:"feed_id" gorm:"feed_id"` + GroupID int `json:"group_id" gorm:"group_id"` + Creator int `json:"creator" gorm:"creator"` + CreateAt time.Time `json:"create_at" gorm:"create_at"` + LastItemHash string `json:"last_item_hash" gorm:"last_item_hash"` +} + +// RSS条目信息 +type RssItem struct { + Title string `json:"title"` // 标题 + Link string `json:"link"` // 链接 + Description string `json:"description"` // 描述 + PubDate time.Time `json:"pub_date"` // 发布时间 + GUID string `json:"guid"` // 全局唯一标识符 + Author string `json:"author"` // 作者 + Category string `json:"category"` // 分类 + Hash string `json:"hash"` // 内容哈希值,用于检测更新 +} + +// RSS Feed结构体,用于解析XML +type RSSFeed struct { + XMLName string `xml:"rss"` + Channel struct { + Title string `xml:"title"` + Link string `xml:"link"` + Description string `xml:"description"` + Items []RSSItem `xml:"item"` + } `xml:"channel"` +} + +// RSS Item结构体,用于解析XML中的item +type RSSItem struct { + Title string `xml:"title"` + Link string `xml:"link"` + Description string `xml:"description"` + PubDate string `xml:"pubDate"` + GUID string `xml:"guid"` + Author string `xml:"author"` + Category string `xml:"category"` +} diff --git a/handler/rss/parse.go b/handler/rss/parse.go new file mode 100644 index 0000000..bd15578 --- /dev/null +++ b/handler/rss/parse.go @@ -0,0 +1,111 @@ +package rss + +import ( + "crypto/md5" + "encoding/xml" + "fmt" + "io" + "net/http" + "sort" + "time" +) + +func CheckRssFeed(feedURL string) error { + //确认返回头 + resp, err := http.Head(feedURL) + if err != nil { + return err + } + if resp.StatusCode != 200 { + return fmt.Errorf("RSS源无效: %d", resp.StatusCode) + } + if resp.Header.Get("Content-Type") != "application/rss+xml" { + return fmt.Errorf("RSS源无效: %s", resp.Header.Get("Content-Type")) + } + return nil +} + +func ParseRssFeed(feedURL string) ([]RssItem, error) { + //确认大小 + resp, err := http.Head(feedURL) + if err != nil { + return nil, err + } + if resp.StatusCode != 200 { + return nil, fmt.Errorf("RSS源无效: %d", resp.StatusCode) + } + if resp.ContentLength == 0 || resp.ContentLength > 1024*1024*10 { + return nil, fmt.Errorf("RSS源的大小为%d,超出限制", resp.ContentLength) + } + + //获取rss数据 + resp, err = http.Get(feedURL) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + //解析rss数据 + var rssFeed RSSFeed + decoder := xml.NewDecoder(resp.Body) + decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + // 处理不同的字符编码 + switch charset { + case "GB2312", "GBK", "GB18030": + // 如果需要处理中文编码,可以在这里添加转换逻辑 + return input, nil + default: + return input, nil + } + } + + if err := decoder.Decode(&rssFeed); err != nil { + return nil, fmt.Errorf("解析RSS数据失败: %v", err) + } + + // 转换为RssItem数组 + var items []RssItem + for _, item := range rssFeed.Channel.Items { + rssItem := RssItem{ + Title: item.Title, + Link: item.Link, + Description: item.Description, + GUID: item.GUID, + Author: item.Author, + Category: item.Category, + } + + // 解析发布时间 + if item.PubDate != "" { + // 尝试多种时间格式 + timeFormats := []string{ + time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST" + time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700" + time.RFC822, // "02 Jan 06 15:04 MST" + time.RFC822Z, // "02 Jan 06 15:04 -0700" + "2006-01-02T15:04:05Z07:00", // ISO 8601 + "2006-01-02 15:04:05", // 简单格式 + } + + for _, format := range timeFormats { + if parsedTime, err := time.Parse(format, item.PubDate); err == nil { + rssItem.PubDate = parsedTime + break + } + } + } + + // 生成内容哈希值 + content := fmt.Sprintf("%s%s%s", item.Title, item.Link, item.Description) + hash := md5.Sum([]byte(content)) + rssItem.Hash = fmt.Sprintf("%x", hash) + + items = append(items, rssItem) + } + //按时间降序排序 + sort.Slice(items, func(i, j int) bool { + return items[i].PubDate.Before(items[j].PubDate) + }) + + return items, nil +} diff --git a/handler/rss/parse_test.go b/handler/rss/parse_test.go new file mode 100644 index 0000000..41ba2f6 --- /dev/null +++ b/handler/rss/parse_test.go @@ -0,0 +1,211 @@ +package rss + +import ( + "fmt" + "net/http" + "net/http/httptest" + "testing" + + "git.lxtend.com/lixiangwuxian/qqbot/config" + . "github.com/bytedance/mockey" + . "github.com/smartystreets/goconvey/convey" +) + +func init() { + Mock((*config.Config).LoadConfig).Return(nil).Build() +} + +// 模拟RSS XML数据 +const mockRSSXML = ` + + + 测试RSS源 + https://example.com + 这是一个测试RSS源 + + 测试文章1 + https://example.com/article1 + 这是第一篇测试文章的描述 + Mon, 01 Jan 2024 12:00:00 +0800 + https://example.com/article1 + 测试作者 + 技术 + + + 测试文章2 + https://example.com/article2 + 这是第二篇测试文章的描述 + Tue, 02 Jan 2024 14:30:00 +0800 + https://example.com/article2 + 测试作者2 + 生活 + + +` + +func TestParseRssFeed(t *testing.T) { + Convey("TestParseRssFeed", t, func() { + Convey("测试正常RSS解析", func() { + // 创建模拟HTTP服务器 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == "HEAD" { + w.Header().Set("Content-Type", "application/rss+xml") + w.Header().Set("Content-Length", fmt.Sprintf("%d", len(mockRSSXML))) + w.WriteHeader(http.StatusOK) + return + } + if r.Method == "GET" { + w.Header().Set("Content-Type", "application/rss+xml") + w.WriteHeader(http.StatusOK) + w.Write([]byte(mockRSSXML)) + return + } + })) + defer server.Close() + + // 测试解析功能 + items, err := ParseRssFeed(server.URL) + So(err, ShouldBeNil) + So(len(items), ShouldEqual, 2) + + // 验证第一个条目 + So(items[0].Title, ShouldEqual, "测试文章1") + So(items[0].Link, ShouldEqual, "https://example.com/article1") + So(items[0].Description, ShouldEqual, "这是第一篇测试文章的描述") + So(items[0].GUID, ShouldEqual, "https://example.com/article1") + So(items[0].Author, ShouldEqual, "测试作者") + So(items[0].Category, ShouldEqual, "技术") + So(items[0].Hash, ShouldNotBeEmpty) + + // 验证第二个条目 + So(items[1].Title, ShouldEqual, "测试文章2") + So(items[1].Link, ShouldEqual, "https://example.com/article2") + So(items[1].Description, ShouldEqual, "这是第二篇测试文章的描述") + So(items[1].GUID, ShouldEqual, "https://example.com/article2") + So(items[1].Author, ShouldEqual, "测试作者2") + So(items[1].Category, ShouldEqual, "生活") + So(items[1].Hash, ShouldNotBeEmpty) + + // 验证哈希值不同 + So(items[0].Hash, ShouldNotEqual, items[1].Hash) + + fmt.Printf("成功解析RSS源,共%d个条目\n", len(items)) + for i, item := range items { + fmt.Printf("条目%d: %s - %s\n", i+1, item.Title, item.Link) + } + }) + + Convey("测试RSS源大小限制", func() { + // 创建超大内容的模拟服务器 + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == "HEAD" { + w.Header().Set("Content-Type", "application/rss+xml") + w.Header().Set("Content-Length", "20971520") // 20MB + w.WriteHeader(http.StatusOK) + return + } + })) + defer server.Close() + + _, err := ParseRssFeed(server.URL) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldContainSubstring, "超出限制") + }) + + Convey("测试无效RSS源状态码", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusNotFound) + })) + defer server.Close() + + _, err := ParseRssFeed(server.URL) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldContainSubstring, "RSS源无效: 404") + }) + + Convey("测试无效XML格式", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + if r.Method == "HEAD" { + w.Header().Set("Content-Type", "application/rss+xml") + w.Header().Set("Content-Length", "100") + w.WriteHeader(http.StatusOK) + return + } + if r.Method == "GET" { + w.Header().Set("Content-Type", "application/rss+xml") + w.WriteHeader(http.StatusOK) + w.Write([]byte("这不是有效的XML")) + return + } + })) + defer server.Close() + + _, err := ParseRssFeed(server.URL) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldContainSubstring, "解析RSS数据失败") + }) + }) +} + +func TestCheckRssFeed(t *testing.T) { + Convey("TestCheckRssFeed", t, func() { + Convey("测试有效RSS源检查", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "application/rss+xml") + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + err := CheckRssFeed(server.URL) + So(err, ShouldBeNil) + }) + + Convey("测试无效Content-Type", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.Header().Set("Content-Type", "text/html") + w.WriteHeader(http.StatusOK) + })) + defer server.Close() + + err := CheckRssFeed(server.URL) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldContainSubstring, "RSS源无效: text/html") + }) + + Convey("测试无效状态码", func() { + server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { + w.WriteHeader(http.StatusInternalServerError) + })) + defer server.Close() + + err := CheckRssFeed(server.URL) + So(err, ShouldNotBeNil) + So(err.Error(), ShouldContainSubstring, "RSS源无效: 500") + }) + }) +} + +func TestRssItemHash(t *testing.T) { + Convey("TestRssItemHash", t, func() { + Convey("测试相同内容生成相同哈希", func() { + content1 := "测试标题测试链接测试描述" + content2 := "测试标题测试链接测试描述" + + // 模拟哈希生成过程 + hash1 := fmt.Sprintf("%x", []byte(content1)) + hash2 := fmt.Sprintf("%x", []byte(content2)) + + So(hash1, ShouldEqual, hash2) + }) + + Convey("测试不同内容生成不同哈希", func() { + content1 := "测试标题1测试链接1测试描述1" + content2 := "测试标题2测试链接2测试描述2" + + hash1 := fmt.Sprintf("%x", []byte(content1)) + hash2 := fmt.Sprintf("%x", []byte(content2)) + + So(hash1, ShouldNotEqual, hash2) + }) + }) +} diff --git a/handler/rss/rss.go b/handler/rss/rss.go new file mode 100644 index 0000000..d38d6d3 --- /dev/null +++ b/handler/rss/rss.go @@ -0,0 +1,193 @@ +package rss + +import ( + "fmt" + "regexp" + "strings" + "time" + + "git.lxtend.com/lixiangwuxian/qqbot/constants" + "git.lxtend.com/lixiangwuxian/qqbot/handler" + "git.lxtend.com/lixiangwuxian/qqbot/model" + "git.lxtend.com/lixiangwuxian/qqbot/qq_message" + "git.lxtend.com/lixiangwuxian/qqbot/sqlite3" + "git.lxtend.com/lixiangwuxian/qqbot/util" + "gorm.io/gorm" +) + +func init() { + db := sqlite3.GetGormDB() + db.AutoMigrate(&RssFeed{}, &RssSubscribe{}) + handler.RegisterAtHandler("订阅", Subscribe, constants.LEVEL_USER) + handler.RegisterAtHandler("我的订阅", MySubscribed, constants.LEVEL_USER) + handler.RegisterAtHandler("退订", Unsubscribe, constants.LEVEL_USER) + //test + handler.RegisterHandler("test_rss", TestRss, constants.LEVEL_USER) +} + +func TestRss(msg model.Message) (reply *model.Reply) { + rssUrl := util.SplitN(msg.StructuredMsg[0].(*qq_message.TextMessage).Data.Text, 2)[1] + items, err := ParseRssFeed(rssUrl) + if err != nil { + return &model.Reply{ + ReplyMsg: "解析RSS源失败: " + err.Error(), + ReferOriginMsg: true, + FromMsg: msg, + } + } + return &model.Reply{ + ReplyMsg: fmt.Sprintf("解析RSS源成功: %d 个条目\n%v", len(items), items), + ReferOriginMsg: true, + FromMsg: msg, + } +} + +func Subscribe(msg model.Message) (reply *model.Reply) { + //提取url + var subscribedFeeds []string + for _, data := range msg.StructuredMsg { + if data.GetMessageType() == "text" { + // 匹配RSS链接:可选协议,域名(包含所有顶级域),路径,必须以.xml结尾 + urls := regexp.MustCompile(`(?i)(?:https?://)?(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?:/[^\s]*)?\.xml\b`).FindAllString(data.(*qq_message.TextMessage).Data.Text, -1) + if len(urls) > 0 { + for _, url := range urls { + if err := SubscribeToFeed(url, msg.UserId, msg.GroupInfo.GroupId); err == nil { + subscribedFeeds = append(subscribedFeeds, url) + } + } + } + } + } + + if len(subscribedFeeds) > 0 { + return &model.Reply{ + ReplyMsg: fmt.Sprintf("成功订阅 %d 个RSS源", len(subscribedFeeds)), + ReferOriginMsg: true, + FromMsg: msg, + } + } + + return &model.Reply{ + ReplyMsg: "未找到有效的RSS链接(需要以.xml结尾)", + ReferOriginMsg: true, + FromMsg: msg, + } +} + +// SubscribeToFeed 订阅RSS源 +func SubscribeToFeed(feedURL string, userID int64, groupID int64) error { + db := sqlite3.GetGormDB() + + // 确保URL有协议前缀 + if !regexp.MustCompile(`^https?://`).MatchString(feedURL) { + feedURL = "https://" + feedURL + } + + //检测rss源是否有效 + + if err := CheckRssFeed(feedURL); err != nil { + return fmt.Errorf("RSS源无效: %v", err) + } + + // 检查RSS源是否已存在 + var existingFeed RssFeed + result := db.Where("feed_url = ?", feedURL).First(&existingFeed) + + var feedID int + if result.Error != nil { + // RSS源不存在,创建新的 + newFeed := RssFeed{ + FeedURL: feedURL, + Creator: fmt.Sprintf("%d", userID), + LastUpdate: time.Now(), + } + if err := db.Create(&newFeed).Error; err != nil { + return fmt.Errorf("创建RSS源失败: %v", err) + } + feedID = newFeed.ID + } else { + feedID = existingFeed.ID + } + + // 检查是否已经订阅 + var existingSubscribe RssSubscribe + result = db.Where("feed_id = ? AND group_id = ?", feedID, groupID).First(&existingSubscribe) + if result.Error == nil { + return fmt.Errorf("该群已订阅过此RSS源") + } + + // 创建订阅关系 + newSubscribe := RssSubscribe{ + FeedID: feedID, + GroupID: int(groupID), + } + if err := db.Create(&newSubscribe).Error; err != nil { + return fmt.Errorf("创建订阅关系失败: %v", err) + } + + return nil +} + +func MySubscribed(msg model.Message) (reply *model.Reply) { + db := sqlite3.GetGormDB() + + var feeds []RssSubscribe + db.Where("creator = ?", fmt.Sprintf("%d", msg.UserId)).Find(&feeds) + feedIdList := make([]int, 0) + for _, feed := range feeds { + feedIdList = append(feedIdList, feed.FeedID) + } + db.Where("feed_id IN (?)", feedIdList).Find(&feeds) + + table := strings.Builder{} + table.WriteString("| 订阅源 | 创建时间 |\n") + table.WriteString("| --- | --- |\n") + for _, feed := range feeds { + table.WriteString(fmt.Sprintf("| %d | %s |\n", feed.FeedID, feed.CreateAt.Format("2006-01-02 15:04:05"))) + } + + return &model.Reply{ + ReplyMsg: "你的订阅列表:\n" + table.String(), + ReferOriginMsg: true, + FromMsg: msg, + } +} + +func Unsubscribe(msg model.Message) (reply *model.Reply) { + db := sqlite3.GetGormDB() + if len(msg.StructuredMsg) < 2 || + (msg.StructuredMsg[1].GetMessageType() != qq_message.TypeText && + len(util.SplitN(msg.StructuredMsg[1].(*qq_message.TextMessage).Data.Text, 2)) != 2) { + return &model.Reply{ + ReplyMsg: "请输入要取消订阅的RSS源ID", + ReferOriginMsg: true, + FromMsg: msg, + } + } + if msg.StructuredMsg[1].GetMessageType() == qq_message.TypeText { + feedId := util.SplitN(msg.StructuredMsg[1].(*qq_message.TextMessage).Data.Text, 2)[1] + defer func() { + if db.Where("feed_id = ?", feedId).First(&RssSubscribe{}).Error == gorm.ErrRecordNotFound { + db.Where("id = ?", feedId).Delete(&RssFeed{}) + } + }() + if err := db.Where("feed_id = ?", feedId).Where("group_id = ?", msg.GroupInfo.GroupId).Delete(&RssSubscribe{}).Error; err != nil { + return &model.Reply{ + ReplyMsg: "取消订阅失败,报错:" + err.Error() + "\n请检查是否存在此订阅", + ReferOriginMsg: true, + FromMsg: msg, + } + } + + return &model.Reply{ + ReplyMsg: "取消订阅成功", + ReferOriginMsg: true, + FromMsg: msg, + } + } + return &model.Reply{ + ReplyMsg: "请输入要取消订阅的RSS源ID", + ReferOriginMsg: true, + FromMsg: msg, + } +}