feat: 添加 OPML 文件解析功能,支持从 OPML 文件导入 RSS 源,并增强 OPML 文件有效性检查

This commit is contained in:
lixiangwuxian 2025-07-17 18:29:29 +08:00
parent b13f9158c4
commit 3c5faddece
3 changed files with 265 additions and 3 deletions

View File

@ -107,3 +107,53 @@ type AtomContent struct {
// AtomCategory captures the "term" attribute of the XML element it is
// decoded from (judging by the type name, an Atom <category> element).
type AtomCategory struct {
Term string `xml:"term,attr"`
}
// OPML is the top-level structure used to parse an OPML XML document.
// The XMLName tag binds it to the <opml> root element, Version is read from
// the root's "version" attribute, and Head/Body map the <head> and <body>
// child elements.
// NOTE(review): XMLName is declared as string rather than xml.Name — confirm
// that recording the decoded element name in this field is not required.
type OPML struct {
XMLName string `xml:"opml"`
Version string `xml:"version,attr"`
Head OPMLHead `xml:"head"`
Body OPMLBody `xml:"body"`
}
// OPMLHead mirrors the child elements of the OPML <head> section.
// Every value is kept as the raw string found in the document; no date or
// numeric parsing is performed by this type.
type OPMLHead struct {
Title string `xml:"title"`
DateCreated string `xml:"dateCreated"`
DateModified string `xml:"dateModified"`
OwnerName string `xml:"ownerName"`
OwnerEmail string `xml:"ownerEmail"`
ExpansionState string `xml:"expansionState"`
VertScrollState string `xml:"vertScrollState"`
WindowTop string `xml:"windowTop"`
WindowLeft string `xml:"windowLeft"`
WindowBottom string `xml:"windowBottom"`
WindowRight string `xml:"windowRight"`
}
// OPMLBody maps the OPML <body> section: Outlines collects the <outline>
// elements that appear directly inside it.
type OPMLBody struct {
Outlines []OPMLOutline `xml:"outline"`
}
// OPMLOutline is a single <outline> element, representing either an RSS
// source or a category. The string fields are read from the element's XML
// attributes; Outlines holds the nested <outline> children, so the type is
// recursive.
type OPMLOutline struct {
Text string `xml:"text,attr"`
Title string `xml:"title,attr"`
Type string `xml:"type,attr"`
XMLURL string `xml:"xmlUrl,attr"`
HTMLURL string `xml:"htmlUrl,attr"`
Description string `xml:"description,attr"`
Language string `xml:"language,attr"`
Version string `xml:"version,attr"`
Outlines []OPMLOutline `xml:"outline"`
}
// OPMLFeedInfo is the result of parsing OPML: one entry per RSS source.
// The json tags define the snake_case keys used when it is serialized.
type OPMLFeedInfo struct {
Title string `json:"title"` // RSS source title
XMLURL string `json:"xml_url"` // URL of the RSS source's XML feed
HTMLURL string `json:"html_url"` // URL of the RSS source's HTML page
Description string `json:"description"` // RSS source description
Category string `json:"category"` // category name
}

View File

@ -125,8 +125,8 @@ func parseRSSFormat(data []byte) (string, []RssItem, error) {
rssItem.PubDate = parseTimeString(item.PubDate)
}
// 生成内容哈希值
content := fmt.Sprintf("%s%s%s", item.Title, item.Link, item.Description)
// 生成标题哈希值
content := fmt.Sprintf("%s%s", item.Title, item.Link)
hash := md5.Sum([]byte(content))
rssItem.Hash = fmt.Sprintf("%x", hash)
@ -199,7 +199,7 @@ func parseAtomFormat(data []byte) (string, []RssItem, error) {
}
// Hash the title and link only (the description is no longer part of the hash)
content := fmt.Sprintf("%s%s%s", rssItem.Title, rssItem.Link, rssItem.Description)
content := fmt.Sprintf("%s%s", rssItem.Title, rssItem.Link)
hash := md5.Sum([]byte(content))
rssItem.Hash = fmt.Sprintf("%x", hash)
@ -238,3 +238,126 @@ func parseTimeString(timeStr string) time.Time {
// 如果所有格式都失败,返回零时间
return time.Time{}
}
// CheckOPMLFile reports whether opmlURL looks like a reachable OPML document.
//
// It issues a single HEAD request and returns an error when the request
// fails, when the status is not 200, or when the Content-Type header contains
// none of the accepted XML/OPML media types. The document itself is neither
// downloaded nor parsed here. A nil return means the checks passed.
func CheckOPMLFile(opmlURL string) error {
	// Bound the request so an unresponsive host cannot block the caller
	// forever; the package-level http.Head has no timeout at all.
	client := &http.Client{Timeout: 30 * time.Second}

	resp, err := client.Head(opmlURL)
	if err != nil {
		return err
	}
	// A HEAD response has no payload, but the body still has to be closed so
	// the underlying connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("OPML文件无效: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	// Media types are case-insensitive, so compare in lower case. Substring
	// matching tolerates parameters such as "; charset=utf-8".
	normalized := strings.ToLower(contentType)
	for _, accepted := range []string{
		"application/xml",
		"text/xml",
		"text/x-opml",
		"application/x-opml+xml",
	} {
		if strings.Contains(normalized, accepted) {
			return nil
		}
	}
	return fmt.Errorf("OPML文件无效: %s", contentType)
}
// ParseOPML downloads the OPML document at opmlURL and returns the RSS
// sources it lists.
//
// A HEAD request is made first so that a non-200 status, an empty document,
// or one whose advertised Content-Length exceeds 10 MiB is rejected before
// anything is downloaded. The document is then fetched with GET and passed to
// ParseOPMLFormat. Because Content-Length can be absent or inaccurate, the
// same size limit is enforced again while the body is read.
//
// It returns the parsed feeds, or an error from the HTTP requests, the size
// and status checks, the body read, or ParseOPMLFormat.
func ParseOPML(opmlURL string) ([]OPMLFeedInfo, error) {
	const maxOPMLSize = 1024 * 1024 * 10

	// One bounded client for both requests; the timeout also covers reading
	// the GET body, so a stalled server cannot hang the caller.
	client := &http.Client{Timeout: 60 * time.Second}

	head, err := client.Head(opmlURL)
	if err != nil {
		return nil, err
	}
	// Only the headers are needed; release the connection right away.
	head.Body.Close()
	if head.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("OPML文件无效: %d", head.StatusCode)
	}
	if head.ContentLength == 0 || head.ContentLength > maxOPMLSize {
		return nil, fmt.Errorf("OPML文件的大小为%d,超出限制", head.ContentLength)
	}

	resp, err := client.Get(opmlURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()
	// The HEAD status says nothing about the GET; check it separately.
	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("OPML文件无效: %d", resp.StatusCode)
	}

	// Read at most one byte past the limit: enough to detect an oversized
	// body without ever buffering more than the limit allows.
	body, err := io.ReadAll(io.LimitReader(resp.Body, maxOPMLSize+1))
	if err != nil {
		return nil, fmt.Errorf("读取OPML数据失败: %w", err)
	}
	if len(body) > maxOPMLSize {
		return nil, fmt.Errorf("OPML文件的大小超过%d字节,超出限制", maxOPMLSize)
	}
	return ParseOPMLFormat(body)
}
// ParseOPMLFormat decodes raw OPML XML and returns every RSS source found
// under its <body>.
//
// It returns an error when the XML cannot be decoded (the decoder's error is
// wrapped, so errors.Is / errors.As can inspect it) or when the document
// contains no RSS source at all.
func ParseOPMLFormat(data []byte) ([]OPMLFeedInfo, error) {
	decoder := xml.NewDecoder(strings.NewReader(string(data)))
	// encoding/xml refuses documents that declare a non-UTF-8 encoding unless
	// CharsetReader is set. No transcoding is done here: the input is handed
	// back unchanged for every declared charset (GB2312/GBK/GB18030 included),
	// so such documents decode correctly only where their bytes are also
	// valid UTF-8. Real conversion logic would go in this function.
	decoder.CharsetReader = func(_ string, input io.Reader) (io.Reader, error) {
		return input, nil
	}

	var doc OPML
	if err := decoder.Decode(&doc); err != nil {
		return nil, fmt.Errorf("解析OPML数据失败: %w", err)
	}

	var feeds []OPMLFeedInfo
	extractFeeds(doc.Body.Outlines, "", &feeds)
	if len(feeds) == 0 {
		return nil, errors.New("未在OPML文件中找到RSS源")
	}
	return feeds, nil
}
// extractFeeds walks outlines depth-first and appends one OPMLFeedInfo to
// *feedInfos for every outline that has an xmlUrl.
//
// An outline with an xmlUrl is an RSS source: it is recorded under the
// category passed in, and its own children are not visited. An outline
// without an xmlUrl but with children is a category: its children are
// processed with that outline's name as their category (replacing, not
// extending, the current one). Anything else is ignored. Names are taken from
// the title attribute, falling back to text when the title is empty.
func extractFeeds(outlines []OPMLOutline, category string, feedInfos *[]OPMLFeedInfo) {
	for i := range outlines {
		node := &outlines[i]

		// Feeds and categories share the same naming rule.
		label := node.Title
		if label == "" {
			label = node.Text
		}

		switch {
		case node.XMLURL != "":
			*feedInfos = append(*feedInfos, OPMLFeedInfo{
				Title:       label,
				XMLURL:      node.XMLURL,
				HTMLURL:     node.HTMLURL,
				Description: node.Description,
				Category:    category,
			})
		case len(node.Outlines) > 0:
			extractFeeds(node.Outlines, label, feedInfos)
		}
	}
}

View File

@ -24,6 +24,8 @@ func init() {
handler.RegisterHelpInform("@我 我的订阅", "rss", "查看我的订阅")
handler.RegisterAtHandler("退订", Unsubscribe, constants.LEVEL_USER)
handler.RegisterHelpInform("@我 退订 <id>", "rss", "退订rss源")
handler.RegisterAtHandler("导入OPML", ImportOPML, constants.LEVEL_USER)
handler.RegisterHelpInform("@我 导入OPML <url>", "rss", "从OPML文件导入RSS源")
//test
handler.RegisterHandler("test_rss", TestRss, constants.LEVEL_ADMIN)
}
@ -186,3 +188,90 @@ func Unsubscribe(msg model.Message) (reply *model.Reply) {
FromMsg: msg,
}
}
var (
	// opmlLinkPattern finds URL-like tokens in message text. The scheme and
	// the ".opml" suffix are both optional, and matching is case-insensitive.
	opmlLinkPattern = regexp.MustCompile(`(?i)(?:https?://)?(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?:/[^\s]*)?(?:\.opml)?\b`)

	// opmlSchemePattern reports whether a link already starts with http:// or
	// https://. It is case-insensitive so that it agrees with opmlLinkPattern;
	// otherwise "HTTP://host/x.opml" would be prefixed a second time.
	opmlSchemePattern = regexp.MustCompile(`(?i)^https?://`)
)

// ImportOPML imports the RSS sources listed in an OPML file.
//
// It takes the first URL-like token found in the text segments of msg,
// defaults the scheme to https:// when none is given, validates the link with
// CheckOPMLFile, parses it with ParseOPML, and then calls SubscribeToFeed for
// every feed using the message's user and group IDs. A failure to subscribe
// to one feed does not stop the others. The reply summarizes which feeds were
// subscribed and which failed, or explains why the import could not start.
func ImportOPML(msg model.Message) (reply *model.Reply) {
	opmlURL := firstOPMLLink(msg)
	if opmlURL == "" {
		return newOPMLReply(msg, "请提供有效的OPML文件链接")
	}
	// Make sure the URL carries a scheme.
	if !opmlSchemePattern.MatchString(opmlURL) {
		opmlURL = "https://" + opmlURL
	}

	if err := CheckOPMLFile(opmlURL); err != nil {
		return newOPMLReply(msg, fmt.Sprintf("OPML文件无效: %v", err))
	}
	feedInfos, err := ParseOPML(opmlURL)
	if err != nil {
		return newOPMLReply(msg, fmt.Sprintf("解析OPML文件失败: %v", err))
	}

	// Subscribe to each feed, collecting per-feed outcomes for the summary.
	var successFeeds, failedFeeds []string
	for _, feedInfo := range feedInfos {
		if feedInfo.XMLURL == "" {
			continue
		}
		title, err := SubscribeToFeed(feedInfo.XMLURL, msg.UserId, msg.GroupInfo.GroupId)
		if err != nil {
			failedFeeds = append(failedFeeds, fmt.Sprintf("%s: %v", feedInfo.Title, err))
			continue
		}
		if feedInfo.Category != "" {
			title = fmt.Sprintf("[%s] %s", feedInfo.Category, title)
		}
		successFeeds = append(successFeeds, title)
	}
	return newOPMLReply(msg, formatOPMLImportResult(successFeeds, failedFeeds))
}

// firstOPMLLink returns the first URL-like token found in the text segments
// of msg, or "" when there is none.
func firstOPMLLink(msg model.Message) string {
	for _, part := range msg.StructuredMsg {
		if part.GetMessageType() != "text" {
			continue
		}
		// Skip, rather than panic on, a "text" segment that is not backed by
		// the expected concrete type.
		text, ok := part.(*qq_message.TextMessage)
		if !ok || text == nil {
			continue
		}
		if link := opmlLinkPattern.FindString(text.Data.Text); link != "" {
			return link
		}
	}
	return ""
}

// formatOPMLImportResult renders the summary sent after an import: the
// success count, the subscribed feeds, and any failures with their errors.
func formatOPMLImportResult(succeeded, failed []string) string {
	var b strings.Builder
	fmt.Fprintf(&b, "OPML导入完成\n成功订阅 %d 个RSS源", len(succeeded))
	if len(succeeded) > 0 {
		b.WriteString(":\n")
		b.WriteString(strings.Join(succeeded, "\n"))
	}
	if len(failed) > 0 {
		fmt.Fprintf(&b, "\n\n失败 %d 个:\n", len(failed))
		b.WriteString(strings.Join(failed, "\n"))
	}
	return b.String()
}

// newOPMLReply builds a reply to msg that quotes the original message.
func newOPMLReply(msg model.Message, text string) *model.Reply {
	return &model.Reply{
		ReplyMsg:       text,
		ReferOriginMsg: true,
		FromMsg:        msg,
	}
}