// OPML document model used when decoding OPML XML, plus the flattened
// result type produced for each feed found in a document.
type (
	// OPML is the root <opml> element of an OPML document.
	OPML struct {
		XMLName string   `xml:"opml"`
		Version string   `xml:"version,attr"`
		Head    OPMLHead `xml:"head"`
		Body    OPMLBody `xml:"body"`
	}

	// OPMLHead holds the child elements of <head>. Every value is kept as
	// the raw string found in the document.
	OPMLHead struct {
		Title           string `xml:"title"`
		DateCreated     string `xml:"dateCreated"`
		DateModified    string `xml:"dateModified"`
		OwnerName       string `xml:"ownerName"`
		OwnerEmail      string `xml:"ownerEmail"`
		ExpansionState  string `xml:"expansionState"`
		VertScrollState string `xml:"vertScrollState"`
		WindowTop       string `xml:"windowTop"`
		WindowLeft      string `xml:"windowLeft"`
		WindowBottom    string `xml:"windowBottom"`
		WindowRight     string `xml:"windowRight"`
	}

	// OPMLBody is the <body> element; it contains the top-level outlines.
	OPMLBody struct {
		Outlines []OPMLOutline `xml:"outline"`
	}

	// OPMLOutline is a single <outline> element. It represents either an
	// RSS feed or a category; nested <outline> children are collected in
	// Outlines.
	OPMLOutline struct {
		Text        string        `xml:"text,attr"`
		Title       string        `xml:"title,attr"`
		Type        string        `xml:"type,attr"`
		XMLURL      string        `xml:"xmlUrl,attr"`
		HTMLURL     string        `xml:"htmlUrl,attr"`
		Description string        `xml:"description,attr"`
		Language    string        `xml:"language,attr"`
		Version     string        `xml:"version,attr"`
		Outlines    []OPMLOutline `xml:"outline"`
	}

	// OPMLFeedInfo is one feed extracted from an OPML document.
	OPMLFeedInfo struct {
		Title       string `json:"title"`       // feed title
		XMLURL      string `json:"xml_url"`     // address of the feed XML
		HTMLURL     string `json:"html_url"`    // address of the feed's HTML page
		Description string `json:"description"` // feed description
		Category    string `json:"category"`    // name of the category the feed was listed under
	}
)
fmt.Sprintf("%s%s%s", item.Title, item.Link, item.Description) + // 生成标题哈希值 + content := fmt.Sprintf("%s%s", item.Title, item.Link) hash := md5.Sum([]byte(content)) rssItem.Hash = fmt.Sprintf("%x", hash) @@ -199,7 +199,7 @@ func parseAtomFormat(data []byte) (string, []RssItem, error) { } // 生成内容哈希值 - content := fmt.Sprintf("%s%s%s", rssItem.Title, rssItem.Link, rssItem.Description) + content := fmt.Sprintf("%s%s", rssItem.Title, rssItem.Link) hash := md5.Sum([]byte(content)) rssItem.Hash = fmt.Sprintf("%x", hash) @@ -238,3 +238,126 @@ func parseTimeString(timeStr string) time.Time { // 如果所有格式都失败,返回零时间 return time.Time{} } + +// CheckOPMLFile 检查OPML文件是否有效 +func CheckOPMLFile(opmlURL string) error { + //确认返回头 + resp, err := http.Head(opmlURL) + if err != nil { + return err + } + if resp.StatusCode != 200 { + return fmt.Errorf("OPML文件无效: %d", resp.StatusCode) + } + contentType := resp.Header.Get("Content-Type") + // 支持多种OPML的Content-Type + validContentTypes := []string{ + "application/xml", + "text/xml", + "text/x-opml", + "application/x-opml+xml", + } + + isValid := false + for _, validType := range validContentTypes { + if strings.Contains(contentType, validType) { + isValid = true + break + } + } + + if !isValid { + return fmt.Errorf("OPML文件无效: %s", resp.Header.Get("Content-Type")) + } + return nil +} + +// ParseOPML 解析OPML文件,返回RSS源列表 +func ParseOPML(opmlURL string) ([]OPMLFeedInfo, error) { + //确认大小 + resp, err := http.Head(opmlURL) + if err != nil { + return nil, err + } + if resp.StatusCode != 200 { + return nil, fmt.Errorf("OPML文件无效: %d", resp.StatusCode) + } + if resp.ContentLength == 0 || resp.ContentLength > 1024*1024*10 { + return nil, fmt.Errorf("OPML文件的大小为%d,超出限制", resp.ContentLength) + } + + //获取OPML数据 + resp, err = http.Get(opmlURL) + if err != nil { + return nil, err + } + defer resp.Body.Close() + + // 读取响应体内容 + body, err := io.ReadAll(resp.Body) + if err != nil { + return nil, fmt.Errorf("读取OPML数据失败: %v", err) + } + + return ParseOPMLFormat(body) +} 
+ +// ParseOPMLFormat 解析OPML格式数据 +func ParseOPMLFormat(data []byte) ([]OPMLFeedInfo, error) { + var opml OPML + decoder := xml.NewDecoder(strings.NewReader(string(data))) + decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) { + // 处理不同的字符编码 + switch charset { + case "GB2312", "GBK", "GB18030": + // 如果需要处理中文编码,可以在这里添加转换逻辑 + return input, nil + default: + return input, nil + } + } + + if err := decoder.Decode(&opml); err != nil { + return nil, fmt.Errorf("解析OPML数据失败: %v", err) + } + + var feedInfos []OPMLFeedInfo + extractFeeds(opml.Body.Outlines, "", &feedInfos) + + if len(feedInfos) == 0 { + return nil, errors.New("未在OPML文件中找到RSS源") + } + + return feedInfos, nil +} + +// extractFeeds 递归提取RSS源信息 +func extractFeeds(outlines []OPMLOutline, category string, feedInfos *[]OPMLFeedInfo) { + for _, outline := range outlines { + // 如果有xmlUrl,说明这是一个RSS源 + if outline.XMLURL != "" { + title := outline.Title + if title == "" { + title = outline.Text + } + + feedInfo := OPMLFeedInfo{ + Title: title, + XMLURL: outline.XMLURL, + HTMLURL: outline.HTMLURL, + Description: outline.Description, + Category: category, + } + *feedInfos = append(*feedInfos, feedInfo) + } else if len(outline.Outlines) > 0 { + // 如果没有xmlUrl但有子outline,说明这是一个分类 + categoryName := outline.Title + if categoryName == "" { + categoryName = outline.Text + } + + // 递归处理子outline + extractFeeds(outline.Outlines, categoryName, feedInfos) + } + } +} diff --git a/handler/rss/rss.go b/handler/rss/rss.go index 9427036..0631d16 100644 --- a/handler/rss/rss.go +++ b/handler/rss/rss.go @@ -24,6 +24,8 @@ func init() { handler.RegisterHelpInform("@我 我的订阅", "rss", "查看我的订阅") handler.RegisterAtHandler("退订", Unsubscribe, constants.LEVEL_USER) handler.RegisterHelpInform("@我 退订 ", "rss", "退订rss源") + handler.RegisterAtHandler("导入OPML", ImportOPML, constants.LEVEL_USER) + handler.RegisterHelpInform("@我 导入OPML ", "rss", "从OPML文件导入RSS源") //test handler.RegisterHandler("test_rss", TestRss, 
constants.LEVEL_ADMIN) } @@ -186,3 +188,90 @@ func Unsubscribe(msg model.Message) (reply *model.Reply) { FromMsg: msg, } } + +// ImportOPML 导入OPML文件中的RSS源 +func ImportOPML(msg model.Message) (reply *model.Reply) { + //提取OPML文件URL + var opmlURL string + for _, data := range msg.StructuredMsg { + if data.GetMessageType() == "text" { + // 匹配OPML文件链接 + urls := regexp.MustCompile(`(?i)(?:https?://)?(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?:/[^\s]*)?(?:\.opml)?\b`).FindAllString(data.(*qq_message.TextMessage).Data.Text, -1) + if len(urls) > 0 { + opmlURL = urls[0] + break + } + } + } + + if opmlURL == "" { + return &model.Reply{ + ReplyMsg: "请提供有效的OPML文件链接", + ReferOriginMsg: true, + FromMsg: msg, + } + } + + // 确保URL有协议前缀 + if !regexp.MustCompile(`^https?://`).MatchString(opmlURL) { + opmlURL = "https://" + opmlURL + } + + // 检查OPML文件是否有效 + if err := CheckOPMLFile(opmlURL); err != nil { + return &model.Reply{ + ReplyMsg: fmt.Sprintf("OPML文件无效: %v", err), + ReferOriginMsg: true, + FromMsg: msg, + } + } + + // 解析OPML文件 + feedInfos, err := ParseOPML(opmlURL) + if err != nil { + return &model.Reply{ + ReplyMsg: fmt.Sprintf("解析OPML文件失败: %v", err), + ReferOriginMsg: true, + FromMsg: msg, + } + } + + // 批量订阅RSS源 + var successCount int + var failedFeeds []string + var successFeeds []string + + for _, feedInfo := range feedInfos { + if feedInfo.XMLURL != "" { + title, err := SubscribeToFeed(feedInfo.XMLURL, msg.UserId, msg.GroupInfo.GroupId) + if err != nil { + failedFeeds = append(failedFeeds, fmt.Sprintf("%s: %v", feedInfo.Title, err)) + } else { + successCount++ + if feedInfo.Category != "" { + successFeeds = append(successFeeds, fmt.Sprintf("[%s] %s", feedInfo.Category, title)) + } else { + successFeeds = append(successFeeds, title) + } + } + } + } + + // 构建回复消息 + replyMsg := fmt.Sprintf("OPML导入完成!\n成功订阅 %d 个RSS源", successCount) + + if len(successFeeds) > 0 { + replyMsg += ":\n" + strings.Join(successFeeds, "\n") + } + + if len(failedFeeds) > 0 { + replyMsg += fmt.Sprintf("\n\n失败 
%d 个:\n", len(failedFeeds)) + replyMsg += strings.Join(failedFeeds, "\n") + } + + return &model.Reply{ + ReplyMsg: replyMsg, + ReferOriginMsg: true, + FromMsg: msg, + } +}