feat: 添加 OPML 文件解析功能,支持从 OPML 文件导入 RSS 源,并增强 OPML 文件有效性检查
This commit is contained in:
parent
b13f9158c4
commit
3c5faddece
@ -107,3 +107,53 @@ type AtomContent struct {
|
||||
// AtomCategory maps a single XML element whose "term" attribute is the only
// value captured.
type AtomCategory struct {
	Term string `xml:"term,attr"` // value of the element's "term" attribute
}
|
||||
|
||||
// OPML文件结构体,用于解析OPML XML
|
||||
type OPML struct {
|
||||
XMLName string `xml:"opml"`
|
||||
Version string `xml:"version,attr"`
|
||||
Head OPMLHead `xml:"head"`
|
||||
Body OPMLBody `xml:"body"`
|
||||
}
|
||||
|
||||
// OPMLHead mirrors the child elements of the OPML <head> section. Every value
// is kept as the raw string found in the document; nothing is parsed here.
type OPMLHead struct {
	// Document metadata.
	Title        string `xml:"title"`
	DateCreated  string `xml:"dateCreated"`
	DateModified string `xml:"dateModified"`

	// Ownership information.
	OwnerName  string `xml:"ownerName"`
	OwnerEmail string `xml:"ownerEmail"`

	// Outline-editor view state.
	ExpansionState  string `xml:"expansionState"`
	VertScrollState string `xml:"vertScrollState"`
	WindowTop       string `xml:"windowTop"`
	WindowLeft      string `xml:"windowLeft"`
	WindowBottom    string `xml:"windowBottom"`
	WindowRight     string `xml:"windowRight"`
}
|
||||
|
||||
// OPML Body结构体
|
||||
type OPMLBody struct {
|
||||
Outlines []OPMLOutline `xml:"outline"`
|
||||
}
|
||||
|
||||
// OPMLOutline mirrors one <outline> element. An outline is either a feed entry
// (it carries an xmlUrl attribute) or a grouping node that nests further
// outlines.
type OPMLOutline struct {
	// Attributes read from the element itself.
	Text        string `xml:"text,attr"`
	Title       string `xml:"title,attr"`
	Type        string `xml:"type,attr"`
	XMLURL      string `xml:"xmlUrl,attr"`
	HTMLURL     string `xml:"htmlUrl,attr"`
	Description string `xml:"description,attr"`
	Language    string `xml:"language,attr"`
	Version     string `xml:"version,attr"`

	// Nested <outline> children, in document order.
	Outlines []OPMLOutline `xml:"outline"`
}
|
||||
|
||||
// OPMLFeedInfo is the flattened description of one feed found in an OPML
// document, serialisable as JSON.
type OPMLFeedInfo struct {
	// Title is the feed's display title.
	Title string `json:"title"`
	// XMLURL is the address of the feed's XML document.
	XMLURL string `json:"xml_url"`
	// HTMLURL is the address of the feed's HTML page.
	HTMLURL string `json:"html_url"`
	// Description is the feed's descriptive text.
	Description string `json:"description"`
	// Category is the name of the category the feed was listed under.
	Category string `json:"category"`
}
|
||||
|
@ -125,8 +125,8 @@ func parseRSSFormat(data []byte) (string, []RssItem, error) {
|
||||
rssItem.PubDate = parseTimeString(item.PubDate)
|
||||
}
|
||||
|
||||
// 生成内容哈希值
|
||||
content := fmt.Sprintf("%s%s%s", item.Title, item.Link, item.Description)
|
||||
// 生成标题哈希值
|
||||
content := fmt.Sprintf("%s%s", item.Title, item.Link)
|
||||
hash := md5.Sum([]byte(content))
|
||||
rssItem.Hash = fmt.Sprintf("%x", hash)
|
||||
|
||||
@ -199,7 +199,7 @@ func parseAtomFormat(data []byte) (string, []RssItem, error) {
|
||||
}
|
||||
|
||||
// 生成内容哈希值
|
||||
content := fmt.Sprintf("%s%s%s", rssItem.Title, rssItem.Link, rssItem.Description)
|
||||
content := fmt.Sprintf("%s%s", rssItem.Title, rssItem.Link)
|
||||
hash := md5.Sum([]byte(content))
|
||||
rssItem.Hash = fmt.Sprintf("%x", hash)
|
||||
|
||||
@ -238,3 +238,126 @@ func parseTimeString(timeStr string) time.Time {
|
||||
// 如果所有格式都失败,返回零时间
|
||||
return time.Time{}
|
||||
}
|
||||
|
||||
// CheckOPMLFile probes opmlURL with an HTTP HEAD request and reports whether
// the response looks like an OPML/XML document.
//
// It returns nil when the server answers 200 and the Content-Type header
// contains one of the accepted XML/OPML media types. It returns an error when
// the request fails, the status is not 200, or the Content-Type is not
// accepted.
func CheckOPMLFile(opmlURL string) error {
	// Bound the probe so an unresponsive host cannot block the caller forever.
	client := &http.Client{Timeout: 15 * time.Second}

	resp, err := client.Head(opmlURL)
	if err != nil {
		return err
	}
	// A HEAD response has no payload, but its body must still be closed so the
	// underlying connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("OPML文件无效: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	// Media type names are case-insensitive, so compare in lower case. A
	// substring match keeps tolerating parameters such as "; charset=utf-8".
	normalized := strings.ToLower(contentType)
	acceptedTypes := []string{
		"application/xml",
		"text/xml",
		"text/x-opml",
		"application/x-opml+xml",
	}
	for _, accepted := range acceptedTypes {
		if strings.Contains(normalized, accepted) {
			return nil
		}
	}
	return fmt.Errorf("OPML文件无效: %s", contentType)
}
|
||||
|
||||
// ParseOPML 解析OPML文件,返回RSS源列表
|
||||
func ParseOPML(opmlURL string) ([]OPMLFeedInfo, error) {
|
||||
//确认大小
|
||||
resp, err := http.Head(opmlURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
if resp.StatusCode != 200 {
|
||||
return nil, fmt.Errorf("OPML文件无效: %d", resp.StatusCode)
|
||||
}
|
||||
if resp.ContentLength == 0 || resp.ContentLength > 1024*1024*10 {
|
||||
return nil, fmt.Errorf("OPML文件的大小为%d,超出限制", resp.ContentLength)
|
||||
}
|
||||
|
||||
//获取OPML数据
|
||||
resp, err = http.Get(opmlURL)
|
||||
if err != nil {
|
||||
return nil, err
|
||||
}
|
||||
defer resp.Body.Close()
|
||||
|
||||
// 读取响应体内容
|
||||
body, err := io.ReadAll(resp.Body)
|
||||
if err != nil {
|
||||
return nil, fmt.Errorf("读取OPML数据失败: %v", err)
|
||||
}
|
||||
|
||||
return ParseOPMLFormat(body)
|
||||
}
|
||||
|
||||
// ParseOPMLFormat 解析OPML格式数据
|
||||
func ParseOPMLFormat(data []byte) ([]OPMLFeedInfo, error) {
|
||||
var opml OPML
|
||||
decoder := xml.NewDecoder(strings.NewReader(string(data)))
|
||||
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
|
||||
// 处理不同的字符编码
|
||||
switch charset {
|
||||
case "GB2312", "GBK", "GB18030":
|
||||
// 如果需要处理中文编码,可以在这里添加转换逻辑
|
||||
return input, nil
|
||||
default:
|
||||
return input, nil
|
||||
}
|
||||
}
|
||||
|
||||
if err := decoder.Decode(&opml); err != nil {
|
||||
return nil, fmt.Errorf("解析OPML数据失败: %v", err)
|
||||
}
|
||||
|
||||
var feedInfos []OPMLFeedInfo
|
||||
extractFeeds(opml.Body.Outlines, "", &feedInfos)
|
||||
|
||||
if len(feedInfos) == 0 {
|
||||
return nil, errors.New("未在OPML文件中找到RSS源")
|
||||
}
|
||||
|
||||
return feedInfos, nil
|
||||
}
|
||||
|
||||
// extractFeeds 递归提取RSS源信息
|
||||
func extractFeeds(outlines []OPMLOutline, category string, feedInfos *[]OPMLFeedInfo) {
|
||||
for _, outline := range outlines {
|
||||
// 如果有xmlUrl,说明这是一个RSS源
|
||||
if outline.XMLURL != "" {
|
||||
title := outline.Title
|
||||
if title == "" {
|
||||
title = outline.Text
|
||||
}
|
||||
|
||||
feedInfo := OPMLFeedInfo{
|
||||
Title: title,
|
||||
XMLURL: outline.XMLURL,
|
||||
HTMLURL: outline.HTMLURL,
|
||||
Description: outline.Description,
|
||||
Category: category,
|
||||
}
|
||||
*feedInfos = append(*feedInfos, feedInfo)
|
||||
} else if len(outline.Outlines) > 0 {
|
||||
// 如果没有xmlUrl但有子outline,说明这是一个分类
|
||||
categoryName := outline.Title
|
||||
if categoryName == "" {
|
||||
categoryName = outline.Text
|
||||
}
|
||||
|
||||
// 递归处理子outline
|
||||
extractFeeds(outline.Outlines, categoryName, feedInfos)
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -24,6 +24,8 @@ func init() {
|
||||
handler.RegisterHelpInform("@我 我的订阅", "rss", "查看我的订阅")
|
||||
handler.RegisterAtHandler("退订", Unsubscribe, constants.LEVEL_USER)
|
||||
handler.RegisterHelpInform("@我 退订 <id>", "rss", "退订rss源")
|
||||
handler.RegisterAtHandler("导入OPML", ImportOPML, constants.LEVEL_USER)
|
||||
handler.RegisterHelpInform("@我 导入OPML <url>", "rss", "从OPML文件导入RSS源")
|
||||
//test
|
||||
handler.RegisterHandler("test_rss", TestRss, constants.LEVEL_ADMIN)
|
||||
}
|
||||
@ -186,3 +188,90 @@ func Unsubscribe(msg model.Message) (reply *model.Reply) {
|
||||
FromMsg: msg,
|
||||
}
|
||||
}
|
||||
|
||||
// ImportOPML 导入OPML文件中的RSS源
|
||||
func ImportOPML(msg model.Message) (reply *model.Reply) {
|
||||
//提取OPML文件URL
|
||||
var opmlURL string
|
||||
for _, data := range msg.StructuredMsg {
|
||||
if data.GetMessageType() == "text" {
|
||||
// 匹配OPML文件链接
|
||||
urls := regexp.MustCompile(`(?i)(?:https?://)?(?:[a-zA-Z0-9-]+\.)+[a-zA-Z]{2,}(?:/[^\s]*)?(?:\.opml)?\b`).FindAllString(data.(*qq_message.TextMessage).Data.Text, -1)
|
||||
if len(urls) > 0 {
|
||||
opmlURL = urls[0]
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
if opmlURL == "" {
|
||||
return &model.Reply{
|
||||
ReplyMsg: "请提供有效的OPML文件链接",
|
||||
ReferOriginMsg: true,
|
||||
FromMsg: msg,
|
||||
}
|
||||
}
|
||||
|
||||
// 确保URL有协议前缀
|
||||
if !regexp.MustCompile(`^https?://`).MatchString(opmlURL) {
|
||||
opmlURL = "https://" + opmlURL
|
||||
}
|
||||
|
||||
// 检查OPML文件是否有效
|
||||
if err := CheckOPMLFile(opmlURL); err != nil {
|
||||
return &model.Reply{
|
||||
ReplyMsg: fmt.Sprintf("OPML文件无效: %v", err),
|
||||
ReferOriginMsg: true,
|
||||
FromMsg: msg,
|
||||
}
|
||||
}
|
||||
|
||||
// 解析OPML文件
|
||||
feedInfos, err := ParseOPML(opmlURL)
|
||||
if err != nil {
|
||||
return &model.Reply{
|
||||
ReplyMsg: fmt.Sprintf("解析OPML文件失败: %v", err),
|
||||
ReferOriginMsg: true,
|
||||
FromMsg: msg,
|
||||
}
|
||||
}
|
||||
|
||||
// 批量订阅RSS源
|
||||
var successCount int
|
||||
var failedFeeds []string
|
||||
var successFeeds []string
|
||||
|
||||
for _, feedInfo := range feedInfos {
|
||||
if feedInfo.XMLURL != "" {
|
||||
title, err := SubscribeToFeed(feedInfo.XMLURL, msg.UserId, msg.GroupInfo.GroupId)
|
||||
if err != nil {
|
||||
failedFeeds = append(failedFeeds, fmt.Sprintf("%s: %v", feedInfo.Title, err))
|
||||
} else {
|
||||
successCount++
|
||||
if feedInfo.Category != "" {
|
||||
successFeeds = append(successFeeds, fmt.Sprintf("[%s] %s", feedInfo.Category, title))
|
||||
} else {
|
||||
successFeeds = append(successFeeds, title)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// 构建回复消息
|
||||
replyMsg := fmt.Sprintf("OPML导入完成!\n成功订阅 %d 个RSS源", successCount)
|
||||
|
||||
if len(successFeeds) > 0 {
|
||||
replyMsg += ":\n" + strings.Join(successFeeds, "\n")
|
||||
}
|
||||
|
||||
if len(failedFeeds) > 0 {
|
||||
replyMsg += fmt.Sprintf("\n\n失败 %d 个:\n", len(failedFeeds))
|
||||
replyMsg += strings.Join(failedFeeds, "\n")
|
||||
}
|
||||
|
||||
return &model.Reply{
|
||||
ReplyMsg: replyMsg,
|
||||
ReferOriginMsg: true,
|
||||
FromMsg: msg,
|
||||
}
|
||||
}
|
||||
|
Loading…
x
Reference in New Issue
Block a user