qq_bot/handler/rss/parse.go

112 lines
2.7 KiB
Go

package rss
import (
	"crypto/md5"
	"encoding/xml"
	"fmt"
	"io"
	"net/http"
	"sort"
	"strings"
	"time"
)
// CheckRssFeed reports whether feedURL looks like a usable RSS source.
//
// It issues a single HEAD request and returns nil when the server answers
// 200 with an RSS or generic XML media type. Otherwise it returns the
// transport error, or an error carrying the unexpected status code or the
// raw Content-Type header value.
func CheckRssFeed(feedURL string) error {
	// Bound the request so an unresponsive server cannot block the caller
	// forever. A nil Transport means the shared default transport is used.
	client := &http.Client{Timeout: 15 * time.Second}

	resp, err := client.Head(feedURL)
	if err != nil {
		return err
	}
	// The body must be closed even for HEAD so the connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("RSS源无效: %d", resp.StatusCode)
	}

	// Compare only the media type: servers commonly append parameters such
	// as "; charset=utf-8", and media types are case-insensitive.
	contentType := resp.Header.Get("Content-Type")
	mediaType := contentType
	if i := strings.IndexByte(mediaType, ';'); i >= 0 {
		mediaType = mediaType[:i]
	}

	switch strings.ToLower(strings.TrimSpace(mediaType)) {
	case "application/rss+xml", "application/xml", "text/xml":
		// Many real-world feeds are served with a generic XML type.
		return nil
	default:
		return fmt.Errorf("RSS源无效: %s", contentType)
	}
}
// maxRssFeedSize is the largest feed body ParseRssFeed will read (10 MiB).
const maxRssFeedSize = 10 * 1024 * 1024

// rssPubDateLayouts lists the layouts tried, in order, when parsing an
// item's publication date.
var rssPubDateLayouts = []string{
	time.RFC1123,                     // "Mon, 02 Jan 2006 15:04:05 MST"
	time.RFC1123Z,                    // "Mon, 02 Jan 2006 15:04:05 -0700"
	"Mon, 2 Jan 2006 15:04:05 MST",   // RFC 1123 with a one-digit day
	"Mon, 2 Jan 2006 15:04:05 -0700", // RFC 1123Z with a one-digit day
	time.RFC822,                      // "02 Jan 06 15:04 MST"
	time.RFC822Z,                     // "02 Jan 06 15:04 -0700"
	time.RFC3339,                     // "2006-01-02T15:04:05Z07:00"
	"2006-01-02 15:04:05",            // plain date and time
}

// parseRssPubDate parses raw using the first matching entry of
// rssPubDateLayouts. It reports false when raw is blank or matches none.
func parseRssPubDate(raw string) (time.Time, bool) {
	raw = strings.TrimSpace(raw)
	if raw == "" {
		return time.Time{}, false
	}
	for _, layout := range rssPubDateLayouts {
		if parsed, err := time.Parse(layout, raw); err == nil {
			return parsed, true
		}
	}
	return time.Time{}, false
}

// ParseRssFeed downloads the feed at feedURL and converts its channel items
// into RssItem values, sorted by publication time with the newest first.
//
// The request fails on a transport error, a non-200 status, an empty body,
// or a body larger than maxRssFeedSize. Items whose publication date is
// missing or unparseable keep the zero time and therefore sort last. Each
// item's Hash is the hex MD5 of its title, link and description.
func ParseRssFeed(feedURL string) ([]RssItem, error) {
	// Bound the whole exchange (headers and body) so a stalled server cannot
	// block the caller forever.
	client := &http.Client{Timeout: 30 * time.Second}

	resp, err := client.Get(feedURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("RSS源无效: %d", resp.StatusCode)
	}
	// Reject early when the server declares an empty or oversized body.
	if resp.ContentLength == 0 || resp.ContentLength > maxRssFeedSize {
		return nil, fmt.Errorf("RSS源的大小为%d,超出限制", resp.ContentLength)
	}

	// The declared length may be absent (-1) or wrong, so also cap what is
	// actually read. One extra byte lets us tell "too large" from "exactly
	// at the limit".
	body := &io.LimitedReader{R: resp.Body, N: maxRssFeedSize + 1}

	decoder := xml.NewDecoder(body)
	// Non-UTF-8 feeds (e.g. GBK/GB2312/GB18030) are passed through without
	// transcoding: the standard library has no converter for them, and
	// rejecting the feed outright would be worse than decoding its ASCII
	// parts.
	decoder.CharsetReader = func(_ string, input io.Reader) (io.Reader, error) {
		return input, nil
	}

	var rssFeed RSSFeed
	if err := decoder.Decode(&rssFeed); err != nil {
		if body.N <= 0 {
			return nil, fmt.Errorf("RSS源的大小超出限制: 超过%d字节", maxRssFeedSize)
		}
		return nil, fmt.Errorf("解析RSS数据失败: %w", err)
	}

	var items []RssItem
	for _, item := range rssFeed.Channel.Items {
		rssItem := RssItem{
			Title:       item.Title,
			Link:        item.Link,
			Description: item.Description,
			GUID:        item.GUID,
			Author:      item.Author,
			Category:    item.Category,
		}

		if published, ok := parseRssPubDate(item.PubDate); ok {
			rssItem.PubDate = published
		}

		// The hash input is kept exactly as before so previously stored
		// hashes still match.
		content := fmt.Sprintf("%s%s%s", item.Title, item.Link, item.Description)
		rssItem.Hash = fmt.Sprintf("%x", md5.Sum([]byte(content)))

		items = append(items, rssItem)
	}

	// Newest first; the stable sort keeps feed order for equal timestamps.
	sort.SliceStable(items, func(i, j int) bool {
		return items[i].PubDate.After(items[j].PubDate)
	})
	return items, nil
}