112 lines
2.7 KiB
Go
112 lines
2.7 KiB
Go
package rss
|
|
|
|
import (
	"crypto/md5"
	"encoding/xml"
	"fmt"
	"io"
	"mime"
	"net/http"
	"sort"
	"time"
)
|
|
|
|
// CheckRssFeed reports whether feedURL answers like an RSS feed.
//
// It issues a single HEAD request and returns an error when the request
// fails, when the status code is not 200, or when the Content-Type header
// does not name an RSS/XML media type. Header parameters (for example
// "; charset=utf-8") and letter case are ignored when comparing media types.
// A nil return means every check passed.
func CheckRssFeed(feedURL string) error {
	// Bound the whole exchange so an unresponsive server cannot block the
	// caller forever. A nil Transport still uses the shared default transport.
	client := &http.Client{Timeout: 30 * time.Second}

	// Inspect the response headers only.
	resp, err := client.Head(feedURL)
	if err != nil {
		return err
	}
	// A HEAD response has no payload, but the body must still be closed so
	// the underlying connection is released.
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("RSS源无效: %d", resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	// The error is deliberately ignored: on a hard failure mediaType is empty
	// and is rejected below, while a malformed parameter still yields a
	// usable (lower-cased) media type.
	mediaType, _, _ := mime.ParseMediaType(contentType)

	switch mediaType {
	case "application/rss+xml", "application/xml", "text/xml":
		return nil
	default:
		return fmt.Errorf("RSS源无效: %s", contentType)
	}
}
|
|
|
|
func ParseRssFeed(feedURL string) ([]RssItem, error) {
|
|
//确认大小
|
|
resp, err := http.Head(feedURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
if resp.StatusCode != 200 {
|
|
return nil, fmt.Errorf("RSS源无效: %d", resp.StatusCode)
|
|
}
|
|
if resp.ContentLength == 0 || resp.ContentLength > 1024*1024*10 {
|
|
return nil, fmt.Errorf("RSS源的大小为%d,超出限制", resp.ContentLength)
|
|
}
|
|
|
|
//获取rss数据
|
|
resp, err = http.Get(feedURL)
|
|
if err != nil {
|
|
return nil, err
|
|
}
|
|
defer resp.Body.Close()
|
|
|
|
//解析rss数据
|
|
var rssFeed RSSFeed
|
|
decoder := xml.NewDecoder(resp.Body)
|
|
decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
|
|
// 处理不同的字符编码
|
|
switch charset {
|
|
case "GB2312", "GBK", "GB18030":
|
|
// 如果需要处理中文编码,可以在这里添加转换逻辑
|
|
return input, nil
|
|
default:
|
|
return input, nil
|
|
}
|
|
}
|
|
|
|
if err := decoder.Decode(&rssFeed); err != nil {
|
|
return nil, fmt.Errorf("解析RSS数据失败: %v", err)
|
|
}
|
|
|
|
// 转换为RssItem数组
|
|
var items []RssItem
|
|
for _, item := range rssFeed.Channel.Items {
|
|
rssItem := RssItem{
|
|
Title: item.Title,
|
|
Link: item.Link,
|
|
Description: item.Description,
|
|
GUID: item.GUID,
|
|
Author: item.Author,
|
|
Category: item.Category,
|
|
}
|
|
|
|
// 解析发布时间
|
|
if item.PubDate != "" {
|
|
// 尝试多种时间格式
|
|
timeFormats := []string{
|
|
time.RFC1123, // "Mon, 02 Jan 2006 15:04:05 MST"
|
|
time.RFC1123Z, // "Mon, 02 Jan 2006 15:04:05 -0700"
|
|
time.RFC822, // "02 Jan 06 15:04 MST"
|
|
time.RFC822Z, // "02 Jan 06 15:04 -0700"
|
|
"2006-01-02T15:04:05Z07:00", // ISO 8601
|
|
"2006-01-02 15:04:05", // 简单格式
|
|
}
|
|
|
|
for _, format := range timeFormats {
|
|
if parsedTime, err := time.Parse(format, item.PubDate); err == nil {
|
|
rssItem.PubDate = parsedTime
|
|
break
|
|
}
|
|
}
|
|
}
|
|
|
|
// 生成内容哈希值
|
|
content := fmt.Sprintf("%s%s%s", item.Title, item.Link, item.Description)
|
|
hash := md5.Sum([]byte(content))
|
|
rssItem.Hash = fmt.Sprintf("%x", hash)
|
|
|
|
items = append(items, rssItem)
|
|
}
|
|
//按时间降序排序
|
|
sort.Slice(items, func(i, j int) bool {
|
|
return items[i].PubDate.Before(items[j].PubDate)
|
|
})
|
|
|
|
return items, nil
|
|
}
|