// Package rss downloads and parses RSS/Atom feeds and OPML subscription lists.
package rss

import (
	"bytes"
	"context"
	"crypto/md5"
	"encoding/xml"
	"errors"
	"fmt"
	"io"
	"log"
	"net"
	"net/http"
	"net/url"
	"sort"
	"strings"
	"time"

	"git.lxtend.com/lixiangwuxian/qqbot/config"
	"golang.org/x/net/proxy"
)

const (
	// rssHTTPTimeout bounds a whole HTTP exchange as well as TCP connect and
	// keep-alive probing.
	rssHTTPTimeout = 30 * time.Second
	// rssIdleConnTimeout closes pooled keep-alive connections. A fresh
	// Transport is built per client, so without it idle sockets would linger.
	rssIdleConnTimeout = 90 * time.Second
	// rssMaxDownloadBytes is the largest feed/OPML payload that is accepted.
	rssMaxDownloadBytes = 10 * 1024 * 1024
)

// feedTimeLayouts lists the layouts tried, in order, by parseTimeString.
var feedTimeLayouts = []string{
	time.RFC3339,               // "2006-01-02T15:04:05Z07:00" (ISO 8601, common in Atom)
	time.RFC3339Nano,           // "2006-01-02T15:04:05.999999999Z07:00"
	time.RFC1123,               // "Mon, 02 Jan 2006 15:04:05 MST" (common in RSS)
	time.RFC1123Z,              // "Mon, 02 Jan 2006 15:04:05 -0700"
	time.RFC822,                // "02 Jan 06 15:04 MST"
	time.RFC822Z,               // "02 Jan 06 15:04 -0700"
	"2006-01-02T15:04:05Z",     // UTC
	"2006-01-02T15:04:05.000Z", // UTC with milliseconds
	"2006-01-02 15:04:05",      // plain date-time
	// Go's "02" requires exactly two digits, so dates with a one-digit day
	// ("Mon, 2 Jan 2006 ...") need their own layouts.
	"Mon, 2 Jan 2006 15:04:05 MST",
	"Mon, 2 Jan 2006 15:04:05 -0700",
	"2 Jan 2006 15:04:05 MST",
	"2 Jan 2006 15:04:05 -0700",
}

// createHTTPClient builds an HTTP client that dials targets over IPv4 and
// honours the proxy configured in Management.ProxyAddr.
//
// A "socks5" proxy URL is dialled through golang.org/x/net/proxy (using the
// credentials embedded in the URL, if any); any other scheme is treated as an
// HTTP/HTTPS proxy. When the proxy address cannot be parsed or the SOCKS5
// dialer cannot be created, the problem is logged and a direct IPv4 client is
// returned instead.
func createHTTPClient() *http.Client {
	// forceIPv4 narrows the generic "tcp" network to IPv4 only.
	forceIPv4 := func(network string) string {
		if network == "tcp" {
			return "tcp4"
		}
		return network
	}

	netDialer := &net.Dialer{
		Timeout:   rssHTTPTimeout,
		KeepAlive: rssHTTPTimeout,
	}
	dialDirect := func(ctx context.Context, network, addr string) (net.Conn, error) {
		return netDialer.DialContext(ctx, forceIPv4(network), addr)
	}
	newTransport := func(dial func(ctx context.Context, network, addr string) (net.Conn, error)) *http.Transport {
		return &http.Transport{
			DialContext:     dial,
			IdleConnTimeout: rssIdleConnTimeout,
		}
	}

	// Start with a direct IPv4 client; every failure path below keeps it.
	client := &http.Client{
		Timeout:   rssHTTPTimeout,
		Transport: newTransport(dialDirect),
	}

	proxyAddr := config.ConfigManager.GetConfig().Management.ProxyAddr
	if proxyAddr == "" {
		return client
	}

	proxyURL, err := url.Parse(proxyAddr)
	if err != nil {
		log.Printf("解析代理地址失败: %v\n", err)
		return client
	}

	if proxyURL.Scheme != "socks5" {
		log.Printf("使用HTTP/HTTPS代理: %s\n", proxyURL.Host)
		transport := newTransport(dialDirect)
		transport.Proxy = http.ProxyURL(proxyURL)
		client.Transport = transport
		return client
	}

	log.Printf("使用SOCKS5代理: %s\n", proxyURL.Host)

	// Forward credentials from the proxy URL, when present.
	var auth *proxy.Auth
	if user := proxyURL.User; user != nil && user.Username() != "" {
		password, _ := user.Password() // a missing password is sent as ""
		auth = &proxy.Auth{User: user.Username(), Password: password}
	}

	socksDialer, err := proxy.SOCKS5("tcp", proxyURL.Host, auth, proxy.Direct)
	if err != nil {
		log.Printf("创建SOCKS5代理失败: %v\n", err)
		return client
	}

	// Prefer a context-aware dial so request cancellation and timeouts reach
	// the proxy handshake; fall back to plain Dial if the returned dialer does
	// not offer one.
	type contextDialer interface {
		DialContext(ctx context.Context, network, addr string) (net.Conn, error)
	}
	ctxDialer, hasCtx := socksDialer.(contextDialer)
	client.Transport = newTransport(func(ctx context.Context, network, addr string) (net.Conn, error) {
		network = forceIPv4(network)
		if hasCtx {
			return ctxDialer.DialContext(ctx, network, addr)
		}
		return socksDialer.Dial(network, addr)
	})
	return client
}

// checkRemoteContentType issues a HEAD request for targetURL and verifies that
// the response is 200 and that its Content-Type contains one of validTypes
// (which must be lowercase). label prefixes the returned error messages.
func checkRemoteContentType(targetURL, label string, validTypes []string) error {
	resp, err := createHTTPClient().Head(targetURL)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("%s无效: %d", label, resp.StatusCode)
	}

	contentType := resp.Header.Get("Content-Type")
	// Media types are case-insensitive, so compare in lowercase.
	lowered := strings.ToLower(contentType)
	for _, validType := range validTypes {
		if strings.Contains(lowered, validType) {
			return nil
		}
	}
	return fmt.Errorf("%s无效: %s", label, contentType)
}

// readBodyLimited reads r up to rssMaxDownloadBytes. tooLarge is true (and
// data is nil) when the payload exceeds that limit.
func readBodyLimited(r io.Reader) (data []byte, tooLarge bool, err error) {
	// Read one byte past the limit so "exactly at the limit" and "over the
	// limit" can be told apart.
	data, err = io.ReadAll(io.LimitReader(r, rssMaxDownloadBytes+1))
	if err != nil {
		return nil, false, err
	}
	if int64(len(data)) > rssMaxDownloadBytes {
		return nil, true, nil
	}
	return data, false, nil
}

// CheckRssFeed sends a HEAD request to feedURL and reports an error unless the
// server answers 200 with an RSS/Atom/XML Content-Type.
func CheckRssFeed(feedURL string) error {
	return checkRemoteContentType(feedURL, "RSS源", []string{
		"application/rss+xml",
		"application/atom+xml",
		"application/xml",
		"text/xml",
	})
}

// ParseFeed downloads feedURL and parses it as RSS, falling back to Atom.
// It returns the feed title and its items sorted newest first.
//
// The download is rejected when the status is not 200 or the body exceeds
// rssMaxDownloadBytes. If the document only parses as Atom and has no
// entries, the title is returned together with an error.
func ParseFeed(feedURL string) (string, []RssItem, error) {
	resp, err := createHTTPClient().Get(feedURL)
	if err != nil {
		return "", nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return "", nil, fmt.Errorf("RSS源无效: %d", resp.StatusCode)
	}

	body, tooLarge, err := readBodyLimited(resp.Body)
	if err != nil {
		return "", nil, fmt.Errorf("读取RSS数据失败: %w", err)
	}
	if tooLarge {
		return "", nil, fmt.Errorf("RSS源的大小超出限制(上限%d字节)", rssMaxDownloadBytes)
	}

	rssTitle, rssItems, rssErr := parseRSSFormat(body)
	if rssErr == nil && len(rssItems) > 0 {
		return rssTitle, rssItems, nil
	}

	// NOTE(review): RSSFeed is declared outside this file. If it does not pin
	// the root element name, an Atom document decodes "successfully" into an
	// empty RSS feed, so Atom is also tried when RSS yielded no items.
	atomTitle, atomItems, atomErr := parseAtomFormat(body)
	if atomErr == nil && len(atomItems) > 0 {
		return atomTitle, atomItems, nil
	}

	if rssErr == nil {
		// A well-formed RSS document without items is not an error.
		return rssTitle, rssItems, nil
	}
	if atomErr != nil {
		return "", nil, fmt.Errorf("解析RSS/Atom数据失败: %w", atomErr)
	}
	return atomTitle, nil, errors.New("未解析到rss信息")
}

// newFeedDecoder returns an XML decoder over data that accepts any declared
// charset. The bytes are passed through unchanged: no transcoding is done, so
// documents in encodings such as GBK are decoded as if they were UTF-8.
func newFeedDecoder(data []byte) *xml.Decoder {
	decoder := xml.NewDecoder(bytes.NewReader(data))
	decoder.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
		return input, nil
	}
	return decoder
}

// itemHash returns the hex MD5 digest of title followed by link. It is used
// as an item fingerprint, not for security.
func itemHash(title, link string) string {
	sum := md5.Sum([]byte(title + link))
	return fmt.Sprintf("%x", sum)
}

// sortNewestFirst orders items by PubDate descending. The sort is stable, so
// items with equal (or missing) dates keep their order from the feed.
func sortNewestFirst(items []RssItem) {
	sort.SliceStable(items, func(i, j int) bool {
		return items[i].PubDate.After(items[j].PubDate)
	})
}

// parseRSSFormat decodes data as an RSS document and returns the channel
// title and its items, newest first.
func parseRSSFormat(data []byte) (string, []RssItem, error) {
	var rssFeed RSSFeed
	if err := newFeedDecoder(data).Decode(&rssFeed); err != nil {
		return "", nil, err
	}

	var items []RssItem
	for _, item := range rssFeed.Channel.Items {
		rssItem := RssItem{
			Title:       item.Title,
			Link:        item.Link,
			Description: item.Description,
			GUID:        item.GUID,
			Author:      item.Author,
			Category:    item.Category,
			Hash:        itemHash(item.Title, item.Link),
		}
		if item.PubDate != "" {
			rssItem.PubDate = parseTimeString(item.PubDate)
		}
		items = append(items, rssItem)
	}

	sortNewestFirst(items)
	return rssFeed.Channel.Title, items, nil
}

// parseAtomFormat decodes data as an Atom document and returns the feed title
// and its entries converted to RssItem, newest first.
func parseAtomFormat(data []byte) (string, []RssItem, error) {
	var atomFeed AtomFeed
	if err := newFeedDecoder(data).Decode(&atomFeed); err != nil {
		return "", nil, err
	}

	var items []RssItem
	for _, entry := range atomFeed.Entries {
		rssItem := RssItem{
			Title:  entry.Title,
			GUID:   entry.ID,
			Author: entry.Author.Name,
		}

		// Use the first link that is either unlabelled or rel="alternate".
		for _, link := range entry.Link {
			if link.Rel == "" || link.Rel == "alternate" {
				rssItem.Link = link.Href
				break
			}
		}

		// Full content wins over the summary.
		switch {
		case entry.Content.Value != "":
			rssItem.Description = entry.Content.Value
		case entry.Summary != "":
			rssItem.Description = entry.Summary
		}

		// Only the first category is kept.
		if len(entry.Category) > 0 {
			rssItem.Category = entry.Category[0].Term
		}

		// Prefer the publication time; fall back to the last update.
		timeStr := entry.Published
		if timeStr == "" {
			timeStr = entry.Updated
		}
		if timeStr != "" {
			rssItem.PubDate = parseTimeString(timeStr)
		}

		rssItem.Hash = itemHash(rssItem.Title, rssItem.Link)
		items = append(items, rssItem)
	}

	sortNewestFirst(items)
	return atomFeed.Title, items, nil
}

// parseTimeString parses timeStr using the layouts in feedTimeLayouts after
// trimming surrounding whitespace. It returns the zero time.Time when no
// layout matches.
func parseTimeString(timeStr string) time.Time {
	trimmed := strings.TrimSpace(timeStr)
	for _, layout := range feedTimeLayouts {
		if parsed, err := time.Parse(layout, trimmed); err == nil {
			return parsed
		}
	}
	return time.Time{}
}

// CheckOPMLFile sends a HEAD request to opmlURL and reports an error unless
// the server answers 200 with an XML/OPML Content-Type.
func CheckOPMLFile(opmlURL string) error {
	return checkRemoteContentType(opmlURL, "OPML文件", []string{
		"application/xml",
		"text/xml",
		"text/x-opml",
		"application/x-opml+xml",
	})
}

// ParseOPML downloads the OPML document at opmlURL and returns the feeds it
// lists.
//
// A HEAD request first rejects documents whose advertised size is zero or
// above rssMaxDownloadBytes; the GET body is additionally capped at that
// limit because the advertised size may be missing.
func ParseOPML(opmlURL string) ([]OPMLFeedInfo, error) {
	client := createHTTPClient()

	head, err := client.Head(opmlURL)
	if err != nil {
		return nil, err
	}
	// Close right away so the connection can be reused by the GET below.
	head.Body.Close()

	if head.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("OPML文件无效: %d", head.StatusCode)
	}
	if head.ContentLength == 0 || head.ContentLength > rssMaxDownloadBytes {
		return nil, fmt.Errorf("OPML文件的大小为%d,超出限制", head.ContentLength)
	}

	resp, err := client.Get(opmlURL)
	if err != nil {
		return nil, err
	}
	defer resp.Body.Close()

	if resp.StatusCode != http.StatusOK {
		return nil, fmt.Errorf("OPML文件无效: %d", resp.StatusCode)
	}

	body, tooLarge, err := readBodyLimited(resp.Body)
	if err != nil {
		return nil, fmt.Errorf("读取OPML数据失败: %w", err)
	}
	if tooLarge {
		return nil, fmt.Errorf("OPML文件的大小超出限制(上限%d字节)", rssMaxDownloadBytes)
	}

	return ParseOPMLFormat(body)
}

// ParseOPMLFormat decodes data as an OPML document and returns every outline
// that carries an xmlUrl. It returns an error when decoding fails or when no
// such outline exists.
func ParseOPMLFormat(data []byte) ([]OPMLFeedInfo, error) {
	var opml OPML
	if err := newFeedDecoder(data).Decode(&opml); err != nil {
		return nil, fmt.Errorf("解析OPML数据失败: %w", err)
	}

	var feedInfos []OPMLFeedInfo
	extractFeeds(opml.Body.Outlines, "", &feedInfos)
	if len(feedInfos) == 0 {
		return nil, errors.New("未在OPML文件中找到RSS源")
	}
	return feedInfos, nil
}

// extractFeeds walks outlines recursively and appends one OPMLFeedInfo to
// feedInfos for every outline that has an xmlUrl, tagging it with category.
//
// An outline without an xmlUrl but with children is treated as a category:
// its title (or text, when the title is empty) becomes the category of its
// children. Children of an outline that itself has an xmlUrl are not visited.
func extractFeeds(outlines []OPMLOutline, category string, feedInfos *[]OPMLFeedInfo) {
	for _, outline := range outlines {
		// Title is preferred; Text is the fallback for both feeds and
		// categories.
		name := outline.Title
		if name == "" {
			name = outline.Text
		}

		if outline.XMLURL != "" {
			*feedInfos = append(*feedInfos, OPMLFeedInfo{
				Title:       name,
				XMLURL:      outline.XMLURL,
				HTMLURL:     outline.HTMLURL,
				Description: outline.Description,
				Category:    category,
			})
			continue
		}

		if len(outline.Outlines) > 0 {
			extractFeeds(outline.Outlines, name, feedInfos)
		}
	}
}