qq_bot/util/url.go
2024-11-08 00:43:06 +08:00

75 lines
1.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package util
import (
"fmt"
"io"
"net/http"
"net/url"
"os"
"strings"
)
// IsEquivalentURL reports whether url1 and url2 are the same URL once both
// have been passed through normalizeURL.
func IsEquivalentURL(url1, url2 string) bool {
	return normalizeURL(url1) == normalizeURL(url2)
}
// normalizeURL returns a canonical form of rawURL so that URLs which differ
// only in superficial details compare equal as plain strings.
//
// Normalization steps:
//   - an "http" or missing scheme becomes "https"; any other scheme is kept;
//   - the host is lowercased, because host names are case-insensitive;
//   - a trailing "/index.html" or ".html" is removed from the path;
//   - all trailing slashes are removed from the path.
//
// The query string and fragment are left untouched. If rawURL cannot be
// parsed, it is returned unchanged.
func normalizeURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil {
		return rawURL
	}

	// Treat http and https as the same protocol. url.Parse has already
	// lowercased the scheme. Unrelated schemes (ftp, mailto, ...) are left
	// alone so they are never reported as equivalent to a web URL.
	switch u.Scheme {
	case "", "http":
		u.Scheme = "https"
	}

	// Host names are case-insensitive, so compare them in lowercase.
	u.Host = strings.ToLower(u.Host)

	// Drop a trailing "/index.html" or ".html", then any trailing slashes.
	u.Path = strings.TrimSuffix(u.Path, "/index.html")
	u.Path = strings.TrimSuffix(u.Path, ".html")
	u.Path = strings.TrimRight(u.Path, "/")

	return u.String()
}
// DownloadFile fetches url with an HTTP GET request and writes the response
// body to the file at filepath, creating or truncating that file.
//
// A request that fails at the transport level is retried immediately, up to
// maxRetry additional times. A response with a status other than 200 OK is
// not retried and is reported as an error. If the body cannot be written
// completely, the partially written file is removed.
//
// The returned error wraps the underlying cause, so callers can inspect it
// with errors.Is and errors.As.
//
// NOTE: http.Get uses http.DefaultClient, which has no timeout; a server that
// stalls can block this call indefinitely.
func DownloadFile(url string, filepath string) error {
	const maxRetry = 100

	// One initial attempt plus at most maxRetry retries.
	resp, err := http.Get(url)
	for retry := 0; err != nil && retry < maxRetry; retry++ {
		fmt.Println("下载失败,正在重试...")
		resp, err = http.Get(url)
	}
	if err != nil {
		return fmt.Errorf("下载失败: %w", err)
	}
	defer resp.Body.Close()

	// Only a plain 200 OK counts as success.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("请求失败,状态码: %d", resp.StatusCode)
	}

	out, err := os.Create(filepath)
	if err != nil {
		return fmt.Errorf("创建文件失败: %w", err)
	}

	// Stream the body to disk without buffering it all in memory.
	if _, err := io.Copy(out, resp.Body); err != nil {
		// Best-effort cleanup: do not leave a truncated file behind.
		_ = out.Close()
		_ = os.Remove(filepath)
		return fmt.Errorf("保存失败: %w", err)
	}

	// Close can report a deferred write error, so it must be checked before
	// declaring the download successful.
	if err := out.Close(); err != nil {
		_ = os.Remove(filepath)
		return fmt.Errorf("保存失败: %w", err)
	}
	return nil
}