qq_bot/util/url.go
2025-04-16 00:44:01 +08:00

148 lines
3.6 KiB
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package util
import (
"bytes"
"fmt"
"io"
"log"
"net/url"
"os"
"path"
"strings"
"time"
"github.com/google/uuid"
"github.com/valyala/fasthttp"
)
// GetImageExtension inspects the leading magic bytes of data and returns the
// matching file extension, including the leading dot.
//
// Recognized signatures are PNG, JPEG, GIF, BMP and WebP. Inputs shorter than
// 8 bytes and inputs with an unrecognized signature yield ".jpg".
func GetImageExtension(data []byte) string {
	const fallback = ".jpg"

	if len(data) < 8 {
		return fallback
	}

	switch {
	case bytes.HasPrefix(data, []byte{0x89, 0x50, 0x4E, 0x47}): // "\x89PNG"
		return ".png"
	case bytes.HasPrefix(data, []byte{0xFF, 0xD8, 0xFF}): // JPEG start-of-image
		return ".jpg"
	case bytes.HasPrefix(data, []byte{0x47, 0x49, 0x46}): // "GIF"
		return ".gif"
	case bytes.HasPrefix(data, []byte{0x42, 0x4D}): // "BM"
		return ".bmp"
	// A WebP file is a RIFF container whose form type, stored at bytes 8..12,
	// is "WEBP". The length check keeps the slice expression in bounds for
	// RIFF inputs of 8 to 11 bytes.
	case len(data) >= 12 &&
		bytes.HasPrefix(data, []byte{0x52, 0x49, 0x46, 0x46}) &&
		string(data[8:12]) == "WEBP":
		return ".webp"
	default:
		return fallback
	}
}
// IsEquivalentURL reports whether url1 and url2 produce the same string once
// each has been passed through normalizeURL.
func IsEquivalentURL(url1, url2 string) bool {
	return normalizeURL(url1) == normalizeURL(url2)
}
// normalizeURL returns a canonical string form of rawURL for comparison.
//
// The scheme is always rewritten to "https", so http and https URLs compare
// equal. The path then has a trailing "/index.html", a trailing ".html" and
// any trailing slashes stripped, repeatedly, until a full pass changes
// nothing. If rawURL cannot be parsed it is returned unchanged.
func normalizeURL(rawURL string) string {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return rawURL
	}

	parsed.Scheme = "https"

	// Stripping one suffix can expose another (e.g. "/a.html/"), so keep
	// going until the path reaches a fixed point.
	p := parsed.Path
	for changed := true; changed; {
		before := p
		p = strings.TrimSuffix(p, "/index.html")
		p = strings.TrimSuffix(p, ".html")
		p = strings.TrimRight(p, "/")
		changed = p != before
	}
	parsed.Path = p

	return parsed.String()
}
// DownloadFile issues an HTTP GET for urlStr and stores the response body in
// dirPath under a randomly generated UUID file name. The extension is chosen
// by GetImageExtension from the body's leading bytes.
//
// Parameters:
//   - urlStr: the URL to request.
//   - dirPath: destination directory; it is created, with parents, if missing.
//   - skipExist: not used by the current implementation.
//
// It returns the path of the written file. On any failure (request error
// after all retries, non-200 status, directory or file creation error, write
// or close error) it returns an empty path and a wrapped error.
func DownloadFile(urlStr string, dirPath string, skipExist bool) (filepath string, err error) {
	// Number of additional attempts made after the first request fails.
	const maxRetry = 100

	client := &fasthttp.Client{
		ReadTimeout:        30 * time.Second,
		WriteTimeout:       30 * time.Second,
		MaxConnWaitTimeout: 30 * time.Second,
		TLSConfig:          nil,
	}

	// Build the request.
	req := fasthttp.AcquireRequest()
	defer fasthttp.ReleaseRequest(req)
	req.SetRequestURI(urlStr)
	req.Header.SetMethod("GET")
	req.Header.Set("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36")

	resp := fasthttp.AcquireResponse()
	defer fasthttp.ReleaseResponse(resp)

	// Send the request, retrying immediately on transport errors.
	err = client.Do(req, resp)
	for retry := 0; err != nil && retry < maxRetry; retry++ {
		log.Printf("下载失败,正在重试... 错误: %v\n", err)
		err = client.Do(req, resp)
	}
	if err != nil {
		return "", fmt.Errorf("下载失败: %w", err)
	}

	if resp.StatusCode() != fasthttp.StatusOK {
		return "", fmt.Errorf("请求失败,状态码: %d", resp.StatusCode())
	}

	bodyData := resp.Body()

	// Pick the extension from the payload and generate a random file name.
	fileName := uuid.New().String() + GetImageExtension(bodyData)

	if err := os.MkdirAll(dirPath, 0755); err != nil {
		return "", fmt.Errorf("创建目录失败: %w", err)
	}

	filepath = path.Join(dirPath, fileName)

	out, err := os.Create(filepath)
	if err != nil {
		return "", fmt.Errorf("创建文件失败: %w", err)
	}

	if _, err = io.Copy(out, bytes.NewReader(bodyData)); err != nil {
		// Best-effort close; the copy error is the one worth reporting.
		_ = out.Close()
		return "", fmt.Errorf("保存失败: %w", err)
	}
	// Close explicitly instead of deferring so that a failure reported only
	// at close time is not mistaken for a successful save.
	if err = out.Close(); err != nil {
		return "", fmt.Errorf("保存失败: %w", err)
	}

	return filepath, nil
}