qq_bot/util/url.go
2024-10-13 15:18:43 +08:00

34 lines
732 B
Go
Raw Blame History

This file contains ambiguous Unicode characters

This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.

package util
import (
"net/url"
"strings"
)
// IsEquivalentURL reports whether url1 and url2 are identical once each has
// been passed through normalizeURL.
func IsEquivalentURL(url1, url2 string) bool {
	return normalizeURL(url1) == normalizeURL(url2)
}
// normalizeURL returns a canonical form of rawURL for equivalence checks.
//
// The scheme is always rewritten to "https" (so http and https compare equal),
// the host is lower-cased because host names are case-insensitive, a trailing
// "/index.html" or ".html" is dropped from the path, and any trailing slashes
// are removed. The query and fragment are not modified. If rawURL cannot be
// parsed, it is returned unchanged.
func normalizeURL(rawURL string) string {
	u, err := url.Parse(rawURL)
	if err != nil {
		// Unparseable input is compared verbatim by the caller.
		return rawURL
	}

	// Treat http and https as the same protocol.
	u.Scheme = "https"

	// Host names are case-insensitive, so "Example.com" and "example.com"
	// must normalize to the same string.
	u.Host = strings.ToLower(u.Host)

	// Drop a trailing "/index.html", then a trailing ".html" extension.
	u.Path = strings.TrimSuffix(u.Path, "/index.html")
	u.Path = strings.TrimSuffix(u.Path, ".html")

	// Drop any trailing slashes.
	u.Path = strings.TrimRight(u.Path, "/")

	return u.String()
}