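// Package urlparser registers message handlers that pull a link out of a chat
// message (a plain http/https URL or a CQ JSON card), strip its query string,
// follow its redirects, and reply with the resulting URL.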
package urlparser
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"

	"git.lxtend.com/qqbot/handler"
	"git.lxtend.com/qqbot/model"
	"git.lxtend.com/qqbot/util"
)
|
|
|
|
func init() {
|
|
handler.RegisterFrontMatchHandler("[CQ:json,data=", cqJsonUrlParser)
|
|
handler.RegisterFrontMatchHandler("http://", plainTextUrlParser)
|
|
handler.RegisterFrontMatchHandler("https://", plainTextUrlParser)
|
|
}
|
|
|
|
func plainTextUrlParser(msg model.Message) (reply model.Reply) {
|
|
url := msg.RawMsg
|
|
url = strings.Split(url, " ")[0]
|
|
url = strings.Split(url, "\n")[0]
|
|
url = strings.Split(url, "\r")[0]
|
|
url = strings.Split(url, "\t")[0]
|
|
url, _ = removeTrackingParams(url)
|
|
newUrl, err := resolveFinalURL(url)
|
|
if err != nil {
|
|
return model.Reply{
|
|
ReplyMsg: "",
|
|
ReferOriginMsg: false,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
newUrl, _ = removeTrackingParams(newUrl)
|
|
if util.IsEquivalentURL(url, newUrl) {
|
|
return model.Reply{
|
|
ReplyMsg: "",
|
|
ReferOriginMsg: false,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
return model.Reply{
|
|
ReplyMsg: newUrl,
|
|
ReferOriginMsg: true,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
|
|
func cqJsonUrlParser(msg model.Message) (reply model.Reply) {
|
|
qqdocurl, err := extractQQDocURL(msg.RawMsg)
|
|
if err != nil {
|
|
return model.Reply{
|
|
ReplyMsg: "",
|
|
ReferOriginMsg: true,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
|
|
return model.Reply{
|
|
ReplyMsg: qqdocurl,
|
|
ReferOriginMsg: true,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
|
|
func extractQQDocURL(input string) (string, error) {
|
|
// 使用正则表达式提取 JSON 数据部分
|
|
re := regexp.MustCompile(`\{.*\}`)
|
|
jsonPart := re.FindString(input)
|
|
if jsonPart == "" {
|
|
return "", fmt.Errorf("无法找到 JSON 数据部分")
|
|
}
|
|
|
|
// 解析 JSON 数据
|
|
var jsonData map[string]interface{}
|
|
// 替换 HTML 实体为普通字符
|
|
jsonPart = strings.ReplaceAll(jsonPart, ",", ",")
|
|
jsonPart = strings.ReplaceAll(jsonPart, "[", "[")
|
|
jsonPart = strings.ReplaceAll(jsonPart, "]", "]")
|
|
jsonPart = strings.ReplaceAll(jsonPart, "&", "&")
|
|
|
|
if err := json.Unmarshal([]byte(jsonPart), &jsonData); err != nil {
|
|
return "", fmt.Errorf("解析 JSON 失败: %w", err)
|
|
}
|
|
|
|
// 定位到 meta -> detail_1 -> qqdocurl
|
|
meta, ok := jsonData["meta"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 meta 字段")
|
|
}
|
|
detail, ok := meta["detail_1"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 detail_1 字段")
|
|
}
|
|
qqdocurl, ok := detail["qqdocurl"].(string)
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 qqdocurl 字段")
|
|
}
|
|
|
|
qqdocurl, _ = removeTrackingParams(qqdocurl)
|
|
qqdocurl, _ = resolveFinalURL(qqdocurl)
|
|
qqdocurl, _ = removeTrackingParams(qqdocurl)
|
|
|
|
return qqdocurl, nil
|
|
}
|
|
|
|
// removeTrackingParams reduces rawURL to "scheme://host/path".
//
// Despite the name it is not selective: the whole query string, the fragment
// and any userinfo are dropped. The path is emitted in its escaped form, so
// percent-encoded characters in the input stay percent-encoded in the output.
//
// An error is returned when rawURL cannot be parsed or has no scheme or no
// host, since the "scheme://host/path" form would not be a usable link.
func removeTrackingParams(rawURL string) (string, error) {
	parsedURL, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}
	if parsedURL.Scheme == "" || parsedURL.Host == "" {
		return "", fmt.Errorf("URL 缺少协议或主机: %q", rawURL)
	}

	// EscapedPath, not Path: Path is the decoded form and would turn "%20"
	// into a raw space, producing an invalid URL.
	return parsedURL.Scheme + "://" + parsedURL.Host + parsedURL.EscapedPath(), nil
}
|
|
|
|
// maxRedirectHops bounds how many redirects resolveFinalURL follows before
// giving up, so a redirect loop cannot keep it busy forever.
const maxRedirectHops = 10

// redirectProbeClient is shared by every lookup. Automatic redirect following
// is disabled so each hop can be inspected, and a timeout keeps a stalled
// server from blocking the caller indefinitely.
var redirectProbeClient = &http.Client{
	Timeout: 10 * time.Second,
	CheckRedirect: func(req *http.Request, via []*http.Request) error {
		return http.ErrUseLastResponse
	},
}

// resolveFinalURL follows the redirect chain starting at initialURL and
// returns the first URL that does not answer with a redirect.
//
// Each hop is a GET request; 301, 302, 303, 307 and 308 responses are
// followed, and every hop is printed to standard output. If initialURL itself
// is not a redirect it is returned unchanged.
//
// An error is returned when a URL in the chain cannot be parsed, a request
// fails, a redirect response has no usable Location header, or more than
// maxRedirectHops redirects are encountered.
//
// NOTE(review): callers pass URLs taken from chat messages. Nothing here stops
// a request to an internal or loopback address; add an allow/deny check if
// that matters for the deployment.
func resolveFinalURL(initialURL string) (string, error) {
	current := initialURL
	for hop := 0; hop <= maxRedirectHops; hop++ {
		next, redirected, err := probeRedirect(current)
		if err != nil {
			return "", err
		}
		if !redirected {
			return current, nil
		}
		fmt.Printf("重定向至: %s\n", next)
		current = next
	}
	return "", fmt.Errorf("重定向次数过多(超过 %d 次)", maxRedirectHops)
}

// probeRedirect issues a single GET to target and reports whether the response
// is a redirect and, if so, the absolute URL it points to.
func probeRedirect(target string) (next string, redirected bool, err error) {
	// Parse first so a malformed URL is reported without touching the network.
	parsedURL, err := url.Parse(target)
	if err != nil {
		return "", false, fmt.Errorf("URL 解析失败: %w", err)
	}

	resp, err := redirectProbeClient.Get(parsedURL.String())
	if err != nil {
		return "", false, fmt.Errorf("请求失败: %w", err)
	}
	// Closed before the next hop starts, so at most one body is open at a time.
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusMovedPermanently, http.StatusFound, http.StatusSeeOther,
		http.StatusTemporaryRedirect, http.StatusPermanentRedirect:
	default:
		return "", false, nil
	}

	// Location resolves a relative header value against the request URL.
	location, err := resp.Location()
	if err != nil {
		return "", false, fmt.Errorf("解析重定向地址失败: %w", err)
	}
	return location.String(), true, nil
}
|