201 lines
5.4 KiB
Go
201 lines
5.4 KiB
Go
package urlparser
|
|
|
|
import (
	"encoding/json"
	"fmt"
	"log"
	"net/http"
	"net/url"
	"regexp"
	"strings"
	"time"

	"git.lxtend.com/qqbot/constants"
	"git.lxtend.com/qqbot/handler"
	"git.lxtend.com/qqbot/model"
	"git.lxtend.com/qqbot/util"
)
|
|
|
|
func init() {
|
|
handler.RegisterFrontMatchHandler("[CQ:json", cqJsonUrlParser, constants.LEVEL_USER)
|
|
handler.RegisterFrontMatchHandler("http://", plainTextUrlParser, constants.LEVEL_USER)
|
|
handler.RegisterFrontMatchHandler("https://", plainTextUrlParser, constants.LEVEL_USER)
|
|
}
|
|
|
|
func plainTextUrlParser(msg model.Message) (reply *model.Reply) {
|
|
url := msg.RawMsg
|
|
url = strings.Split(url, " ")[0]
|
|
url = strings.Split(url, "\n")[0]
|
|
url = strings.Split(url, "\r")[0]
|
|
url = strings.Split(url, "\t")[0]
|
|
url, _ = removeTrackingParams(url)
|
|
newUrl, err := resolveFinalURL(url)
|
|
if err != nil {
|
|
return &model.Reply{
|
|
ReplyMsg: "",
|
|
ReferOriginMsg: false,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
newUrl, _ = removeTrackingParams(newUrl)
|
|
if util.IsEquivalentURL(url, newUrl) {
|
|
return &model.Reply{
|
|
ReplyMsg: "",
|
|
ReferOriginMsg: false,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
return &model.Reply{
|
|
ReplyMsg: newUrl,
|
|
ReferOriginMsg: true,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
|
|
func cqJsonUrlParser(msg model.Message) (reply *model.Reply) {
|
|
newMsg := strings.ReplaceAll(msg.RawMsg, "\n", "")
|
|
qqdocurl, err := extractQQDocURL(newMsg)
|
|
if err != nil {
|
|
return &model.Reply{
|
|
ReplyMsg: "",
|
|
ReferOriginMsg: true,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
|
|
return &model.Reply{
|
|
ReplyMsg: qqdocurl,
|
|
ReferOriginMsg: true,
|
|
FromMsg: msg,
|
|
}
|
|
}
|
|
|
|
// extractQQDocURL 从字符串中提取 JSON 数据部分
|
|
func extractQQDocURL(input string) (string, error) {
|
|
// 使用非贪婪匹配提取 JSON 数据部分
|
|
re := regexp.MustCompile(`\{.*\}`)
|
|
jsonPart := re.FindString(input)
|
|
if jsonPart == "" {
|
|
return "", fmt.Errorf("无法找到 JSON 数据部分")
|
|
}
|
|
|
|
// 替换 HTML 实体为普通字符
|
|
jsonPart = strings.ReplaceAll(jsonPart, ",", ",")
|
|
jsonPart = strings.ReplaceAll(jsonPart, "[", "[")
|
|
jsonPart = strings.ReplaceAll(jsonPart, "]", "]")
|
|
jsonPart = strings.ReplaceAll(jsonPart, "&", "&")
|
|
url, err := parseQQDocURL(jsonPart)
|
|
if err != nil {
|
|
return "", fmt.Errorf("解析 JSON 失败: %w", err)
|
|
}
|
|
return url, nil
|
|
}
|
|
|
|
// parseQQDocURL 从 JSON 中提取 qqdocurl 字段
|
|
func parseQQDocURL(jsonStr string) (string, error) {
|
|
var jsonData map[string]interface{}
|
|
|
|
// 解析 JSON 数据
|
|
if err := json.Unmarshal([]byte(jsonStr), &jsonData); err != nil {
|
|
return "", fmt.Errorf("解析 JSON 失败: %w", err)
|
|
}
|
|
|
|
url := ""
|
|
|
|
if jsonData["app"] == "com.tencent.miniapp_01" { // 定位到 meta -> detail_1 -> qqdocurl
|
|
meta, ok := jsonData["meta"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 meta 字段")
|
|
}
|
|
detail, ok := meta["detail_1"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 detail_1 字段")
|
|
}
|
|
url, ok = detail["qqdocurl"].(string)
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 qqdocurl 字段")
|
|
}
|
|
url, _ = resolveFinalURL(url)
|
|
url, _ = removeTrackingParams(url)
|
|
} else if jsonData["app"] == "com.tencent.structmsg" { // 定位到 meta -> news -> jumpUrl
|
|
meta, ok := jsonData["meta"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 meta 字段")
|
|
}
|
|
news, ok := meta["news"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 news 字段")
|
|
}
|
|
url, ok = news["jumpUrl"].(string)
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 jumpUrl 字段")
|
|
}
|
|
} else if jsonData["app"] == "com.tencent.troopsharecard" { // 定位到 meta -> contact -> jumpUrl
|
|
meta, ok := jsonData["meta"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 meta 字段")
|
|
}
|
|
contact, ok := meta["contact"].(map[string]interface{})
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 contact 字段")
|
|
}
|
|
url, ok = contact["jumpUrl"].(string)
|
|
if !ok {
|
|
return "", fmt.Errorf("找不到 jumpUrl 字段")
|
|
}
|
|
url, _ = resolveFinalURL(url)
|
|
url, _ = removeTrackingParams(url)
|
|
} else {
|
|
return "", fmt.Errorf("未知的 app 类型")
|
|
}
|
|
|
|
return url, nil
|
|
}
|
|
|
|
// removeTrackingParams returns rawURL reduced to its scheme, host and path.
// The whole query string, the fragment and any userinfo are dropped — not
// only parameters known to be trackers.
//
// The result is serialized with url.URL.String, so a percent-encoded path
// stays encoded ("/a%20b" is not turned into "/a b") and an input without
// scheme and host does not become "://".
//
// It returns an error when rawURL cannot be parsed.
func removeTrackingParams(rawURL string) (string, error) {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return "", err
	}

	// Copy only the parts to keep. RawPath carries the original escaping
	// when it differs from the default encoding of Path.
	kept := url.URL{
		Scheme:  parsed.Scheme,
		Host:    parsed.Host,
		Path:    parsed.Path,
		RawPath: parsed.RawPath,
	}
	return kept.String(), nil
}
|
|
|
|
// maxRedirectHops bounds how many redirects resolveFinalURL follows before
// giving up.
const maxRedirectHops = 10

// redirectProbeClient is shared by every resolveFinalURL call. Automatic
// redirect following is disabled so each hop can be inspected, and a timeout
// keeps an unresponsive server from blocking the caller indefinitely.
var redirectProbeClient = &http.Client{
	Timeout: 10 * time.Second,
	CheckRedirect: func(req *http.Request, via []*http.Request) error {
		return http.ErrUseLastResponse
	},
}

// resolveFinalURL follows HTTP redirects starting at initialURL and returns
// the first URL that does not redirect any further.
//
// Redirects are followed one hop at a time with GET requests. If a hop
// points back to a URL already visited (including itself), the chain is
// treated as finished and the current URL is returned. After more than
// maxRedirectHops redirects an error is returned.
//
// It also returns an error when a URL in the chain cannot be parsed, when a
// request fails, or when a redirect response has no usable Location header.
func resolveFinalURL(initialURL string) (string, error) {
	current := initialURL
	visited := map[string]struct{}{current: {}}

	for hop := 0; hop <= maxRedirectHops; hop++ {
		next, redirected, err := probeRedirect(current)
		if err != nil {
			return "", err
		}
		if !redirected {
			return current, nil
		}
		// A redirect back to a known URL is a cycle; stop here rather than
		// looping forever.
		if _, seen := visited[next]; seen {
			return current, nil
		}
		log.Printf("重定向至: %s\n", next)
		visited[next] = struct{}{}
		current = next
	}
	return "", fmt.Errorf("重定向次数超过上限 %d", maxRedirectHops)
}

// probeRedirect issues a single GET to rawURL without following redirects.
// It reports the absolute redirect target and true when the response is a
// 301, 302, 303, 307 or 308; otherwise it reports false. The response body
// is closed before returning, so no connection is held across hops.
func probeRedirect(rawURL string) (string, bool, error) {
	parsed, err := url.Parse(rawURL)
	if err != nil {
		return "", false, fmt.Errorf("URL 解析失败: %w", err)
	}

	resp, err := redirectProbeClient.Get(parsed.String())
	if err != nil {
		return "", false, fmt.Errorf("请求失败: %w", err)
	}
	defer resp.Body.Close()

	switch resp.StatusCode {
	case http.StatusMovedPermanently, http.StatusFound, http.StatusSeeOther,
		http.StatusTemporaryRedirect, http.StatusPermanentRedirect:
		// Redirect status: read the target below.
	default:
		return "", false, nil
	}

	// Location resolves a relative header value against the request URL.
	target, err := resp.Location()
	if err != nil {
		return "", false, fmt.Errorf("解析重定向地址失败: %w", err)
	}
	return target.String(), true, nil
}
|