Skip to content

Commit 1c65f2f

Browse files
committed
fix(data):修复财联社电报API接口变更导致的数据获取失败问题(白屏/黑屏问题)
财联社电报接口从HTML页面爬取改为JSON API调用
1 parent f28fc5f commit 1c65f2f

3 files changed

Lines changed: 186 additions & 92 deletions

File tree

app.go

Lines changed: 28 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -26,7 +26,6 @@ import (
2626
"github.com/samber/lo"
2727
"golang.org/x/exp/slices"
2828

29-
"github.com/PuerkitoBio/goquery"
3029
"github.com/coocood/freecache"
3130
"github.com/duke-git/lancet/v2/convertor"
3231
"github.com/duke-git/lancet/v2/mathutil"
@@ -989,24 +988,42 @@ func (a *App) AddCronTask(follow data.FollowedStock) func() {
989988
}
990989

991990
func refreshTelegraphList() *[]string {
992-
url := "https://www.cls.cn/telegraph"
991+
clsURL := "https://www.cls.cn/api/cache?app=CailianpressWeb&name=telegraph&os=web&sv=8.7.9"
993992
response, err := data.SharedHTTPClient.R().
994993
SetHeader("Referer", "https://www.cls.cn/").
995-
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.60").
996-
Get(url)
994+
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0").
995+
Get(clsURL)
997996
if err != nil {
998997
return &[]string{}
999998
}
1000-
//logger.SugaredLogger.Info(string(response.Body()))
1001-
document, err := goquery.NewDocumentFromReader(strings.NewReader(string(response.Body())))
1002-
if err != nil {
999+
res := map[string]any{}
1000+
if err := json.Unmarshal(response.Body(), &res); err != nil {
10031001
return &[]string{}
10041002
}
10051003
var telegraph []string
1006-
document.Find("div.telegraph-content-box").Each(func(i int, selection *goquery.Selection) {
1007-
//logger.SugaredLogger.Info(selection.Text())
1008-
telegraph = append(telegraph, selection.Text())
1009-
})
1004+
if v, _ := convertor.ToInt(res["errno"]); v == 0 {
1005+
if res["data"] == nil {
1006+
return &[]string{}
1007+
}
1008+
dataMap, ok := res["data"].(map[string]any)
1009+
if !ok {
1010+
return &[]string{}
1011+
}
1012+
rollData, ok := dataMap["roll_data"].([]any)
1013+
if !ok {
1014+
return &[]string{}
1015+
}
1016+
for _, v := range rollData {
1017+
news, ok := v.(map[string]any)
1018+
if !ok {
1019+
continue
1020+
}
1021+
content, _ := news["content"].(string)
1022+
if content != "" {
1023+
telegraph = append(telegraph, content)
1024+
}
1025+
}
1026+
}
10101027
return &telegraph
10111028
}
10121029

backend/data/market_news_api.go

Lines changed: 128 additions & 71 deletions
Original file line numberDiff line numberDiff line change
@@ -33,35 +33,56 @@ func NewMarketNewsApi() *MarketNewsApi {
3333
}
3434

3535
func (m MarketNewsApi) TelegraphList(crawlTimeOut int64) *[]models.Telegraph {
36-
//https://www.cls.cn/nodeapi/telegraphList
37-
url := "https://www.cls.cn/nodeapi/telegraphList"
36+
//https://www.cls.cn/api/cache?app=CailianpressWeb&name=telegraph&os=web&sv=8.7.9
37+
clsURL := "https://www.cls.cn/api/cache?app=CailianpressWeb&name=telegraph&os=web&sv=8.7.9"
3838
res := map[string]any{}
3939
_, _ = SharedHTTPClient.SetTimeout(time.Duration(crawlTimeOut)*time.Second).R().
4040
SetHeader("Referer", "https://www.cls.cn/").
41-
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.60").
41+
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0").
4242
SetResult(&res).
43-
Get(url)
43+
Get(clsURL)
4444
var telegraphs []models.Telegraph
4545

46-
if v, _ := convertor.ToInt(res["error"]); v == 0 {
46+
if v, _ := convertor.ToInt(res["errno"]); v == 0 {
4747
if res["data"] == nil {
48-
return m.GetNewTelegraph(30)
48+
return m.GetNewTelegraph(crawlTimeOut)
49+
}
50+
data, ok := res["data"].(map[string]any)
51+
if !ok {
52+
return m.GetNewTelegraph(crawlTimeOut)
53+
}
54+
rollData, ok := data["roll_data"].([]any)
55+
if !ok || len(rollData) == 0 {
56+
return m.GetNewTelegraph(crawlTimeOut)
4957
}
50-
data := res["data"].(map[string]any)
51-
rollData := data["roll_data"].([]any)
5258
for _, v := range rollData {
53-
news := v.(map[string]any)
59+
news, ok := v.(map[string]any)
60+
if !ok {
61+
continue
62+
}
5463
ctime, _ := convertor.ToInt(news["ctime"])
5564
dataTime := time.Unix(ctime, 0).Local()
65+
66+
shareURL := ""
67+
if su, ok2 := news["shareurl"].(string); ok2 && su != "" {
68+
shareURL = su
69+
} else if id, ok2 := news["id"]; ok2 {
70+
shareURL = fmt.Sprintf("https://www.cls.cn/telegraph/%v", id)
71+
}
72+
73+
title, _ := news["title"].(string)
74+
content, _ := news["content"].(string)
75+
level, _ := news["level"].(string)
76+
5677
telegraph := models.Telegraph{
57-
Title: news["title"].(string),
58-
Content: news["content"].(string),
78+
Title: title,
79+
Content: content,
5980
Time: dataTime.Format("15:04:05"),
6081
DataTime: &dataTime,
61-
Url: news["shareurl"].(string),
82+
Url: shareURL,
6283
Source: "财联社电报",
63-
IsRed: (news["level"].(string)) != "C",
64-
SentimentResult: AnalyzeSentiment(news["content"].(string)).Description,
84+
IsRed: level != "C",
85+
SentimentResult: AnalyzeSentiment(content).Description,
6586
}
6687
cnt := int64(0)
6788
if telegraph.Title == "" {
@@ -74,13 +95,22 @@ func (m MarketNewsApi) TelegraphList(crawlTimeOut int64) *[]models.Telegraph {
7495
}
7596
telegraphs = append(telegraphs, telegraph)
7697
db.Dao.Model(&models.Telegraph{}).Create(&telegraph)
77-
////logger.SugaredLogger.Debugf("telegraph: %+v", &telegraph)
7898
if news["subjects"] == nil {
7999
continue
80100
}
81-
subjects := news["subjects"].([]any)
101+
subjects, ok := news["subjects"].([]any)
102+
if !ok {
103+
continue
104+
}
82105
for _, subject := range subjects {
83-
name := subject.(map[string]any)["subject_name"].(string)
106+
subMap, ok := subject.(map[string]any)
107+
if !ok {
108+
continue
109+
}
110+
name, ok := subMap["subject_name"].(string)
111+
if !ok || name == "" {
112+
continue
113+
}
84114
tag := &models.Tags{
85115
Name: name,
86116
Type: "subject",
@@ -91,77 +121,104 @@ func (m MarketNewsApi) TelegraphList(crawlTimeOut int64) *[]models.Telegraph {
91121
TagId: tag.ID,
92122
})
93123
}
94-
95124
}
96-
//db.Dao.Model(&models.Telegraph{}).Create(&telegraphs)
97-
////logger.SugaredLogger.Debugf("telegraphs: %+v", &telegraphs)
125+
} else {
126+
return m.GetNewTelegraph(crawlTimeOut)
98127
}
99128

100129
return &telegraphs
101130
}
102131

103132
func (m MarketNewsApi) GetNewTelegraph(crawlTimeOut int64) *[]models.Telegraph {
104-
url := "https://www.cls.cn/telegraph"
105-
response, _ := SharedHTTPClient.SetTimeout(time.Duration(crawlTimeOut)*time.Second).R().
133+
clsURL := "https://www.cls.cn/api/cache?app=CailianpressWeb&name=telegraphList&os=web&sv=8.7.9"
134+
res := map[string]any{}
135+
_, _ = SharedHTTPClient.SetTimeout(time.Duration(crawlTimeOut)*time.Second).R().
106136
SetHeader("Referer", "https://www.cls.cn/").
107-
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.60").
108-
Get(url)
137+
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0").
138+
SetResult(&res).
139+
Get(clsURL)
109140
var telegraphs []models.Telegraph
110-
//logger.SugaredLogger.Info(string(response.Body()))
111-
document, _ := goquery.NewDocumentFromReader(strings.NewReader(string(response.Body())))
112-
113-
document.Find(".telegraph-content-box").Each(func(i int, selection *goquery.Selection) {
114-
//logger.SugaredLogger.Info(selection.Text())
115-
telegraph := models.Telegraph{Source: "财联社电报"}
116-
spans := selection.Find("div.telegraph-content-box span")
117-
if spans.Length() == 2 {
118-
telegraph.Time = spans.First().Text()
119-
telegraph.Content = spans.Last().Text()
120-
if spans.Last().HasClass("c-de0422") {
121-
telegraph.IsRed = true
122-
}
141+
142+
if v, _ := convertor.ToInt(res["errno"]); v == 0 {
143+
if res["data"] == nil {
144+
return &telegraphs
145+
}
146+
data, ok := res["data"].(map[string]any)
147+
if !ok {
148+
return &telegraphs
149+
}
150+
rollData, ok := data["roll_data"].([]any)
151+
if !ok {
152+
return &telegraphs
123153
}
154+
for _, v := range rollData {
155+
news, ok := v.(map[string]any)
156+
if !ok {
157+
continue
158+
}
159+
ctime, _ := convertor.ToInt(news["ctime"])
160+
dataTime := time.Unix(ctime, 0).Local()
124161

125-
labels := selection.Find("div a.label-item")
126-
labels.Each(func(i int, selection *goquery.Selection) {
127-
if selection.HasClass("link-label-item") {
128-
telegraph.Url = selection.AttrOr("href", "")
129-
} else {
130-
tag := &models.Tags{
131-
Name: selection.Text(),
132-
Type: "subject",
133-
}
134-
db.Dao.Model(tag).Where("name=? and type=?", selection.Text(), "subject").FirstOrCreate(&tag)
135-
telegraph.SubjectTags = append(telegraph.SubjectTags, selection.Text())
162+
shareURL := ""
163+
if su, ok2 := news["shareurl"].(string); ok2 && su != "" {
164+
shareURL = su
165+
} else if id, ok2 := news["id"]; ok2 {
166+
shareURL = fmt.Sprintf("https://www.cls.cn/telegraph/%v", id)
136167
}
137-
})
138-
stocks := selection.Find("div.telegraph-stock-plate-box a")
139-
stocks.Each(func(i int, selection *goquery.Selection) {
140-
telegraph.StocksTags = append(telegraph.StocksTags, selection.Text())
141-
})
142168

143-
//telegraph = append(telegraph, ReplaceSensitiveWords(selection.Text()))
144-
if telegraph.Content != "" {
145-
telegraph.SentimentResult = AnalyzeSentiment(telegraph.Content).Description
169+
title, _ := news["title"].(string)
170+
content, _ := news["content"].(string)
171+
level, _ := news["level"].(string)
172+
173+
telegraph := models.Telegraph{
174+
Title: title,
175+
Content: content,
176+
Time: dataTime.Format("15:04:05"),
177+
DataTime: &dataTime,
178+
Url: shareURL,
179+
Source: "财联社电报",
180+
IsRed: level != "C",
181+
SentimentResult: AnalyzeSentiment(content).Description,
182+
}
146183
cnt := int64(0)
147-
db.Dao.Model(telegraph).Where("time=? and content=?", telegraph.Time, telegraph.Content).Count(&cnt)
148-
if cnt == 0 {
149-
db.Dao.Create(&telegraph)
150-
telegraphs = append(telegraphs, telegraph)
151-
for _, tag := range telegraph.SubjectTags {
152-
tagInfo := &models.Tags{}
153-
db.Dao.Model(models.Tags{}).Where("name=? and type=?", tag, "subject").First(&tagInfo)
154-
if tagInfo.ID > 0 {
155-
db.Dao.Model(models.TelegraphTags{}).Where("telegraph_id=? and tag_id=?", telegraph.ID, tagInfo.ID).FirstOrCreate(&models.TelegraphTags{
156-
TelegraphId: telegraph.ID,
157-
TagId: tagInfo.ID,
158-
})
159-
}
184+
if telegraph.Title == "" {
185+
db.Dao.Model(telegraph).Where("content=?", telegraph.Content).Count(&cnt)
186+
} else {
187+
db.Dao.Model(telegraph).Where("title=?", telegraph.Title).Count(&cnt)
188+
}
189+
if cnt > 0 {
190+
continue
191+
}
192+
telegraphs = append(telegraphs, telegraph)
193+
db.Dao.Model(&models.Telegraph{}).Create(&telegraph)
194+
if news["subjects"] == nil {
195+
continue
196+
}
197+
subjects, ok := news["subjects"].([]any)
198+
if !ok {
199+
continue
200+
}
201+
for _, subject := range subjects {
202+
subMap, ok := subject.(map[string]any)
203+
if !ok {
204+
continue
205+
}
206+
name, ok := subMap["subject_name"].(string)
207+
if !ok || name == "" {
208+
continue
160209
}
210+
tag := &models.Tags{
211+
Name: name,
212+
Type: "subject",
213+
}
214+
db.Dao.Model(tag).Where("name=? and type=?", name, "subject").FirstOrCreate(&tag)
215+
db.Dao.Model(models.TelegraphTags{}).Where("telegraph_id=? and tag_id=?", telegraph.ID, tag.ID).FirstOrCreate(&models.TelegraphTags{
216+
TelegraphId: telegraph.ID,
217+
TagId: tag.ID,
218+
})
161219
}
162-
163220
}
164-
})
221+
}
165222
return &telegraphs
166223
}
167224
func (m MarketNewsApi) GetNewsList(source string, limit int) *[]*models.Telegraph {

backend/data/openai_crawler.go

Lines changed: 30 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,7 @@ package data
22

33
import (
44
"context"
5+
"encoding/json"
56
"fmt"
67
"strings"
78
"time"
@@ -11,6 +12,7 @@ import (
1112

1213
"github.com/PuerkitoBio/goquery"
1314
"github.com/chromedp/chromedp"
15+
"github.com/duke-git/lancet/v2/convertor"
1416
"github.com/duke-git/lancet/v2/strutil"
1517
)
1618

@@ -151,24 +153,42 @@ func GetFinancialReports(stockCode string, crawlTimeOut int64) *[]string {
151153
}
152154

153155
func GetTelegraphList(crawlTimeOut int64) *[]string {
154-
url := "https://www.cls.cn/telegraph"
156+
clsURL := "https://www.cls.cn/api/cache?app=CailianpressWeb&name=telegraph&os=web&sv=8.7.9"
155157
response, err := SharedHTTPClient.SetTimeout(time.Duration(crawlTimeOut)*time.Second).R().
156158
SetHeader("Referer", "https://www.cls.cn/").
157-
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/117.0.0.0 Safari/537.36 Edg/117.0.2045.60").
158-
Get(url)
159+
SetHeader("User-Agent", "Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36 Edg/120.0.0.0").
160+
Get(clsURL)
159161
if err != nil {
160162
return &[]string{}
161163
}
162-
//logger.SugaredLogger.Info(string(response.Body()))
163-
document, err := goquery.NewDocumentFromReader(strings.NewReader(string(response.Body())))
164-
if err != nil {
164+
res := map[string]any{}
165+
if err := json.Unmarshal(response.Body(), &res); err != nil {
165166
return &[]string{}
166167
}
167168
var telegraph []string
168-
document.Find("div.telegraph-content-box").Each(func(i int, selection *goquery.Selection) {
169-
//logger.SugaredLogger.Info(selection.Text())
170-
telegraph = append(telegraph, ReplaceSensitiveWords(selection.Text()))
171-
})
169+
if v, _ := convertor.ToInt(res["errno"]); v == 0 {
170+
if res["data"] == nil {
171+
return &[]string{}
172+
}
173+
dataMap, ok := res["data"].(map[string]any)
174+
if !ok {
175+
return &[]string{}
176+
}
177+
rollData, ok := dataMap["roll_data"].([]any)
178+
if !ok {
179+
return &[]string{}
180+
}
181+
for _, v := range rollData {
182+
news, ok := v.(map[string]any)
183+
if !ok {
184+
continue
185+
}
186+
content, _ := news["content"].(string)
187+
if content != "" {
188+
telegraph = append(telegraph, ReplaceSensitiveWords(content))
189+
}
190+
}
191+
}
172192
return &telegraph
173193
}
174194

0 commit comments

Comments
 (0)