Skip to content

Commit d39591e

Browse files
完成数据持久化
1 parent 5f407b0 commit d39591e

File tree

9 files changed

+365
-1
lines changed

9 files changed

+365
-1
lines changed

README.md

Lines changed: 54 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,2 +1,55 @@
11
# xdncov
2-
本仓库仅用作练习gocolly爬虫,不可用于其他用途,若出现任何问题,与本人无关。
2+
本仓库仅用作练习`gocolly`爬虫,不可用于其他用途,若出现任何问题,与本人无关。
3+
4+
## 准备工作
5+
6+
### 文件准备
7+
8+
创建`configs`文件夹,按照`configs`文件夹里面的`example.config`,改成自己的配置文件并更改文件名以`.toml`为后缀。
9+
10+
例如`11111111111.toml`
11+
12+
```toml
13+
# 已返校同学可只更改前三项
14+
name = "张三" # 姓名
15+
id = 11111111111 # 学号(整数,不要加引号)
16+
password = "11111111" # 密码
17+
18+
# 其他内容可不更改
19+
province = "陕西省"
20+
city = "西安市"
21+
district = "长安区"
22+
address = "陕西省西安市西沣路兴隆段266号"
23+
ymtys = 0 # 一码通颜色
24+
tw = 1 # 体温
25+
sfzx = 1 # 是否在校
26+
sfcyglq = 0 # 是否处于隔离期
27+
sfyzz = 0 # 是否有症状
28+
29+
# 以下内容可能会由程序自动更改
30+
cookie = "" # 用作持久化
31+
path = "" # 文件保存路径
32+
```
33+
34+
## 执行
35+
36+
下载对应平台的可执行文件,执行即可。
37+
38+
### PowerShell
39+
40+
```powershell
41+
.\xdncov.exe
42+
```
43+
44+
### bash/zsh
45+
46+
```bash
47+
./xdncov
48+
```
49+
50+
## 后续工作
51+
52+
- [x] 持久化存储
53+
- [ ] 定时执行
54+
- [ ] Toml添加最后一次提交时间
55+
- [ ] 邮件提醒

compile.ps1

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,9 @@
1+
# Cross-compiles xdncov for 64-bit Windows, Linux and macOS.
#
# GOOS/GOARCH are process-wide environment variables, so the values that were
# set before the script started are saved and restored afterwards; otherwise
# the calling shell would be left targeting darwin/amd64 and every later
# `go build` in that session would silently cross-compile.
$previousGOOS = $env:GOOS
$previousGOARCH = $env:GOARCH

try {
    # All three targets share the same architecture.
    $env:GOARCH = "amd64"

    $env:GOOS = "windows"
    go build -o xdncov_windows_amd64.exe

    $env:GOOS = "linux"
    go build -o xdncov_linux_amd64

    $env:GOOS = "darwin"
    go build -o xdncov_darwin_amd64
}
finally {
    # Assigning $null removes a variable that was not set originally.
    $env:GOOS = $previousGOOS
    $env:GOARCH = $previousGOARCH
}

config.go

Lines changed: 76 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,76 @@
1+
package main
2+
3+
import (
4+
"bytes"
5+
"fmt"
6+
"io/ioutil"
7+
"log"
8+
"path"
9+
10+
"github.com/BurntSushi/toml"
11+
)
12+
13+
// StudentConfig holds the values decoded from one student's TOML
// configuration file. Each field is bound to the TOML key named in its tag.
type StudentConfig struct {
	// Account details.
	Name     string `toml:"name"`     // student name
	ID       int    `toml:"id"`       // student number, decoded as a TOML integer
	Password string `toml:"password"` // login password

	// Reported location.
	Province string `toml:"province"`
	City     string `toml:"city"`
	District string `toml:"district"`
	Address  string `toml:"address"`

	// Health-report answers.
	Ymtys   int `toml:"ymtys"`   // health-code colour
	Tw      int `toml:"tw"`      // body temperature
	Sfzx    int `toml:"sfzx"`    // whether the student is on campus
	Sfcyglq int `toml:"sfcyglq"` // whether the student is in quarantine
	Sfyzz   int `toml:"sfyzz"`   // whether the student has symptoms

	// Fields that the program itself may rewrite.
	Cookie string `toml:"cookie"` // serialized cookies kept for persistence
	Path   string `toml:"path"`   // path of the file this config is saved to
}
30+
31+
// CollectConfigs 收集toml配置文件
32+
func CollectConfigs(configDirectoryPath string) (studentConfigSlice []StudentConfig) {
33+
allFiles, err := ioutil.ReadDir(configDirectoryPath)
34+
if err != nil {
35+
fmt.Println("ioutil.ReadDir has error.", err)
36+
}
37+
38+
for _, eachFileName := range allFiles {
39+
40+
filenameWithSuffix := path.Base(eachFileName.Name())
41+
//filenameWithSuffix: 文件名带后缀。
42+
// fmt.Println("filenameWithSuffix =", filenameWithSuffix)
43+
44+
fileSuffix := path.Ext(filenameWithSuffix)
45+
//fileSuffix: 文件后缀
46+
// fmt.Println("fileSuffix =", fileSuffix)
47+
if fileSuffix == ".toml" {
48+
configPath := fmt.Sprintf("%s/%s", configDirectoryPath, eachFileName.Name())
49+
studentConfigSlice = append(studentConfigSlice, ReadConfig(configPath))
50+
}
51+
}
52+
53+
return studentConfigSlice
54+
}
55+
56+
// ReadConfig 读取配置文件
57+
func ReadConfig(configPath string) (tempConfig StudentConfig) {
58+
if _, err := toml.DecodeFile(configPath, &tempConfig); err != nil {
59+
log.Fatalln(err)
60+
}
61+
tempConfig.Path = configPath
62+
63+
return
64+
}
65+
66+
// UpdateConfig 更新配置文件
67+
func UpdateConfig(newConfig StudentConfig) {
68+
buf := new(bytes.Buffer)
69+
if err := toml.NewEncoder(buf).Encode(newConfig); err != nil {
70+
log.Fatal(err)
71+
}
72+
73+
if ioutil.WriteFile(newConfig.Path, buf.Bytes(), 0644) == nil {
74+
fmt.Println("写入文件成功:", newConfig.Path)
75+
}
76+
}

configs/example.config

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,19 @@
1+
# 已返校同学可只更改前三项
2+
name = "张三" # 姓名
3+
id = 11111111111 # 学号(整数,不要加引号)
4+
password = "11111111" # 密码
5+
6+
# 其他内容可不更改
7+
province = "陕西省"
8+
city = "西安市"
9+
district = "长安区"
10+
address = "陕西省西安市西沣路兴隆段266号"
11+
ymtys = 0 # 一码通颜色
12+
tw = 1 # 体温
13+
sfzx = 1 # 是否在校
14+
sfcyglq = 0 # 是否处于隔离期
15+
sfyzz = 0 # 是否有症状
16+
17+
# 以下内容可能会由程序自动更改
18+
cookie = "" # 用作持久化
19+
path = "" # 文件保存路径

go.mod

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,17 @@
1+
module xdncov
2+
3+
go 1.14
4+
5+
require (
6+
github.com/BurntSushi/toml v0.3.1
7+
github.com/PuerkitoBio/goquery v1.5.1 // indirect
8+
github.com/antchfx/htmlquery v1.2.3 // indirect
9+
github.com/antchfx/xmlquery v1.2.4 // indirect
10+
github.com/gobwas/glob v0.2.3 // indirect
11+
github.com/gocolly/colly v1.2.0
12+
github.com/kennygrant/sanitize v1.2.4 // indirect
13+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca // indirect
14+
github.com/temoto/robotstxt v1.1.1 // indirect
15+
golang.org/x/net v0.0.0-20200625001655-4c5254603344 // indirect
16+
google.golang.org/appengine v1.6.6 // indirect
17+
)

go.sum

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,51 @@
1+
github.com/BurntSushi/toml v0.3.1 h1:WXkYYl6Yr3qBf1K79EBnL4mak0OimBfB0XUf9Vl28OQ=
2+
github.com/BurntSushi/toml v0.3.1/go.mod h1:xHWCNGjB5oqiDr8zfno3MHue2Ht5sIBksp03qcyfWMU=
3+
github.com/PuerkitoBio/goquery v1.5.1 h1:PSPBGne8NIUWw+/7vFBV+kG2J/5MOjbzc7154OaKCSE=
4+
github.com/PuerkitoBio/goquery v1.5.1/go.mod h1:GsLWisAFVj4WgDibEWF4pvYnkVQBpKBKeU+7zCJoLcc=
5+
github.com/andybalholm/cascadia v1.1.0 h1:BuuO6sSfQNFRu1LppgbD25Hr2vLYW25JvxHs5zzsLTo=
6+
github.com/andybalholm/cascadia v1.1.0/go.mod h1:GsXiBklL0woXo1j/WYWtSYYC4ouU9PqHO0sqidkEA4Y=
7+
github.com/antchfx/htmlquery v1.2.3 h1:sP3NFDneHx2stfNXCKbhHFo8XgNjCACnU/4AO5gWz6M=
8+
github.com/antchfx/htmlquery v1.2.3/go.mod h1:B0ABL+F5irhhMWg54ymEZinzMSi0Kt3I2if0BLYa3V0=
9+
github.com/antchfx/xmlquery v1.2.4 h1:T/SH1bYdzdjTMoz2RgsfVKbM5uWh3gjDYYepFqQmFv4=
10+
github.com/antchfx/xmlquery v1.2.4/go.mod h1:KQQuESaxSlqugE2ZBcM/qn+ebIpt+d+4Xx7YcSGAIrM=
11+
github.com/antchfx/xpath v1.1.6 h1:6sVh6hB5T6phw1pFpHRQ+C4bd8sNI+O58flqtg7h0R0=
12+
github.com/antchfx/xpath v1.1.6/go.mod h1:Yee4kTMuNiPYJ7nSNorELQMr1J33uOpXDMByNYhvtNk=
13+
github.com/davecgh/go-spew v1.1.0 h1:ZDRjVQ15GmhC3fiQ8ni8+OwkZQO4DARzQgrnXU1Liz8=
14+
github.com/davecgh/go-spew v1.1.0/go.mod h1:J7Y8YcW2NihsgmVo/mv3lAwl/skON4iLHjSsI+c5H38=
15+
github.com/gobwas/glob v0.2.3 h1:A4xDbljILXROh+kObIiy5kIaPYD8e96x1tgBhUI5J+Y=
16+
github.com/gobwas/glob v0.2.3/go.mod h1:d3Ez4x06l9bZtSvzIay5+Yzi0fmZzPgnTbPcKjJAkT8=
17+
github.com/gocolly/colly v1.2.0 h1:qRz9YAn8FIH0qzgNUw+HT9UN7wm1oF9OBAilwEWpyrI=
18+
github.com/gocolly/colly v1.2.0/go.mod h1:Hof5T3ZswNVsOHYmba1u03W65HDWgpV5HifSuueE0EA=
19+
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e h1:1r7pUrabqp18hOBcwBwiTsbnFeTZHV9eER/QT5JVZxY=
20+
github.com/golang/groupcache v0.0.0-20200121045136-8c9f03a8e57e/go.mod h1:cIg4eruTrX1D+g88fzRXU5OdNfaM+9IcxsU14FzY7Hc=
21+
github.com/golang/protobuf v1.3.1 h1:YF8+flBXS5eO826T4nzqPrxfhQThhXl0YzfuUPu4SBg=
22+
github.com/golang/protobuf v1.3.1/go.mod h1:6lQm79b+lXiMfvg/cZm0SGofjICqVBUtrP5yJMmIC1U=
23+
github.com/kennygrant/sanitize v1.2.4 h1:gN25/otpP5vAsO2djbMhF/LQX6R7+O1TB4yv8NzpJ3o=
24+
github.com/kennygrant/sanitize v1.2.4/go.mod h1:LGsjYYtgxbetdg5owWB2mpgUL6e2nfw2eObZ0u0qvak=
25+
github.com/pmezard/go-difflib v1.0.0 h1:4DBwDE0NGyQoBHbLQYPwSUPoCMWR5BEzIk/f1lZbAQM=
26+
github.com/pmezard/go-difflib v1.0.0/go.mod h1:iKH77koFhYxTK1pcRnkKkqfTogsbg7gZNVY4sRDYZ/4=
27+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca h1:NugYot0LIVPxTvN8n+Kvkn6TrbMyxQiuvKdEwFdR9vI=
28+
github.com/saintfish/chardet v0.0.0-20120816061221-3af4cd4741ca/go.mod h1:uugorj2VCxiV1x+LzaIdVa9b4S4qGAcH6cbhh4qVxOU=
29+
github.com/stretchr/objx v0.1.0/go.mod h1:HFkY916IF+rwdDfMAkV7OtwuqBVzrE8GR6GFx+wExME=
30+
github.com/stretchr/testify v1.3.0 h1:TivCn/peBQ7UY8ooIcPgZFpTNSz0Q2U6UrFlUfqbe0Q=
31+
github.com/stretchr/testify v1.3.0/go.mod h1:M5WIy9Dh21IEIfnGCwXGc5bZfKNJtfHm1UVUgZn+9EI=
32+
github.com/temoto/robotstxt v1.1.1 h1:Gh8RCs8ouX3hRSxxK7B1mO5RFByQ4CmJZDwgom++JaA=
33+
github.com/temoto/robotstxt v1.1.1/go.mod h1:+1AmkuG3IYkh1kv0d2qEB9Le88ehNO0zwOr3ujewlOo=
34+
golang.org/x/crypto v0.0.0-20190308221718-c2843e01d9a2/go.mod h1:djNgcEr1/C05ACkg1iLfiJU5Ep61QUkGW8qpdssI0+w=
35+
golang.org/x/crypto v0.0.0-20200622213623-75b288015ac9/go.mod h1:LzIPMQfyMNhhGPhUkYOs5KpL4U8rLKemX1yGLhDgUto=
36+
golang.org/x/net v0.0.0-20180218175443-cbe0f9307d01/go.mod h1:mL1N/T3taQHkDXs73rZJwtUhF3w3ftmwwsq0BUmARs4=
37+
golang.org/x/net v0.0.0-20190404232315-eb5bcb51f2a3/go.mod h1:t9HGtf8HONx5eT2rtn7q6eTqICYqUVnKs3thJo3Qplg=
38+
golang.org/x/net v0.0.0-20190603091049-60506f45cf65/go.mod h1:HSz+uSET+XFnRR8LxR5pz3Of3rY3CfYBVs4xY44aLks=
39+
golang.org/x/net v0.0.0-20200202094626-16171245cfb2/go.mod h1:z5CRVTTTmAJ677TzLLGU+0bjPO0LkuOLi4/5GtJWs/s=
40+
golang.org/x/net v0.0.0-20200421231249-e086a090c8fd/go.mod h1:qpuaurCH72eLCgpAm/N6yyVIVM9cpaDIP3A8BGJEC5A=
41+
golang.org/x/net v0.0.0-20200625001655-4c5254603344 h1:vGXIOMxbNfDTk/aXCmfdLgkrSV+Z2tcbze+pEc3v5W4=
42+
golang.org/x/net v0.0.0-20200625001655-4c5254603344/go.mod h1:/O7V0waA8r7cgGh81Ro3o1hOxt32SMVPicZroKQ2sZA=
43+
golang.org/x/sys v0.0.0-20190215142949-d0b11bdaac8a/go.mod h1:STP8DvDyc/dI5b8T5hshtkjS+E42TnysNCUPdjciGhY=
44+
golang.org/x/sys v0.0.0-20190412213103-97732733099d/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
45+
golang.org/x/sys v0.0.0-20200323222414-85ca7c5b95cd/go.mod h1:h1NjWce9XRLGQEsW7wpKNCjG9DtNlClVuFLEZdDNbEs=
46+
golang.org/x/text v0.3.0/go.mod h1:NqM8EUOU14njkJ3fqMW+pc6Ldnwhi/IjpwHt7yyuwOQ=
47+
golang.org/x/text v0.3.2 h1:tW2bmiBqwgJj/UpqtC8EpXEZVYOwU0yG4iWbprSVAcs=
48+
golang.org/x/text v0.3.2/go.mod h1:bEr9sfX3Q8Zfm5fL9x+3itogRgK3+ptLWKqgva+5dAk=
49+
golang.org/x/tools v0.0.0-20180917221912-90fa682c2a6e/go.mod h1:n7NCudcB/nEzxVGmLbDWY5pfWTLqBcC2KZ6jyYvM4mQ=
50+
google.golang.org/appengine v1.6.6 h1:lMO5rYAqUxkmaj76jAkRUvt5JZgFymx/+Q5Mzfivuhc=
51+
google.golang.org/appengine v1.6.6/go.mod h1:8WjMMxjGQR8xUklV/ARdw2HLXBOI7O7uCIDZVag1xfc=

httpRequest.go

Lines changed: 105 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,105 @@
1+
package main
2+
3+
import (
4+
"fmt"
5+
"log"
6+
"strconv"
7+
8+
"github.com/gocolly/colly"
9+
"github.com/gocolly/colly/storage"
10+
)
11+
12+
// Endpoints and request header values used when talking to the report site.
const (
	// BaseURL is the scheme and host shared by every endpoint.
	BaseURL = "https://xxcapp.xidian.edu.cn"
	// LoginURL is the endpoint the login form is posted to.
	LoginURL = BaseURL + "/uc/wap/login/check"
	// SaveURL is the endpoint the report form is posted to.
	SaveURL = BaseURL + "/ncov/wap/open-report/save"
	// MyUserAgent is a mobile-browser User-Agent string.
	MyUserAgent = "Mozilla/5.0 (Linux; Android 6.0; Nexus 5 Build/MRA58N) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/84.0.4147.45 Mobile Safari/537.36 Edg/84.0.522.20"
)
22+
23+
// PostSaveForm 提交晨午检表单
24+
func PostSaveForm(newClient *colly.Collector, config StudentConfig) {
25+
savePostForm := map[string]string{
26+
"province": config.Province,
27+
"city": config.City,
28+
"district": config.District,
29+
"address": config.Address,
30+
"ymtys": strconv.Itoa(config.Ymtys),
31+
"tw": strconv.Itoa(config.Tw),
32+
"sfzx": strconv.Itoa(config.Sfzx),
33+
"sfcyglq": strconv.Itoa(config.Sfcyglq),
34+
"sfyzz": strconv.Itoa(config.Sfyzz),
35+
}
36+
err := newClient.Post(SaveURL, savePostForm)
37+
if err != nil {
38+
log.Fatal(2, err)
39+
}
40+
}
41+
42+
// SignIn HTTP主要步骤
43+
func SignIn(config StudentConfig) {
44+
firstPostClient := colly.NewCollector()
45+
firstPostClient.UserAgent = MyUserAgent
46+
47+
firstPostSuccessFlag := false
48+
if config.Cookie != "" {
49+
firstPostClient.OnRequest(func(request *colly.Request) {
50+
firstPostClient.SetCookies(BaseURL, storage.UnstringifyCookies(config.Cookie))
51+
})
52+
53+
}
54+
firstPostClient.OnResponse(func(r *colly.Response) {
55+
tempResponse := UnmarshalHTTPResponse(r.Body)
56+
if tempResponse.M != "" {
57+
if tempResponse.M == "操作成功" || tempResponse.M == "您已上报过" {
58+
firstPostSuccessFlag = true
59+
fmt.Println(config.ID, "第一次提交即成功")
60+
}
61+
}
62+
})
63+
PostSaveForm(firstPostClient, config)
64+
65+
if !firstPostSuccessFlag {
66+
firstLoginClient := firstPostClient.Clone()
67+
loginFlag := Login(firstLoginClient, strconv.Itoa(config.ID), config.Password)
68+
if loginFlag {
69+
fmt.Println(config.ID, "登陆成功")
70+
}
71+
72+
secondPostClient := firstLoginClient.Clone()
73+
secondPostClient.OnResponse(func(response *colly.Response) {
74+
newCookie := storage.StringifyCookies(secondPostClient.Cookies(response.Request.URL.String()))
75+
config.Cookie = newCookie
76+
UpdateConfig(config)
77+
fmt.Println(string(response.Body))
78+
})
79+
PostSaveForm(secondPostClient, config)
80+
}
81+
}
82+
83+
// Login 当持久化未能通过时,模拟登录以获得cookie
84+
func Login(newClient *colly.Collector, id string, password string) (loginFlag bool) {
85+
loginFlag = false
86+
87+
newClient.OnResponse(func(r *colly.Response) {
88+
tempResponse := UnmarshalHTTPResponse(r.Body)
89+
if tempResponse.M != "" {
90+
if tempResponse.M == "操作成功" {
91+
loginFlag = true
92+
}
93+
}
94+
})
95+
96+
err := newClient.Post(LoginURL, map[string]string{
97+
"username": id,
98+
"password": password,
99+
})
100+
if err != nil {
101+
log.Fatal(1, err)
102+
}
103+
104+
return
105+
}

util.go

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
package main
2+
3+
import "encoding/json"
4+
5+
// HTTPResponse is the JSON message returned by the server.
type HTTPResponse struct {
	E int         `json:"e"` // value of the "e" key (presumably a status code — confirm against the API)
	M string      `json:"m"` // value of the "m" key, the text message of the reply
	D EmptyStruct `json:"d"` // value of the "d" key; none of its contents are kept
}

// EmptyStruct is the type of HTTPResponse.D. It declares no fields.
type EmptyStruct struct{}

// UnmarshalHTTPResponse decodes a JSON reply into an HTTPResponse.
//
// Decoding is best effort: the error from json.Unmarshal is discarded, fields
// that were decoded are returned, and fields that were not keep their zero
// values.
func UnmarshalHTTPResponse(response []byte) (newHTTPResponse HTTPResponse) {
	// The error is intentionally not propagated; the signature has no error
	// result and the returned value is still usable for whatever was decoded.
	_ = json.Unmarshal(response, &newHTTPResponse)

	return newHTTPResponse
}

xdncov.go

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,12 @@
1+
package main
2+
3+
func main() {
4+
5+
studentConfigSlice := CollectConfigs("./configs")
6+
7+
for _, eachConfig := range studentConfigSlice {
8+
// go signIn(eachConfig)
9+
SignIn(eachConfig)
10+
}
11+
12+
}

0 commit comments

Comments
 (0)