Skip to content

Commit ab2b09b

Browse files
committed
重构队列逻辑,优化查询策略
1 parent 25695cc commit ab2b09b

File tree

30 files changed

+1568
-782
lines changed

30 files changed

+1568
-782
lines changed

common/config.go

Lines changed: 100 additions & 41 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,11 @@
11
package common
22

33
import (
4-
"path/filepath"
5-
"time"
6-
74
"github.com/tidwall/gjson"
85
"github.com/wgpsec/ENScan/common/utils"
9-
"go.mongodb.org/mongo-driver/bson/primitive"
6+
"path/filepath"
7+
"regexp"
8+
"time"
109
)
1110

1211
var (
@@ -18,41 +17,55 @@ var (
1817
)
1918

2019
type ENOptions struct {
21-
KeyWord string // Keyword of Search
22-
CompanyID string // Company ID
23-
GroupID string // Company ID
24-
InputFile string // Scan Input File
25-
Output string
26-
ScanType string
27-
Proxy string
28-
ISKeyPid bool
29-
IsGroup bool
30-
IsGetBranch bool
31-
IsSearchBranch bool
32-
InvestNum float64
33-
DelayTime int
34-
DelayMaxTime int64
35-
TimeOut int
36-
GetFlags string
37-
Version bool
38-
IsHold bool
39-
IsSupplier bool
40-
IsShow bool
41-
GetField []string
42-
GetType []string
43-
IsDebug bool
44-
IsJsonOutput bool
45-
Deep int
46-
UPOutFile string
47-
IsMergeOut bool //聚合
48-
IsNoMerge bool //聚合
49-
OutPutType string // 导出文件类型
50-
IsApiMode bool
51-
IsMCPServer bool
52-
IsPlugins bool // 是否作为后置插件查询
53-
IsFast bool // 是否快速查询
54-
ENConfig *ENConfig
55-
BranchFilter string
20+
KeyWord string // Keyword of Search
21+
CompanyID string // Company ID
22+
GroupID string // Company ID
23+
InputFile string // Scan Input File
24+
Output string
25+
ScanType string
26+
Proxy string
27+
ISKeyPid bool
28+
IsGroup bool
29+
IsGetBranch bool
30+
IsSearchBranch bool
31+
InvestNum float64
32+
DelayTime int
33+
DelayMaxTime int64
34+
TimeOut int
35+
GetFlags string
36+
Version bool
37+
IsHold bool
38+
IsSupplier bool
39+
IsShow bool
40+
GetField []string
41+
GetType []string
42+
IsDebug bool
43+
IsJsonOutput bool
44+
Deep int
45+
IsMergeOut bool //聚合
46+
IsNoMerge bool //聚合
47+
OutPutType string // 导出文件类型
48+
IsApiMode bool
49+
IsMCPServer bool
50+
IsPlugins bool // 是否作为后置插件查询
51+
IsFast bool // 是否快速查询
52+
ENConfig *ENConfig
53+
BranchFilter string
54+
NameFilterRegexp *regexp.Regexp
55+
}
56+
57+
// ENSearch holds the parameters required for a search.
58+
// It is not yet decided how this will be used, especially with multiple processes.
// NOTE(review): the field set mirrors a subset of ENOptions; per-field meanings
// below are inferred from names only — confirm against the callers.
59+
type ENSearch struct {
60+
KeyWord string // Keyword of Search
61+
GetField []string
62+
GetType []string
63+
IsGetBranch bool
64+
NameFilterRegexp *regexp.Regexp // compiled regular expression; presumably a name filter — TODO confirm
65+
InvestNum float64
66+
IsHold bool
67+
IsSupplier bool
68+
Deep int
5669
}
5770

5871
// EnsGo EnScan 接口请求通用格式接口
@@ -83,6 +96,24 @@ type ENsD struct {
8396
Op *ENOptions
8497
}
8598

99+
// InfoPage groups paging counters with a slice of gjson results.
// NOTE(review): the counter semantics (e.g. whether Page is zero- or one-based)
// are not visible here — confirm against the code that fills this struct.
type InfoPage struct {
100+
Total int64
101+
Page int64
102+
Size int64
103+
HasNext bool
104+
Data []gjson.Result // the gjson results carried by this page
105+
}
106+
107+
// DPS is the ENScan deep-search package.
108+
type DPS struct {
109+
Name string `json:"name"` // company name
110+
Pid string `json:"pid"` // company ID
111+
Ref string `json:"ref"` // reason for the association
112+
Deep int `json:"deep"` // depth
113+
SK string `json:"type"` // search type
114+
SearchList []string `json:"search_list"` // deep-search list
115+
}
116+
86117
func (h *ENOptions) GetDelayRTime() int64 {
87118
if h.DelayTime == -1 {
88119
return utils.RangeRand(1, 5)
@@ -98,8 +129,29 @@ func (h *ENOptions) GetENConfig() *ENConfig {
98129
return h.ENConfig
99130
}
100131

132+
func (h *ENOptions) GetCookie(tpy string) (b string) {
133+
c := h.ENConfig.Cookies
134+
switch tpy {
135+
case "aqc":
136+
b = c.Aiqicha
137+
case "tyc":
138+
b = c.Tianyancha
139+
case "rb":
140+
b = c.RiskBird
141+
case "qcc":
142+
b = c.Qcc
143+
case "xlb":
144+
b = c.Xlb
145+
case "kc":
146+
b = c.KuaiCha
147+
case "qimai":
148+
b = c.QiMai
149+
}
150+
return b
151+
152+
}
153+
101154
type EnInfos struct {
102-
Id primitive.ObjectID `bson:"_id"`
103155
Search string
104156
Name string
105157
Pid string
@@ -118,12 +170,15 @@ type EnInfos struct {
118170
EnInfo []map[string]interface{}
119171
}
120172

173+
var AbnormalStatus = []string{"注销", "吊销", "停业", "清算", "歇业", "关闭", "撤销", "迁出", "经营异常", "严重违法失信"}
174+
121175
// DefaultAllInfos 默认收集信息列表
122176
// DefaultAllInfos lists the info categories collected by default; each
// category appears exactly once (a duplicated "weibo" entry was removed).
var DefaultAllInfos = []string{"icp", "weibo", "wechat", "app", "job", "wx_app", "copyright"}
123177
var DefaultInfos = []string{"icp", "weibo", "wechat", "app", "wx_app"}
124178
var CanSearchAllInfos = []string{"enterprise_info", "icp", "weibo", "wechat", "app", "job", "wx_app", "copyright", "supplier", "invest", "branch", "holds", "partner"}
125179
var DeepSearch = []string{"invest", "branch", "holds", "supplier"}
126-
var ENSTypes = []string{"aqc", "tyc", "kc", "miit", "tycapi", "rb"}
180+
var ENSTypes = []string{"aqc", "xlb", "qcc", "tyc", "kc", "tycapi", "rb"}
181+
var ENSApps = []string{"miit"}
127182
var ScanTypeKeys = map[string]string{
128183
"aqc": "爱企查",
129184
"qcc": "企查查",
@@ -146,6 +201,7 @@ type ENConfig struct {
146201
UserAgent string `yaml:"user_agent"` // 自定义 User-Agent
147202
Cookies struct {
148203
Aldzs string `yaml:"aldzs"`
204+
Xlb string `yaml:"xlb"`
149205
Aiqicha string `yaml:"aiqicha"`
150206
Qidian string `yaml:"qidian"`
151207
KuaiCha string `yaml:"kuaicha"`
@@ -154,7 +210,10 @@ type ENConfig struct {
154210
TycApiToken string `yaml:"tyc_api_token"`
155211
RiskBird string `yaml:"risk_bird"`
156212
AuthToken string `yaml:"auth_token"`
213+
Qcc string `yaml:"qcc"`
214+
QccTid string `yaml:"qcctid"`
157215
QiMai string `yaml:"qimai"`
216+
ChinaZ string `yaml:"chinaz"`
158217
}
159218
App struct {
160219
MiitApi string `yaml:"miit_api"`

common/flag.go

Lines changed: 0 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -21,7 +21,6 @@ func Banner() {
2121
gologger.Print().Msgf("https://github.com/wgpsec/ENScan_GO\n\n")
2222
gologger.Print().Msgf("工具仅用于信息收集,请勿用于非法用途\n")
2323
gologger.Print().Msgf("开发人员不承担任何责任,也不对任何滥用或损坏负责.\n")
24-
2524
}
2625

2726
func Flag(Info *ENOptions) {
@@ -42,7 +41,6 @@ func Flag(Info *ENOptions) {
4241
flag.BoolVar(&Info.IsJsonOutput, "json", false, "json导出")
4342
flag.StringVar(&Info.Output, "out-dir", "", "结果输出的文件夹位置(默认为outs)")
4443
flag.StringVar(&Info.BranchFilter, "branch-filter", "", "提供一个正则表达式,名称匹配该正则的分支机构和子公司会被跳过")
45-
flag.StringVar(&Info.UPOutFile, "out-update", "", "导出指定范围文件,自更新")
4644
flag.StringVar(&Info.OutPutType, "out-type", "xlsx", "导出的文件后缀 默认xlsx")
4745
flag.BoolVar(&Info.IsDebug, "debug", false, "是否显示debug详细信息")
4846
flag.BoolVar(&Info.IsShow, "is-show", true, "是否展示信息输出")

common/output.go

Lines changed: 57 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -23,109 +23,113 @@ type ENSMap struct {
2323
}
2424

2525
// ENSMapLN 最终统一导出格式
26-
var ENSMapLN = map[string]*ENSMap{
26+
var ENSMapLN = map[string]*EnsGo{
2727
"enterprise_info": {
2828
Name: "企业信息",
29-
JField: []string{"name", "legal_person", "status", "phone", "email", "registered_capital", "incorporation_date", "address", "scope", "reg_code", "pid"},
29+
Field: []string{"name", "legal_person", "status", "phone", "email", "registered_capital", "incorporation_date", "address", "scope", "reg_code", "pid"},
3030
KeyWord: []string{"企业名称", "法人代表", "经营状态", "电话", "邮箱", "注册资本", "成立日期", "注册地址", "经营范围", "统一社会信用代码", "PID"},
3131
},
3232
"icp": {
3333
Name: "ICP备案",
34-
Only: "domain",
35-
JField: []string{"website_name", "website", "domain", "icp", "company_name"},
34+
Field: []string{"website_name", "website", "domain", "icp", "company_name"},
3635
KeyWord: []string{"网站名称", "网址", "域名", "网站备案/许可证号", "公司名称"},
3736
},
3837
"wx_app": {
3938
Name: "微信小程序",
40-
JField: []string{"name", "category", "logo", "qrcode", "read_num"},
39+
Field: []string{"name", "category", "logo", "qrcode", "read_num"},
4140
KeyWord: []string{"名称", "分类", "头像", "二维码", "阅读量"},
4241
},
4342
"wechat": {
4443
Name: "微信公众号",
45-
JField: []string{"name", "wechat_id", "description", "qrcode", "avatar"},
44+
Field: []string{"name", "wechat_id", "description", "qrcode", "avatar"},
4645
KeyWord: []string{"名称", "ID", "简介", "二维码", "头像"},
4746
},
4847
"weibo": {
4948
Name: "微博",
50-
JField: []string{"name", "profile_url", "description", "avatar"},
49+
Field: []string{"name", "profile_url", "description", "avatar"},
5150
KeyWord: []string{"微博昵称", "链接", "简介", "头像"},
5251
},
5352
"supplier": {
5453
Name: "供应商",
55-
JField: []string{"name", "scale", "amount", "report_time", "data_source", "relation", "pid"},
54+
Field: []string{"name", "scale", "amount", "report_time", "data_source", "relation", "pid"},
5655
KeyWord: []string{"名称", "金额占比", "金额", "报告期/公开时间", "数据来源", "关联关系", "PID"},
5756
},
5857
"job": {
5958
Name: "招聘",
60-
JField: []string{"name", "education", "location", "publish_time", "salary"},
59+
Field: []string{"name", "education", "location", "publish_time", "salary"},
6160
KeyWord: []string{"招聘职位", "学历", "办公地点", "发布日期", "薪资"},
6261
},
6362
"invest": {
6463
Name: "投资",
65-
JField: []string{"name", "legal_person", "status", "scale", "pid"},
64+
Field: []string{"name", "legal_person", "status", "scale", "pid"},
6665
KeyWord: []string{"企业名称", "法人", "状态", "投资比例", "PID"},
6766
},
6867
"branch": {
6968
Name: "分支机构",
70-
JField: []string{"name", "legal_person", "status", "pid"},
69+
Field: []string{"name", "legal_person", "status", "pid"},
7170
KeyWord: []string{"企业名称", "法人", "状态", "PID"},
7271
},
7372
"holds": {
7473
Name: "控股企业",
75-
JField: []string{"name", "legal_person", "status", "scale", "level", "pid"},
74+
Field: []string{"name", "legal_person", "status", "scale", "level", "pid"},
7675
KeyWord: []string{"企业名称", "法人", "状态", "投资比例", "持股层级", "PID"},
7776
},
7877
"app": {
7978
Name: "APP",
80-
JField: []string{"name", "category", "version", "update_at", "description", "logo", "bundle_id", "link", "market"},
79+
Field: []string{"name", "category", "version", "update_at", "description", "logo", "bundle_id", "link", "market"},
8180
KeyWord: []string{"名称", "分类", "当前版本", "更新时间", "简介", "logo", "Bundle ID", "链接", "market"},
8281
},
8382
"copyright": {
8483
Name: "软件著作权",
85-
JField: []string{"name", "short_name", "category", "reg_num", "pub_type"},
84+
Field: []string{"name", "short_name", "category", "reg_num", "pub_type"},
8685
KeyWord: []string{"软件全称", "软件简称", "分类", "登记号", "权利取得方式"},
8786
},
8887
"partner": {
8988
Name: "股东信息",
90-
JField: []string{"name", "scale", "reg_cap", "pid"},
89+
Field: []string{"name", "scale", "reg_cap", "pid"},
9190
KeyWord: []string{"股东名称", "持股比例", "认缴出资金额", "PID"},
9291
},
9392
}
9493

94+
func DataToMap(info []gjson.Result, en *EnsGo, em *EnsGo, ext string) (res []map[string]string) {
95+
for _, v := range info {
96+
strData := make(map[string]string, len(em.Field)+1)
97+
// 获取字段值并转换为字符串
98+
for i, field := range em.Field {
99+
// 判断是否最后一位字符,如果是那就是要加入from字段的
100+
if i == len(em.Field)-1 && i >= len(en.Field) {
101+
strData["ref"] = v.Get(field).String()
102+
} else {
103+
strData[en.Field[i]] = v.Get(field).String()
104+
}
105+
}
106+
// 添加额外信息,用于后期展示
107+
strData["extra"] = ext
108+
res = append(res, strData)
109+
}
110+
return res
111+
}
112+
95113
// InfoToMap 将输出的json转为统一map格式
96114
func InfoToMap(infos map[string][]gjson.Result, enMap map[string]*EnsGo, extraInfo string) (res map[string][]map[string]string) {
97115
res = make(map[string][]map[string]string)
98116
for k, info := range infos {
99-
en := ENSMapLN[k].JField
100-
// 分类信息
101-
var data []map[string]string
102-
for _, v := range info {
103-
strData := make(map[string]string, len(enMap[k].Field)+1)
104-
// 获取字段值并转换为字符串
105-
for i, field := range enMap[k].Field {
106-
if i == len(enMap[k].Field)-1 && i >= len(en) {
107-
strData["from"] = v.Get(field).String()
108-
} else {
109-
strData[en[i]] = v.Get(field).String()
110-
}
111-
}
112-
// 添加额外信息,用于后期展示
113-
strData["extra"] = extraInfo
114-
data = append(data, strData)
117+
// 判断是否有这个类型,有时候数据可能会比较混杂
118+
if _, ok := enMap[k]; !ok {
119+
continue
115120
}
116-
// 信息全部存入
117-
res[k] = data
121+
res[k] = DataToMap(info, ENSMapLN[k], enMap[k], extraInfo)
118122
}
119123
return res
120124
}
121125

122126
func OutStrByEnInfo(data map[string][]map[string]string, types string) (str string) {
123127
var builder strings.Builder
124128
s := data[types]
125-
em := ENSMapLN[types].JField
129+
em := ENSMapLN[types].Field
126130
for _, m := range s {
127131
first := true
128-
for _, key := range append(em, "from", "extra") {
132+
for _, key := range append(em, "ref", "extra") {
129133
if !first { // 如果不是第一个元素,则先写入逗号
130134
builder.WriteString(",")
131135
}
@@ -139,6 +143,23 @@ func OutStrByEnInfo(data map[string][]map[string]string, types string) (str stri
139143
return str
140144
}
141145

146+
func OriginalToMapList(infos []gjson.Result, em *EnsGo) (res []map[string]string) {
147+
for _, info := range infos {
148+
res = append(res, OriginalToMap(info, em))
149+
}
150+
return res
151+
}
152+
153+
func OriginalToMap(info gjson.Result, em *EnsGo) (res map[string]string) {
154+
// 获取字段值并转换为字符串
155+
res = make(map[string]string)
156+
for _, field := range em.Field {
157+
// 判断是否最后一位字符,如果是那就是要加入from字段的
158+
res[field] = info.Get(field).String()
159+
}
160+
return res
161+
}
162+
142163
func OutFileByEnInfo(data map[string][]map[string]string, name string, types string, dir string) (err error) {
143164
if dir == "!" {
144165
gologger.Debug().Str("设定DIR", dir).Msgf("不导出文件")
@@ -182,7 +203,7 @@ func OutFileByEnInfo(data map[string][]map[string]string, name string, types str
182203
for i, m := range v {
183204
if len(m) > 0 {
184205
// 把信息全部提取出来,转为interface
185-
for _, p := range append(em.JField, "from", "extra") {
206+
for _, p := range append(em.Field, "ref", "extra") {
186207
exData[i] = append(exData[i], m[p])
187208
}
188209
}

0 commit comments

Comments
 (0)