-
Notifications
You must be signed in to change notification settings - Fork 293
Expand file tree
/
Copy pathmain.go
More file actions
466 lines (405 loc) · 14.1 KB
/
main.go
File metadata and controls
466 lines (405 loc) · 14.1 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
// package main combines CVEs and security advisories into OSV records.
package main
import (
"cmp"
"context"
"errors"
"flag"
"fmt"
"io/fs"
"log/slog"
"os"
"path/filepath"
"sort"
"slices"
"strings"
"cloud.google.com/go/storage"
"github.com/google/osv/vulnfeeds/cves"
gitpurl "github.com/google/osv/vulnfeeds/git"
"github.com/google/osv/vulnfeeds/models"
"github.com/google/osv/vulnfeeds/upload"
"github.com/google/osv/vulnfeeds/utility/logger"
"github.com/ossf/osv-schema/bindings/go/osvschema"
"google.golang.org/api/iterator"
"google.golang.org/protobuf/encoding/protojson"
)
// Default input/output locations, overridable via command-line flags.
const (
	defaultOSVOutputPath = "osv-output" // combined OSV records are written here (or used as a GCS prefix)
	defaultCVE5Path      = "cve5"       // directory of CVE5-derived OSV input files
	defaultNVDOSVPath    = "nvd"        // directory of NVD-derived OSV input files
)
// main loads CVE5 and NVD OSV records, determines which CVE IDs must be
// emitted even without package data (IDs referenced by the Debian and Alpine
// feeds), combines everything into unified OSV records, and writes the result
// to local disk or uploads it to GCS.
func main() {
	logger.InitGlobalLogger()
	// Command-line configuration.
	cve5Path := flag.String("cve5-path", defaultCVE5Path, "Path to CVE5 OSV files")
	nvdPath := flag.String("nvd-path", defaultNVDOSVPath, "Path to NVD OSV files")
	osvOutputPath := flag.String("osv-output-path", defaultOSVOutputPath, "Local output path of combined OSV files, or GCS prefix if uploading.")
	outputBucketName := flag.String("output-bucket", "osv-test-cve-osv-conversion", "The GCS bucket to write to.")
	overridesBucketName := flag.String("overrides-bucket", "osv-test-cve-osv-conversion", "The GCS bucket to read overrides from.")
	uploadToGCS := flag.Bool("upload-to-gcs", false, "If true, upload to GCS bucket instead of writing to local disk.")
	numWorkers := flag.Int("workers", 64, "Number of workers to process records")
	syncDeletions := flag.Bool("sync-deletions", false, "If false, do not delete files in bucket that are not local")
	flag.Parse()
	err := os.MkdirAll(*osvOutputPath, 0755)
	if err != nil {
		logger.Fatal("Can't create output path", slog.Any("err", err))
	}
	// Load CVE5 OSVs
	allCVE5 := loadOSV(*cve5Path)
	// Load NVD OSVs
	allNVD := loadOSV(*nvdPath)
	// List the Debian OSV objects; a listing failure is non-fatal — we only
	// lose the guarantee that those CVEs are emitted without package data.
	debianCVEs, err := listBucketObjects("osv-test-debian-osv", "/debian-cve-osv")
	if err != nil {
		logger.Warn("Failed to list debian cves", slog.Any("err", err))
	} else {
		// Reduce each object filename (e.g. "DEBIAN-CVE-….json") to its CVE ID.
		// NOTE(review): entries that fail extraction are left in the slice as
		// raw object names and end up in mandatoryCVEIDs — confirm intended.
		for i, filename := range debianCVEs {
			cve := extractCVEName(filename, "DEBIAN-")
			if cve != "" {
				debianCVEs[i] = cve
			}
		}
	}
	// Same extraction for the Alpine feed.
	alpineCVEs, err := listBucketObjects("osv-test-cve-osv-conversion", "/alpine")
	if err != nil {
		logger.Warn("Failed to list alpine cves", slog.Any("err", err))
	} else {
		for i, filename := range alpineCVEs {
			cve := extractCVEName(filename, "ALPINE-")
			if cve != "" {
				alpineCVEs[i] = cve
			}
		}
	}
	// mandatoryCVEIDs forces the creation of these CVEs even if they have no
	// packages, so Alpine and Debian records always have an upstream CVE.
	// The linter complains that we aren't appending to the same slice, but we
	// just want to combine these two arrays under a more descriptive name.
	mandatoryCVEIDs := append(debianCVEs, alpineCVEs...) //nolint:gocritic
	combinedData := combineIntoOSV(allCVE5, allNVD, mandatoryCVEIDs)
	ctx := context.Background()
	// Flatten the map into a slice for the uploader.
	vulnerabilities := make([]*osvschema.Vulnerability, 0, len(combinedData))
	for _, v := range combinedData {
		vulnerabilities = append(vulnerabilities, v)
	}
	upload.Upload(ctx, "OSV files", *uploadToGCS, *outputBucketName, *overridesBucketName, *numWorkers, *osvOutputPath, vulnerabilities, *syncDeletions)
}
// extractCVEName extracts the CVE ID from a given filename by stripping the
// given prefix (e.g. "DEBIAN-") and a trailing ".json" extension.
// It returns an empty string if the remainder does not start with "CVE".
//
// Fix: the previous implementation used strings.SplitAfter, whose first
// element retains the separator ("CVE-" rather than "CVE"), so the
// comparison against "CVE" never matched and every filename was rejected.
func extractCVEName(filename string, prefix string) string {
	cleaned := strings.TrimPrefix(filename, prefix)
	cleaned = strings.TrimSuffix(cleaned, ".json")
	// strings.Cut yields the text before the first "-" without the separator.
	if before, _, _ := strings.Cut(cleaned, "-"); before != "CVE" {
		return ""
	}
	return cleaned
}
// listBucketObjects lists the names of all objects in a Google Cloud Storage
// bucket under the given prefix. It does not download the file contents.
//
// Fix: the append previously added two elements per object — the object name
// AND the prefix — interleaving the constant prefix into the returned slice.
// Only the object name belongs in the result.
func listBucketObjects(bucketName string, prefix string) ([]string, error) {
	ctx := context.Background()
	client, err := storage.NewClient(ctx)
	if err != nil {
		return nil, fmt.Errorf("storage.NewClient: %w", err)
	}
	defer client.Close()
	bucket := client.Bucket(bucketName)
	it := bucket.Objects(ctx, &storage.Query{Prefix: prefix})
	var filenames []string
	for {
		attrs, err := it.Next()
		if errors.Is(err, iterator.Done) {
			break // All objects have been listed.
		}
		if err != nil {
			return nil, fmt.Errorf("bucket.Objects: %w", err)
		}
		filenames = append(filenames, attrs.Name)
	}
	return filenames, nil
}
// loadOSV recursively loads all OSV vulnerabilities from the directory tree
// rooted at osvPath and returns them keyed by CVE ID.
//
// Only files ending in ".json" are considered; directories and
// ".metrics.json" files are skipped. Unreadable files produce a warning and
// undecodable files an error log, but neither aborts the walk; a failure to
// walk the directory itself is fatal.
func loadOSV(osvPath string) map[models.CVEID]*osvschema.Vulnerability {
	records := make(map[models.CVEID]*osvschema.Vulnerability)
	logger.Info("Loading OSV records", slog.String("path", osvPath))
	visit := func(path string, entry fs.DirEntry, walkErr error) error {
		switch {
		case walkErr != nil:
			// Propagate walk errors to the caller of WalkDir.
			return walkErr
		case entry.IsDir(),
			!strings.HasSuffix(path, ".json"),
			strings.HasSuffix(path, ".metrics.json"):
			// Not an OSV record file; keep walking.
			return nil
		}
		data, readErr := os.ReadFile(path)
		if readErr != nil {
			logger.Warn("Failed to open OSV JSON file", slog.String("path", path), slog.Any("err", readErr))
			return nil
		}
		record := &osvschema.Vulnerability{}
		if decodeErr := protojson.Unmarshal(data, record); decodeErr != nil {
			logger.Error("Failed to decode, skipping", slog.String("file", path), slog.Any("err", decodeErr))
			return nil
		}
		records[models.CVEID(record.GetId())] = record
		return nil
	}
	if walkErr := filepath.WalkDir(osvPath, visit); walkErr != nil {
		logger.Fatal("Failed to walk OSV directory", slog.String("path", osvPath), slog.Any("err", walkErr))
	}
	return records
}
// combineIntoOSV creates OSV entries by combining loaded CVEs from NVD with
// PackageInfo information from security advisories (CVE5).
//
// Records present in both inputs are merged via combineTwoOSVRecords; records
// with no affected packages are dropped unless their ID appears in
// mandatoryCVEIDs (Debian and Alpine records need an upstream CVE to exist
// even without package data). Note: matched entries are deleted from nvdosv,
// so the caller's map is mutated.
//
// Fix: the NVD-only loop previously called enrichRepoPURLs on an undefined
// variable (convertedCve); the record being enriched is nvd.
func combineIntoOSV(cve5osv map[models.CVEID]*osvschema.Vulnerability, nvdosv map[models.CVEID]*osvschema.Vulnerability, mandatoryCVEIDs []string) map[models.CVEID]*osvschema.Vulnerability {
	osvRecords := make(map[models.CVEID]*osvschema.Vulnerability, len(cve5osv))
	// Iterate through CVEs from security advisories (cve5) as the base.
	for cveID, cve5 := range cve5osv {
		baseOSV := cve5
		if nvd, ok := nvdosv[cveID]; ok {
			baseOSV = combineTwoOSVRecords(cve5, nvd)
			// The CVE is processed, so remove it from the nvdosv map; the
			// loop below then only sees NVD-only records.
			delete(nvdosv, cveID)
		}
		// Skip package-less records unless they are explicitly mandatory.
		if len(baseOSV.GetAffected()) == 0 && !slices.Contains(mandatoryCVEIDs, string(cveID)) {
			continue
		}
		osvRecords[cveID] = baseOSV
	}
	// Add any remaining CVEs from NVD that were not in the advisory data.
	for cveID, nvd := range nvdosv {
		if len(nvd.GetAffected()) == 0 {
			continue
		}
		enrichRepoPURLs(nvd)
		osvRecords[cveID] = nvd
	}
	return osvRecords
}
// combineTwoOSVRecords merges an NVD-derived record into a CVE5-derived one
// and returns the result. The cve5 argument is mutated in place and is the
// value returned: affected entries are merged, references and aliases are
// union-ed without duplicates, and the timestamps become the latest modified
// and earliest published of the pair.
func combineTwoOSVRecords(cve5 *osvschema.Vulnerability, nvd *osvschema.Vulnerability) *osvschema.Vulnerability {
	merged := cve5
	merged.Affected = pickAffectedInformation(cve5.GetAffected(), nvd.GetAffected())

	// Union the reference lists, keyed by URL.
	seenURLs := make(map[string]bool)
	for _, ref := range merged.GetReferences() {
		seenURLs[ref.GetUrl()] = true
	}
	for _, ref := range nvd.GetReferences() {
		if seenURLs[ref.GetUrl()] {
			continue
		}
		seenURLs[ref.GetUrl()] = true
		merged.References = append(merged.References, ref)
	}

	// Keep the latest modification time and the earliest publication time.
	if nvd.GetModified().AsTime().After(merged.GetModified().AsTime()) {
		merged.Modified = nvd.GetModified()
	}
	if nvd.GetPublished().AsTime().Before(merged.GetPublished().AsTime()) {
		merged.Published = nvd.GetPublished()
	}

	// Union the alias lists.
	seenAliases := make(map[string]bool)
	for _, alias := range merged.GetAliases() {
		seenAliases[alias] = true
	}
	for _, alias := range nvd.GetAliases() {
		if seenAliases[alias] {
			continue
		}
		seenAliases[alias] = true
		merged.Aliases = append(merged.Aliases, alias)
	}
	return merged
}
// pickAffectedInformation merges information from nvdAffected into cve5Affected.
// It matches affected packages by the repo URL in their version ranges.
// If a match is found, it merges the version range information, preferring the entry
// with more ranges. Unmatched nvdAffected packages are appended.
// It returns a new slice and does not modify cve5Affected in place.
//
// NOTE(review): on the merge path, only ranges that carry a repo URL survive;
// repo keys are lowercased and the lowercased URL is written back as the
// merged range's Repo (see below) — confirm the case change is intended.
func pickAffectedInformation(cve5Affected []*osvschema.Affected, nvdAffected []*osvschema.Affected) []*osvschema.Affected {
	// Nothing to merge from NVD.
	if len(nvdAffected) == 0 {
		return cve5Affected
	}
	// If NVD has more affected packages, prefer it entirely.
	if len(cve5Affected) == 0 || len(nvdAffected) > len(cve5Affected) {
		return nvdAffected
	}
	// Index NVD ranges by lowercased repo URL; ranges without a repo are dropped.
	nvdRepoMap := make(map[string][]*osvschema.Range)
	for _, affected := range nvdAffected {
		for _, r := range affected.GetRanges() {
			if r.GetRepo() != "" {
				repo := strings.ToLower(r.GetRepo())
				nvdRepoMap[repo] = append(nvdRepoMap[repo], r)
			}
		}
	}
	// Same indexing for the CVE5 side.
	cve5RepoMap := make(map[string][]*osvschema.Range)
	for _, affected := range cve5Affected {
		for _, r := range affected.GetRanges() {
			if r.GetRepo() != "" {
				repo := strings.ToLower(r.GetRepo())
				cve5RepoMap[repo] = append(cve5RepoMap[repo], r)
			}
		}
	}
	newRepoAffectedMap := make(map[string]*osvschema.Affected)
	// Finds ranges with the same repo and merges them into one affected set.
	for repo, cveRanges := range cve5RepoMap {
		if nvdRanges, ok := nvdRepoMap[repo]; ok {
			var newAffectedRanges []*osvschema.Range
			// Found a match. If NVD has more ranges, use its ranges.
			if len(nvdRanges) > len(cveRanges) {
				// just use the nvd ranges
				newAffectedRanges = nvdRanges
			} else if len(cveRanges) == 1 && len(nvdRanges) == 1 {
				// Exactly one range on each side: merge boundary-by-boundary.
				c5Intro, c5Fixed := getRangeBoundaryVersions(cveRanges[0].GetEvents())
				nvdIntro, nvdFixed := getRangeBoundaryVersions(nvdRanges[0].GetEvents())
				// Prefer cve5 data, but use nvd data if cve5 data is missing.
				if c5Intro == "" {
					c5Intro = nvdIntro
				}
				if c5Fixed == "" {
					c5Fixed = nvdFixed
				}
				// Only build a range when at least one boundary is known;
				// otherwise the merged entry ends up with no ranges at all.
				if c5Intro != "" || c5Fixed != "" {
					newRange := cves.BuildVersionRange(c5Intro, "", c5Fixed)
					newRange.Repo = repo
					newRange.Type = osvschema.Range_GIT // Preserve the repo
					newAffectedRanges = append(newAffectedRanges, newRange)
				}
			} else {
				// Equal counts (>1) or more cve5 ranges: keep the cve5 data.
				newAffectedRanges = cveRanges
			}
			// Remove from map so we know which NVD packages are left.
			delete(nvdRepoMap, repo)
			newRepoAffectedMap[repo] = &osvschema.Affected{
				Ranges: newAffectedRanges,
			}
		} else {
			// No NVD counterpart: carry the cve5 ranges through unchanged.
			newRepoAffectedMap[repo] = &osvschema.Affected{
				Ranges: cveRanges,
			}
		}
	}
	// Add remaining NVD packages that were not in cve5.
	for repo, nvdRange := range nvdRepoMap {
		newRepoAffectedMap[repo] = &osvschema.Affected{
			Ranges: nvdRange,
		}
	}
	var combinedAffected []*osvschema.Affected //nolint:prealloc
	for _, aff := range newRepoAffectedMap {
		combinedAffected = append(combinedAffected, aff)
	}
	// sort by repo — map iteration order is random, so this keeps output deterministic
	slices.SortFunc(combinedAffected, func(a, b *osvschema.Affected) int {
		return cmp.Compare(a.GetRanges()[0].GetRepo(), b.GetRanges()[0].GetRepo())
	})
	return combinedAffected
}
// getRangeBoundaryVersions extracts the introduced and fixed versions from a
// slice of OSV events. It scans all events and returns the last non-empty
// "introduced" value (ignoring the "0" sentinel) and the last non-empty
// "fixed" value encountered.
func getRangeBoundaryVersions(events []*osvschema.Event) (introduced, fixed string) {
	for _, event := range events {
		if v := event.GetIntroduced(); v != "" && v != "0" {
			introduced = v
		}
		if v := event.GetFixed(); v != "" {
			fixed = v
		}
	}
	return introduced, fixed
}
// repoURLFromRanges returns the first repo URL from a GIT-type range, if present.
//
// Fix: this previously took []osvschema.Range by value and compared the Type
// field against the string "GIT", which does not match the protobuf bindings
// used throughout this file ([]*osvschema.Range and the osvschema.Range_GIT
// enum, as in pickAffectedInformation) and therefore could not compile.
func repoURLFromRanges(ranges []*osvschema.Range) string {
	for _, r := range ranges {
		if r.GetType() == osvschema.Range_GIT && r.GetRepo() != "" {
			return r.GetRepo()
		}
	}
	return ""
}
// enrichRepoPURLs sets affected.package.purl to an unversioned pkg:generic repo pURL
// when a GIT range with a repo URL exists and purl is currently empty.
func enrichRepoPURLs(v *vulns.Vulnerability) {
if v == nil || len(v.Affected) == 0 {
return
}
for i := range v.Affected {
aff := &v.Affected[i]
// Ensure base purl is set (unversioned).
if aff.Package.Purl == "" {
if repo := repoURLFromRanges(aff.Ranges); repo != "" {
if p, err := gitpurl.BuildGenericRepoPURL(repo); err == nil && p != "" {
aff.Package.Purl = p
}
}
}
// Add versioned repo pURLs when possible.
if repo := repoURLFromRanges(aff.Ranges); repo != "" {
addVersionedRepoPURLs(aff, repo)
}
}
}
// repoTagsCache memoizes per-repository tag lookups across calls to
// addVersionedRepoPURLs, avoiding repeated remote fetches.
var repoTagsCache = make(gitpurl.RepoTagsCache)

// addVersionedRepoPURLs populates affected.database_specific["repo_purls"]
// with pkg:generic/...@<tag> entries, using affected.versions if available.
// When no versions are listed, it can fall back to the repository's own tags,
// but only when ENABLE_REPO_PURL_TAGS=1 (opt-in, since it may hit the network).
func addVersionedRepoPURLs(aff *osvschema.Affected, repo string) {
	if aff == nil || repo == "" {
		return
	}
	var tags []string
	if len(aff.Versions) > 0 {
		// Prefer the explicitly listed affected versions.
		tags = append(tags, aff.Versions...)
	} else if os.Getenv("ENABLE_REPO_PURL_TAGS") == "1" {
		// NOTE(review): NormalizeRepoTags presumably returns a map keyed by
		// tag name — confirm against the gitpurl package. Errors are
		// deliberately ignored: this enrichment is best-effort.
		norm, err := gitpurl.NormalizeRepoTags(repo, repoTagsCache)
		if err == nil && len(norm) > 0 {
			for tag := range norm {
				tags = append(tags, tag)
			}
			// Map iteration order is random; sort for deterministic output.
			sort.Strings(tags)
			// Cap the tag count to keep record size bounded. Note the cap is
			// applied before dedup, so fewer than maxTags entries may remain.
			const maxTags = 200
			if len(tags) > maxTags {
				tags = tags[:maxTags]
			}
		}
	}
	if len(tags) == 0 {
		return
	}
	base, err := gitpurl.BuildGenericRepoPURL(repo)
	if err != nil || base == "" {
		return
	}
	// Dedup and format.
	seen := make(map[string]struct{}, len(tags))
	vPURLs := make([]string, 0, len(tags))
	for _, t := range tags {
		if t == "" {
			continue
		}
		if _, ok := seen[t]; ok {
			continue
		}
		seen[t] = struct{}{}
		vPURLs = append(vPURLs, base+"@"+t)
	}
	if len(vPURLs) == 0 {
		return
	}
	// NOTE(review): this assumes DatabaseSpecific is a map[string]any; the
	// protobuf bindings used elsewhere in this file may instead expose a
	// structpb.Struct — confirm this compiles against the osvschema bindings.
	if aff.DatabaseSpecific == nil {
		aff.DatabaseSpecific = map[string]any{}
	}
	aff.DatabaseSpecific["repo_purls"] = vPURLs
}