Skip to content

Commit f5997c5

Browse files
committed
feat: add docker tag-level metrics for granular download tracking
- Add new metrics for individual Docker tags:
  - ghcr_tag_downloads: download count per tag
  - ghcr_tag_last_published_timestamp: when each tag was published
  - ghcr_package_tags: total number of tags per package
- Add getTagDownloadStats function to scrape tag-specific download data
- Add updateTagMetrics function to process all tags for each package
- This enables tracking which specific versions get downloaded more
- Allows monitoring of older versions fading out of use over time
1 parent 3e00c75 commit f5997c5

File tree

4 files changed

+210
-1
lines changed

4 files changed

+210
-1
lines changed

internal/collectors/ghcr_collector.go

Lines changed: 176 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -385,13 +385,59 @@ func (gc *GHCRCollector) updatePackageMetrics(ctx context.Context, pkg config.Pa
385385
gc.metrics.PackageLastPublishedGauge.WithLabelValues(pkg.Owner, pkg.Repo).Set(float64(lastPublished.Unix()))
386386
}
387387

388+
// Update tag-level metrics
389+
gc.updateTagMetrics(ctx, pkg, versions)
390+
388391
slog.Info("Updated package metrics",
389392
"package", pkg.Repo,
390393
"version_count", packageInfo.VersionCount,
391394
"download_count", downloadCount,
392395
"last_published", lastPublished.Format(time.RFC3339))
393396
}
394397

398+
// updateTagMetrics updates metrics for individual docker tags
399+
func (gc *GHCRCollector) updateTagMetrics(ctx context.Context, pkg config.PackageGroup, versions []GHCRVersionResponse) {
400+
totalTags := 0
401+
tagCount := 0
402+
403+
for _, version := range versions {
404+
// Parse the created_at timestamp
405+
created, err := time.Parse(time.RFC3339, version.CreatedAt)
406+
if err != nil {
407+
slog.Warn("Failed to parse version timestamp", "version", version.Name, "error", err)
408+
continue
409+
}
410+
411+
// Process each tag for this version
412+
for _, tag := range version.Metadata.Container.Tags {
413+
totalTags++
414+
415+
// Update tag last published timestamp
416+
gc.metrics.TagLastPublishedGauge.WithLabelValues(pkg.Owner, pkg.Repo, tag).Set(float64(created.Unix()))
417+
418+
// Try to get download statistics for this specific tag
419+
tagDownloads, err := gc.getTagDownloadStats(ctx, pkg.Owner, pkg.Repo, tag)
420+
if err != nil {
421+
slog.Debug("Failed to get tag download statistics", "owner", pkg.Owner, "repo", pkg.Repo, "tag", tag, "error", err)
422+
// Set to -1 to indicate no data available
423+
gc.metrics.TagDownloadsGauge.WithLabelValues(pkg.Owner, pkg.Repo, tag).Set(-1)
424+
} else {
425+
gc.metrics.TagDownloadsGauge.WithLabelValues(pkg.Owner, pkg.Repo, tag).Set(float64(tagDownloads))
426+
427+
tagCount++
428+
}
429+
}
430+
}
431+
432+
// Update total tag count for this package
433+
gc.metrics.TagCountGauge.WithLabelValues(pkg.Owner, pkg.Repo).Set(float64(totalTags))
434+
435+
slog.Info("Updated tag metrics",
436+
"package", pkg.Repo,
437+
"total_tags", totalTags,
438+
"tags_with_download_data", tagCount)
439+
}
440+
395441
func (gc *GHCRCollector) retryWithBackoff(operation func() error, maxRetries int, initialDelay time.Duration) error {
396442
var lastErr error
397443

@@ -581,6 +627,136 @@ func (gc *GHCRCollector) getPackageDownloadStats(ctx context.Context, owner, pac
581627
return downloadCount, nil
582628
}
583629

630+
// getTagDownloadStats scrapes the tag page to get download statistics for a specific tag
631+
func (gc *GHCRCollector) getTagDownloadStats(ctx context.Context, owner, packageName, tag string) (int64, error) {
632+
slog.Debug("Starting tag download statistics collection", "owner", owner, "package", packageName, "tag", tag)
633+
634+
// Construct the tag page URL
635+
tagURL := fmt.Sprintf("https://github.com/%s/%s/pkgs/container/%s?tag=%s", owner, packageName, packageName, tag)
636+
slog.Debug("Constructed tag URL", "url", tagURL)
637+
638+
// Create request to the tag page
639+
req, err := http.NewRequestWithContext(ctx, http.MethodGet, tagURL, nil)
640+
if err != nil {
641+
slog.Error("Failed to create HTTP request for tag", "owner", owner, "package", packageName, "tag", tag, "error", err)
642+
return 0, fmt.Errorf("failed to create request: %w", err)
643+
}
644+
645+
// Set headers to mimic a browser request
646+
req.Header.Set("User-Agent", "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36")
647+
req.Header.Set("Accept", "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7")
648+
req.Header.Set("Accept-Language", "en-US,en;q=0.9")
649+
req.Header.Set("Accept-Encoding", "gzip, deflate, br")
650+
req.Header.Set("DNT", "1")
651+
req.Header.Set("Connection", "keep-alive")
652+
req.Header.Set("Upgrade-Insecure-Requests", "1")
653+
req.Header.Set("Sec-Fetch-Dest", "document")
654+
req.Header.Set("Sec-Fetch-Mode", "navigate")
655+
req.Header.Set("Sec-Fetch-Site", "none")
656+
req.Header.Set("Sec-Fetch-User", "?1")
657+
req.Header.Set("Cache-Control", "max-age=0")
658+
659+
// Make the request
660+
resp, err := gc.client.Do(req)
661+
if err != nil {
662+
slog.Error("Failed to fetch tag page", "owner", owner, "package", packageName, "tag", tag, "url", tagURL, "error", err)
663+
return 0, fmt.Errorf("failed to fetch tag page: %w", err)
664+
}
665+
666+
defer func() {
667+
if closeErr := resp.Body.Close(); closeErr != nil {
668+
slog.Warn("Failed to close response body", "error", closeErr)
669+
}
670+
}()
671+
672+
if resp.StatusCode != http.StatusOK {
673+
slog.Error("Tag page returned non-OK status", "owner", owner, "package", packageName, "tag", tag, "status_code", resp.StatusCode, "url", tagURL)
674+
return 0, fmt.Errorf("tag page returned status %d", resp.StatusCode)
675+
}
676+
677+
// Read the response body
678+
body, err := io.ReadAll(resp.Body)
679+
if err != nil {
680+
slog.Error("Failed to read response body", "owner", owner, "package", packageName, "tag", tag, "error", err)
681+
return 0, fmt.Errorf("failed to read response body: %w", err)
682+
}
683+
684+
// Handle gzip decompression if needed
685+
if resp.Header.Get("Content-Encoding") == "gzip" {
686+
gzReader, err := gzip.NewReader(strings.NewReader(string(body)))
687+
if err != nil {
688+
slog.Error("Failed to create gzip reader", "owner", owner, "package", packageName, "tag", tag, "error", err)
689+
return 0, fmt.Errorf("failed to create gzip reader: %w", err)
690+
}
691+
692+
defer func() {
693+
if closeErr := gzReader.Close(); closeErr != nil {
694+
slog.Warn("Failed to close gzip reader", "error", closeErr)
695+
}
696+
}()
697+
698+
// Read the decompressed content
699+
decompressedBody, err := io.ReadAll(gzReader)
700+
if err != nil {
701+
slog.Error("Failed to read decompressed body", "owner", owner, "package", packageName, "tag", tag, "error", err)
702+
return 0, fmt.Errorf("failed to read decompressed body: %w", err)
703+
}
704+
705+
body = decompressedBody
706+
}
707+
708+
// Parse the HTML document to find download statistics
709+
htmlContent := string(body)
710+
lines := strings.Split(htmlContent, "\n")
711+
712+
var downloadLine string
713+
714+
// Look for download statistics in the tag page
715+
for i, line := range lines {
716+
if strings.Contains(line, "Total downloads") {
717+
if i+1 < len(lines) {
718+
downloadLine = strings.TrimSpace(lines[i+1])
719+
slog.Debug("Found download line after 'Total downloads'", "line", downloadLine)
720+
721+
break
722+
}
723+
}
724+
}
725+
726+
if downloadLine == "" {
727+
slog.Debug("Download statistics not found for tag", "owner", owner, "package", packageName, "tag", tag)
728+
return 0, fmt.Errorf("download statistics not found in tag page")
729+
}
730+
731+
// Extract the title attribute which contains the full number
732+
titleStart := strings.Index(downloadLine, `title="`)
733+
if titleStart == -1 {
734+
slog.Debug("Download count title attribute not found for tag", "owner", owner, "package", packageName, "tag", tag, "line", downloadLine)
735+
return 0, fmt.Errorf("download count title attribute not found")
736+
}
737+
738+
titleStart += 7 // Skip 'title="'
739+
740+
titleEnd := strings.Index(downloadLine[titleStart:], `"`)
741+
if titleEnd == -1 {
742+
slog.Debug("Download count title attribute malformed for tag", "owner", owner, "package", packageName, "tag", tag, "line", downloadLine)
743+
return 0, fmt.Errorf("download count title attribute malformed")
744+
}
745+
746+
title := downloadLine[titleStart : titleStart+titleEnd]
747+
748+
// Parse the download count from the title attribute
749+
downloadCount, err := strconv.ParseInt(title, 10, 64)
750+
if err != nil {
751+
slog.Debug("Failed to parse download count for tag", "owner", owner, "package", packageName, "tag", tag, "title", title, "error", err)
752+
return 0, fmt.Errorf("failed to parse download count %s: %w", title, err)
753+
}
754+
755+
slog.Debug("Successfully extracted tag download statistics", "owner", owner, "package", packageName, "tag", tag, "download_count", downloadCount, "raw_title", title)
756+
757+
return downloadCount, nil
758+
}
759+
584760
// getOwnerPackages retrieves all packages for a given owner
585761
func (gc *GHCRCollector) getOwnerPackages(ctx context.Context, owner string) ([]GHCRPackageResponse, error) {
586762
slog.Info("Getting packages for owner", "owner", owner)

internal/collectors/ghcr_collector_test.go

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,6 @@ import (
1212

1313
"ghcr-exporter/internal/config"
1414
"ghcr-exporter/internal/metrics"
15-
1615
"github.com/prometheus/client_golang/prometheus"
1716
)
1817

internal/config/config.go

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -106,6 +106,7 @@ func (p PackageGroup) GetName() string {
106106
if p.Repo == "" {
107107
return p.Owner + "-all"
108108
}
109+
109110
return p.Owner + "-" + p.Repo
110111
}
111112

internal/metrics/registry.go

Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,11 @@ type Registry struct {
1515
PackageLastPublishedGauge *prometheus.GaugeVec
1616
PackageDownloadStatsGauge *prometheus.GaugeVec
1717

18+
// Tag-level metrics
19+
TagDownloadsGauge *prometheus.GaugeVec
20+
TagLastPublishedGauge *prometheus.GaugeVec
21+
TagCountGauge *prometheus.GaugeVec
22+
1823
// Collection statistics
1924
CollectionFailedCounter *prometheus.CounterVec
2025
CollectionSuccessCounter *prometheus.CounterVec
@@ -84,6 +89,34 @@ func NewRegistry() *Registry {
8489
)
8590
r.addMetricInfo("ghcr_package_last_published_timestamp", "Timestamp of the last published version for a GHCR package", []string{"owner", "repo"})
8691

92+
// Tag-level metrics
93+
r.TagDownloadsGauge = promauto.NewGaugeVec(
94+
prometheus.GaugeOpts{
95+
Name: "ghcr_tag_downloads",
96+
Help: "Total number of downloads for a specific docker tag (scraped from package page)",
97+
},
98+
[]string{"owner", "repo", "tag"},
99+
)
100+
r.addMetricInfo("ghcr_tag_downloads", "Total downloads for a specific docker tag", []string{"owner", "repo", "tag"})
101+
102+
r.TagLastPublishedGauge = promauto.NewGaugeVec(
103+
prometheus.GaugeOpts{
104+
Name: "ghcr_tag_last_published_timestamp",
105+
Help: "Timestamp when a specific docker tag was last published",
106+
},
107+
[]string{"owner", "repo", "tag"},
108+
)
109+
r.addMetricInfo("ghcr_tag_last_published_timestamp", "Timestamp when a specific docker tag was last published", []string{"owner", "repo", "tag"})
110+
111+
r.TagCountGauge = promauto.NewGaugeVec(
112+
prometheus.GaugeOpts{
113+
Name: "ghcr_package_tags",
114+
Help: "Total number of tags for a GHCR package",
115+
},
116+
[]string{"owner", "repo"},
117+
)
118+
r.addMetricInfo("ghcr_package_tags", "Total number of tags for a GHCR package", []string{"owner", "repo"})
119+
87120
r.CollectionFailedCounter = promauto.NewCounterVec(
88121
prometheus.CounterOpts{
89122
Name: "ghcr_collection_failed_total",

0 commit comments

Comments
 (0)