@@ -385,13 +385,59 @@ func (gc *GHCRCollector) updatePackageMetrics(ctx context.Context, pkg config.Pa
385385 gc .metrics .PackageLastPublishedGauge .WithLabelValues (pkg .Owner , pkg .Repo ).Set (float64 (lastPublished .Unix ()))
386386 }
387387
388+ // Update tag-level metrics
389+ gc .updateTagMetrics (ctx , pkg , versions )
390+
388391 slog .Info ("Updated package metrics" ,
389392 "package" , pkg .Repo ,
390393 "version_count" , packageInfo .VersionCount ,
391394 "download_count" , downloadCount ,
392395 "last_published" , lastPublished .Format (time .RFC3339 ))
393396}
394397
398+ // updateTagMetrics updates metrics for individual docker tags
399+ func (gc * GHCRCollector ) updateTagMetrics (ctx context.Context , pkg config.PackageGroup , versions []GHCRVersionResponse ) {
400+ totalTags := 0
401+ tagCount := 0
402+
403+ for _ , version := range versions {
404+ // Parse the created_at timestamp
405+ created , err := time .Parse (time .RFC3339 , version .CreatedAt )
406+ if err != nil {
407+ slog .Warn ("Failed to parse version timestamp" , "version" , version .Name , "error" , err )
408+ continue
409+ }
410+
411+ // Process each tag for this version
412+ for _ , tag := range version .Metadata .Container .Tags {
413+ totalTags ++
414+
415+ // Update tag last published timestamp
416+ gc .metrics .TagLastPublishedGauge .WithLabelValues (pkg .Owner , pkg .Repo , tag ).Set (float64 (created .Unix ()))
417+
418+ // Try to get download statistics for this specific tag
419+ tagDownloads , err := gc .getTagDownloadStats (ctx , pkg .Owner , pkg .Repo , tag )
420+ if err != nil {
421+ slog .Debug ("Failed to get tag download statistics" , "owner" , pkg .Owner , "repo" , pkg .Repo , "tag" , tag , "error" , err )
422+ // Set to -1 to indicate no data available
423+ gc .metrics .TagDownloadsGauge .WithLabelValues (pkg .Owner , pkg .Repo , tag ).Set (- 1 )
424+ } else {
425+ gc .metrics .TagDownloadsGauge .WithLabelValues (pkg .Owner , pkg .Repo , tag ).Set (float64 (tagDownloads ))
426+
427+ tagCount ++
428+ }
429+ }
430+ }
431+
432+ // Update total tag count for this package
433+ gc .metrics .TagCountGauge .WithLabelValues (pkg .Owner , pkg .Repo ).Set (float64 (totalTags ))
434+
435+ slog .Info ("Updated tag metrics" ,
436+ "package" , pkg .Repo ,
437+ "total_tags" , totalTags ,
438+ "tags_with_download_data" , tagCount )
439+ }
440+
395441func (gc * GHCRCollector ) retryWithBackoff (operation func () error , maxRetries int , initialDelay time.Duration ) error {
396442 var lastErr error
397443
@@ -581,6 +627,136 @@ func (gc *GHCRCollector) getPackageDownloadStats(ctx context.Context, owner, pac
581627 return downloadCount , nil
582628}
583629
630+ // getTagDownloadStats scrapes the tag page to get download statistics for a specific tag
631+ func (gc * GHCRCollector ) getTagDownloadStats (ctx context.Context , owner , packageName , tag string ) (int64 , error ) {
632+ slog .Debug ("Starting tag download statistics collection" , "owner" , owner , "package" , packageName , "tag" , tag )
633+
634+ // Construct the tag page URL
635+ tagURL := fmt .Sprintf ("https://github.com/%s/%s/pkgs/container/%s?tag=%s" , owner , packageName , packageName , tag )
636+ slog .Debug ("Constructed tag URL" , "url" , tagURL )
637+
638+ // Create request to the tag page
639+ req , err := http .NewRequestWithContext (ctx , http .MethodGet , tagURL , nil )
640+ if err != nil {
641+ slog .Error ("Failed to create HTTP request for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
642+ return 0 , fmt .Errorf ("failed to create request: %w" , err )
643+ }
644+
645+ // Set headers to mimic a browser request
646+ req .Header .Set ("User-Agent" , "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" )
647+ req .Header .Set ("Accept" , "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7" )
648+ req .Header .Set ("Accept-Language" , "en-US,en;q=0.9" )
649+ req .Header .Set ("Accept-Encoding" , "gzip, deflate, br" )
650+ req .Header .Set ("DNT" , "1" )
651+ req .Header .Set ("Connection" , "keep-alive" )
652+ req .Header .Set ("Upgrade-Insecure-Requests" , "1" )
653+ req .Header .Set ("Sec-Fetch-Dest" , "document" )
654+ req .Header .Set ("Sec-Fetch-Mode" , "navigate" )
655+ req .Header .Set ("Sec-Fetch-Site" , "none" )
656+ req .Header .Set ("Sec-Fetch-User" , "?1" )
657+ req .Header .Set ("Cache-Control" , "max-age=0" )
658+
659+ // Make the request
660+ resp , err := gc .client .Do (req )
661+ if err != nil {
662+ slog .Error ("Failed to fetch tag page" , "owner" , owner , "package" , packageName , "tag" , tag , "url" , tagURL , "error" , err )
663+ return 0 , fmt .Errorf ("failed to fetch tag page: %w" , err )
664+ }
665+
666+ defer func () {
667+ if closeErr := resp .Body .Close (); closeErr != nil {
668+ slog .Warn ("Failed to close response body" , "error" , closeErr )
669+ }
670+ }()
671+
672+ if resp .StatusCode != http .StatusOK {
673+ slog .Error ("Tag page returned non-OK status" , "owner" , owner , "package" , packageName , "tag" , tag , "status_code" , resp .StatusCode , "url" , tagURL )
674+ return 0 , fmt .Errorf ("tag page returned status %d" , resp .StatusCode )
675+ }
676+
677+ // Read the response body
678+ body , err := io .ReadAll (resp .Body )
679+ if err != nil {
680+ slog .Error ("Failed to read response body" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
681+ return 0 , fmt .Errorf ("failed to read response body: %w" , err )
682+ }
683+
684+ // Handle gzip decompression if needed
685+ if resp .Header .Get ("Content-Encoding" ) == "gzip" {
686+ gzReader , err := gzip .NewReader (strings .NewReader (string (body )))
687+ if err != nil {
688+ slog .Error ("Failed to create gzip reader" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
689+ return 0 , fmt .Errorf ("failed to create gzip reader: %w" , err )
690+ }
691+
692+ defer func () {
693+ if closeErr := gzReader .Close (); closeErr != nil {
694+ slog .Warn ("Failed to close gzip reader" , "error" , closeErr )
695+ }
696+ }()
697+
698+ // Read the decompressed content
699+ decompressedBody , err := io .ReadAll (gzReader )
700+ if err != nil {
701+ slog .Error ("Failed to read decompressed body" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
702+ return 0 , fmt .Errorf ("failed to read decompressed body: %w" , err )
703+ }
704+
705+ body = decompressedBody
706+ }
707+
708+ // Parse the HTML document to find download statistics
709+ htmlContent := string (body )
710+ lines := strings .Split (htmlContent , "\n " )
711+
712+ var downloadLine string
713+
714+ // Look for download statistics in the tag page
715+ for i , line := range lines {
716+ if strings .Contains (line , "Total downloads" ) {
717+ if i + 1 < len (lines ) {
718+ downloadLine = strings .TrimSpace (lines [i + 1 ])
719+ slog .Debug ("Found download line after 'Total downloads'" , "line" , downloadLine )
720+
721+ break
722+ }
723+ }
724+ }
725+
726+ if downloadLine == "" {
727+ slog .Debug ("Download statistics not found for tag" , "owner" , owner , "package" , packageName , "tag" , tag )
728+ return 0 , fmt .Errorf ("download statistics not found in tag page" )
729+ }
730+
731+ // Extract the title attribute which contains the full number
732+ titleStart := strings .Index (downloadLine , `title="` )
733+ if titleStart == - 1 {
734+ slog .Debug ("Download count title attribute not found for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "line" , downloadLine )
735+ return 0 , fmt .Errorf ("download count title attribute not found" )
736+ }
737+
738+ titleStart += 7 // Skip 'title="'
739+
740+ titleEnd := strings .Index (downloadLine [titleStart :], `"` )
741+ if titleEnd == - 1 {
742+ slog .Debug ("Download count title attribute malformed for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "line" , downloadLine )
743+ return 0 , fmt .Errorf ("download count title attribute malformed" )
744+ }
745+
746+ title := downloadLine [titleStart : titleStart + titleEnd ]
747+
748+ // Parse the download count from the title attribute
749+ downloadCount , err := strconv .ParseInt (title , 10 , 64 )
750+ if err != nil {
751+ slog .Debug ("Failed to parse download count for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "title" , title , "error" , err )
752+ return 0 , fmt .Errorf ("failed to parse download count %s: %w" , title , err )
753+ }
754+
755+ slog .Debug ("Successfully extracted tag download statistics" , "owner" , owner , "package" , packageName , "tag" , tag , "download_count" , downloadCount , "raw_title" , title )
756+
757+ return downloadCount , nil
758+ }
759+
584760// getOwnerPackages retrieves all packages for a given owner
585761func (gc * GHCRCollector ) getOwnerPackages (ctx context.Context , owner string ) ([]GHCRPackageResponse , error ) {
586762 slog .Info ("Getting packages for owner" , "owner" , owner )
0 commit comments