@@ -385,59 +385,13 @@ func (gc *GHCRCollector) updatePackageMetrics(ctx context.Context, pkg config.Pa
385385 gc .metrics .PackageLastPublishedGauge .WithLabelValues (pkg .Owner , pkg .Repo ).Set (float64 (lastPublished .Unix ()))
386386 }
387387
388- // Update tag-level metrics
389- gc .updateTagMetrics (ctx , pkg , versions )
390-
391388 slog .Info ("Updated package metrics" ,
392389 "package" , pkg .Repo ,
393390 "version_count" , packageInfo .VersionCount ,
394391 "download_count" , downloadCount ,
395392 "last_published" , lastPublished .Format (time .RFC3339 ))
396393}
397394
398- // updateTagMetrics updates metrics for individual docker tags
399- func (gc * GHCRCollector ) updateTagMetrics (ctx context.Context , pkg config.PackageGroup , versions []GHCRVersionResponse ) {
400- totalTags := 0
401- tagCount := 0
402-
403- for _ , version := range versions {
404- // Parse the created_at timestamp
405- created , err := time .Parse (time .RFC3339 , version .CreatedAt )
406- if err != nil {
407- slog .Warn ("Failed to parse version timestamp" , "version" , version .Name , "error" , err )
408- continue
409- }
410-
411- // Process each tag for this version
412- for _ , tag := range version .Metadata .Container .Tags {
413- totalTags ++
414-
415- // Update tag last published timestamp
416- gc .metrics .TagLastPublishedGauge .WithLabelValues (pkg .Owner , pkg .Repo , tag ).Set (float64 (created .Unix ()))
417-
418- // Try to get download statistics for this specific tag
419- tagDownloads , err := gc .getTagDownloadStats (ctx , pkg .Owner , pkg .Repo , tag )
420- if err != nil {
421- slog .Debug ("Failed to get tag download statistics" , "owner" , pkg .Owner , "repo" , pkg .Repo , "tag" , tag , "error" , err )
422- // Set to -1 to indicate no data available
423- gc .metrics .TagDownloadsGauge .WithLabelValues (pkg .Owner , pkg .Repo , tag ).Set (- 1 )
424- } else {
425- gc .metrics .TagDownloadsGauge .WithLabelValues (pkg .Owner , pkg .Repo , tag ).Set (float64 (tagDownloads ))
426-
427- tagCount ++
428- }
429- }
430- }
431-
432- // Update total tag count for this package
433- gc .metrics .TagCountGauge .WithLabelValues (pkg .Owner , pkg .Repo ).Set (float64 (totalTags ))
434-
435- slog .Info ("Updated tag metrics" ,
436- "package" , pkg .Repo ,
437- "total_tags" , totalTags ,
438- "tags_with_download_data" , tagCount )
439- }
440-
441395func (gc * GHCRCollector ) retryWithBackoff (operation func () error , maxRetries int , initialDelay time.Duration ) error {
442396 var lastErr error
443397
@@ -627,136 +581,6 @@ func (gc *GHCRCollector) getPackageDownloadStats(ctx context.Context, owner, pac
627581 return downloadCount , nil
628582}
629583
630- // getTagDownloadStats scrapes the tag page to get download statistics for a specific tag
631- func (gc * GHCRCollector ) getTagDownloadStats (ctx context.Context , owner , packageName , tag string ) (int64 , error ) {
632- slog .Debug ("Starting tag download statistics collection" , "owner" , owner , "package" , packageName , "tag" , tag )
633-
634- // Construct the tag page URL
635- tagURL := fmt .Sprintf ("https://github.com/%s/%s/pkgs/container/%s?tag=%s" , owner , packageName , packageName , tag )
636- slog .Debug ("Constructed tag URL" , "url" , tagURL )
637-
638- // Create request to the tag page
639- req , err := http .NewRequestWithContext (ctx , http .MethodGet , tagURL , nil )
640- if err != nil {
641- slog .Error ("Failed to create HTTP request for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
642- return 0 , fmt .Errorf ("failed to create request: %w" , err )
643- }
644-
645- // Set headers to mimic a browser request
646- req .Header .Set ("User-Agent" , "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/120.0.0.0 Safari/537.36" )
647- req .Header .Set ("Accept" , "text/html,application/xhtml+xml,application/xml;q=0.9,image/avif,image/webp,image/apng,*/*;q=0.8,application/signed-exchange;v=b3;q=0.7" )
648- req .Header .Set ("Accept-Language" , "en-US,en;q=0.9" )
649- req .Header .Set ("Accept-Encoding" , "gzip, deflate, br" )
650- req .Header .Set ("DNT" , "1" )
651- req .Header .Set ("Connection" , "keep-alive" )
652- req .Header .Set ("Upgrade-Insecure-Requests" , "1" )
653- req .Header .Set ("Sec-Fetch-Dest" , "document" )
654- req .Header .Set ("Sec-Fetch-Mode" , "navigate" )
655- req .Header .Set ("Sec-Fetch-Site" , "none" )
656- req .Header .Set ("Sec-Fetch-User" , "?1" )
657- req .Header .Set ("Cache-Control" , "max-age=0" )
658-
659- // Make the request
660- resp , err := gc .client .Do (req )
661- if err != nil {
662- slog .Error ("Failed to fetch tag page" , "owner" , owner , "package" , packageName , "tag" , tag , "url" , tagURL , "error" , err )
663- return 0 , fmt .Errorf ("failed to fetch tag page: %w" , err )
664- }
665-
666- defer func () {
667- if closeErr := resp .Body .Close (); closeErr != nil {
668- slog .Warn ("Failed to close response body" , "error" , closeErr )
669- }
670- }()
671-
672- if resp .StatusCode != http .StatusOK {
673- slog .Error ("Tag page returned non-OK status" , "owner" , owner , "package" , packageName , "tag" , tag , "status_code" , resp .StatusCode , "url" , tagURL )
674- return 0 , fmt .Errorf ("tag page returned status %d" , resp .StatusCode )
675- }
676-
677- // Read the response body
678- body , err := io .ReadAll (resp .Body )
679- if err != nil {
680- slog .Error ("Failed to read response body" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
681- return 0 , fmt .Errorf ("failed to read response body: %w" , err )
682- }
683-
684- // Handle gzip decompression if needed
685- if resp .Header .Get ("Content-Encoding" ) == "gzip" {
686- gzReader , err := gzip .NewReader (strings .NewReader (string (body )))
687- if err != nil {
688- slog .Error ("Failed to create gzip reader" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
689- return 0 , fmt .Errorf ("failed to create gzip reader: %w" , err )
690- }
691-
692- defer func () {
693- if closeErr := gzReader .Close (); closeErr != nil {
694- slog .Warn ("Failed to close gzip reader" , "error" , closeErr )
695- }
696- }()
697-
698- // Read the decompressed content
699- decompressedBody , err := io .ReadAll (gzReader )
700- if err != nil {
701- slog .Error ("Failed to read decompressed body" , "owner" , owner , "package" , packageName , "tag" , tag , "error" , err )
702- return 0 , fmt .Errorf ("failed to read decompressed body: %w" , err )
703- }
704-
705- body = decompressedBody
706- }
707-
708- // Parse the HTML document to find download statistics
709- htmlContent := string (body )
710- lines := strings .Split (htmlContent , "\n " )
711-
712- var downloadLine string
713-
714- // Look for download statistics in the tag page
715- for i , line := range lines {
716- if strings .Contains (line , "Total downloads" ) {
717- if i + 1 < len (lines ) {
718- downloadLine = strings .TrimSpace (lines [i + 1 ])
719- slog .Debug ("Found download line after 'Total downloads'" , "line" , downloadLine )
720-
721- break
722- }
723- }
724- }
725-
726- if downloadLine == "" {
727- slog .Debug ("Download statistics not found for tag" , "owner" , owner , "package" , packageName , "tag" , tag )
728- return 0 , fmt .Errorf ("download statistics not found in tag page" )
729- }
730-
731- // Extract the title attribute which contains the full number
732- titleStart := strings .Index (downloadLine , `title="` )
733- if titleStart == - 1 {
734- slog .Debug ("Download count title attribute not found for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "line" , downloadLine )
735- return 0 , fmt .Errorf ("download count title attribute not found" )
736- }
737-
738- titleStart += 7 // Skip 'title="'
739-
740- titleEnd := strings .Index (downloadLine [titleStart :], `"` )
741- if titleEnd == - 1 {
742- slog .Debug ("Download count title attribute malformed for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "line" , downloadLine )
743- return 0 , fmt .Errorf ("download count title attribute malformed" )
744- }
745-
746- title := downloadLine [titleStart : titleStart + titleEnd ]
747-
748- // Parse the download count from the title attribute
749- downloadCount , err := strconv .ParseInt (title , 10 , 64 )
750- if err != nil {
751- slog .Debug ("Failed to parse download count for tag" , "owner" , owner , "package" , packageName , "tag" , tag , "title" , title , "error" , err )
752- return 0 , fmt .Errorf ("failed to parse download count %s: %w" , title , err )
753- }
754-
755- slog .Debug ("Successfully extracted tag download statistics" , "owner" , owner , "package" , packageName , "tag" , tag , "download_count" , downloadCount , "raw_title" , title )
756-
757- return downloadCount , nil
758- }
759-
760584// getOwnerPackages retrieves all packages for a given owner
761585func (gc * GHCRCollector ) getOwnerPackages (ctx context.Context , owner string ) ([]GHCRPackageResponse , error ) {
762586 slog .Info ("Getting packages for owner" , "owner" , owner )
0 commit comments