From dc1382a0141d2d52efeee739e15ee34e4bd83cca Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Wed, 12 Nov 2025 09:46:06 -0800 Subject: [PATCH 1/3] Add release stats for virtualizarr to reports --- reports/README.md | 42 ++++++++++++++++++++++++++++++++++++++++++ reports/pyproject.toml | 1 + 2 files changed, 43 insertions(+) diff --git a/reports/README.md b/reports/README.md index e730539..5712614 100644 --- a/reports/README.md +++ b/reports/README.md @@ -13,3 +13,45 @@ 3. Add any new repositories to config.py 4. Run `uv run main.py` 5. Run `uv run plot.py` + +## Running a report to get download stats from pypi + +Pypi stats only go back 180 days. After installing the pypistats package you can run a command to get daily downloads: + +```bash +pypistats overall virtualizarr -sd 2025-05-15 -ed 2025-11-11 --daily -f tsv --mirrors without > virtualizarr-report.tsv +``` +And then generate more informative stats using the analyze_downloads.py script: + +```bash + # With default cutoff date (2025-07-21) + python analyze_downloads.py virtualizarr-report.tsv + + # With custom cutoff date + python analyze_downloads.py virtualizarr-report.tsv --cutoff-date 2025-08-15 + + # Show help + python analyze_downloads.py --help + ``` + + Using the previous pypi command as an example, the following is output from the script: + + ```bash +$ python analyze_downloads.py virtualizarr-report.tsv + Download Analysis +============================================================ +Cutoff date: 2025-07-21 + +Before 2025-07-21 (inclusive): + - Number of days: 64 + - Total downloads: 3,822 + - Average daily downloads: 59.72 + +After 2025-07-21: + - Number of days: 113 + - Total downloads: 20,682 + - Average daily downloads: 183.03 + +Change: +206.48% +Absolute difference: +123.31 downloads/day +``` diff --git a/reports/pyproject.toml b/reports/pyproject.toml index 658d52e..09663bf 100644 --- a/reports/pyproject.toml +++ b/reports/pyproject.toml @@ -8,4 +8,5 @@ dependencies = [ 
"matplotlib>=3.10.3", "pandas>=2.3.0", "pygithub>=2.6.1", + "pypistats>=1.11.0" ] From 70877036693ba3b7d398aa3ad72a53dab59c004b Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Wed, 12 Nov 2025 09:47:02 -0800 Subject: [PATCH 2/3] Add script and tsv files --- reports/analyze_downloads.py | 147 ++++++++++++++++++++++++++ reports/virtualizarr-report.tsv | 182 ++++++++++++++++++++++++++++++++ 2 files changed, 329 insertions(+) create mode 100644 reports/analyze_downloads.py create mode 100644 reports/virtualizarr-report.tsv diff --git a/reports/analyze_downloads.py b/reports/analyze_downloads.py new file mode 100644 index 0000000..26068d0 --- /dev/null +++ b/reports/analyze_downloads.py @@ -0,0 +1,147 @@
+#!/usr/bin/env python3
+"""
+Analyze download statistics before and after a given date using a pypistats report output from a command like:
+pypistats overall virtualizarr -sd 2025-05-15 -ed 2025-11-11 --daily -f tsv --mirrors without > virtualizarr-report.tsv
+"""
+
+import argparse
+import csv
+import sys
+from datetime import datetime
+from pathlib import Path
+
+DATE_FORMAT = "%Y-%m-%d"
+DEFAULT_CUTOFF_DATE = "2025-07-21"
+
+
+def _mean(values):
+    """Return the arithmetic mean of values, or 0 when there are none."""
+    return sum(values) / len(values) if values else 0
+
+
+def analyze_downloads(tsv_file: str, cutoff_date: str = DEFAULT_CUTOFF_DATE):
+    """
+    Print average daily downloads before and after a cutoff date.
+
+    The cutoff date itself is counted in the "before" period.
+
+    Args:
+        tsv_file: Path to the TSV file; it must have "category", "date"
+            and "downloads" columns.
+        cutoff_date: The date to split the data (format: YYYY-MM-DD)
+
+    Returns:
+        A dict with the day count, total and average for each period and
+        "percent_change", which is None when the "before" average is zero
+        (a relative change is undefined without a baseline).
+
+    Raises:
+        ValueError: If cutoff_date is not in YYYY-MM-DD format.
+    """
+    cutoff = datetime.strptime(cutoff_date, DATE_FORMAT)
+
+    before_downloads = []
+    after_downloads = []
+
+    # The csv module expects files opened with newline="".
+    with open(tsv_file, newline="", encoding="utf-8") as f:
+        reader = csv.DictReader(f, delimiter="\t")
+
+        for row in reader:
+            # Skip non-data rows (the "Total" row and footer lines)
+            if row.get("category") == "Total" or not row.get("date"):
+                continue
+
+            try:
+                day = datetime.strptime(row["date"], DATE_FORMAT)
+                # The report writes thousands separators (e.g. "24,504"),
+                # which int() rejects; strip them so busy days still count.
+                downloads = int(row["downloads"].replace(",", ""))
+            except (AttributeError, KeyError, ValueError) as e:
+                # Warn on stderr so the summary on stdout stays clean.
+                print(f"Skipping row due to error: {e}", file=sys.stderr)
+                continue
+
+            # The cutoff date itself belongs to the "before" period.
+            if day <= cutoff:
+                before_downloads.append(downloads)
+            else:
+                after_downloads.append(downloads)
+
+    avg_before = _mean(before_downloads)
+    avg_after = _mean(after_downloads)
+
+    # A relative change is undefined without a non-zero baseline.
+    if avg_before > 0:
+        percent_change = (avg_after - avg_before) / avg_before * 100
+        change_text = f"{percent_change:+.2f}%"
+    else:
+        percent_change = None
+        change_text = "n/a"
+
+    print("Download Analysis")
+    print("=" * 60)
+    print(f"Cutoff date: {cutoff_date}")
+    print()
+    print(f"Before {cutoff_date} (inclusive):")
+    print(f"  - Number of days: {len(before_downloads)}")
+    print(f"  - Total downloads: {sum(before_downloads):,}")
+    print(f"  - Average daily downloads: {avg_before:.2f}")
+    print()
+    print(f"After {cutoff_date}:")
+    print(f"  - Number of days: {len(after_downloads)}")
+    print(f"  - Total downloads: {sum(after_downloads):,}")
+    print(f"  - Average daily downloads: {avg_after:.2f}")
+    print()
+    print(f"Change: {change_text}")
+    print(f"Absolute difference: {avg_after - avg_before:+.2f} downloads/day")
+
+    return {
+        "days_before": len(before_downloads),
+        "total_before": sum(before_downloads),
+        "avg_before": avg_before,
+        "days_after": len(after_downloads),
+        "total_after": sum(after_downloads),
+        "avg_after": avg_after,
+        "percent_change": percent_change,
+    }
+
+
+def main():
+    """Parse the command line and print the download analysis."""
+    parser = argparse.ArgumentParser(
+        description="Analyze download statistics before and after a given date",
+        formatter_class=argparse.RawDescriptionHelpFormatter,
+        epilog="""
+Example:
+  %(prog)s virtualizarr-report.tsv --cutoff-date 2025-07-21
+        """
+    )
+    parser.add_argument(
+        "tsv_file",
+        help="Path to the TSV file containing download statistics"
+    )
+    parser.add_argument(
+        "--cutoff-date",
+        default=DEFAULT_CUTOFF_DATE,
+        help="Date to split the analysis (format: YYYY-MM-DD, default: 2025-07-21)"
+    )
+
+    args = parser.parse_args()
+
+    # Reject a malformed date with a usage error instead of a traceback.
+    try:
+        datetime.strptime(args.cutoff_date, DATE_FORMAT)
+    except ValueError:
+        parser.error(f"--cutoff-date must be YYYY-MM-DD, got {args.cutoff_date!r}")
+
+    tsv_path = Path(args.tsv_file)
+    if not tsv_path.exists():
+        print(f"Error: {tsv_path} not found", file=sys.stderr)
+        sys.exit(1)
+
+    analyze_downloads(str(tsv_path), args.cutoff_date)
+
+
+if __name__ == "__main__":
+    main()
diff --git a/reports/virtualizarr-report.tsv b/reports/virtualizarr-report.tsv new file mode 100644 index 0000000..0b84718 --- /dev/null +++ b/reports/virtualizarr-report.tsv @@ -0,0 +1,182 @@ +"category" "date" "percent" "downloads" +"without_mirrors" "2025-09-23" "3.85%" 944 +"without_mirrors" "2025-11-10" "3.79%" 929 +"without_mirrors" "2025-10-10" "3.22%" 789 +"without_mirrors" "2025-08-22" "3.08%" 755 +"without_mirrors" "2025-11-07" "3.06%" 750 +"without_mirrors" "2025-11-11" "2.60%" 637 +"without_mirrors" "2025-11-08" "2.38%" 584 +"without_mirrors" "2025-09-02" "2.35%" 577 +"without_mirrors" "2025-07-01" "2.32%" 569 +"without_mirrors" "2025-08-25" "2.23%" 546 +"without_mirrors" "2025-11-06" "2.07%" 508 +"without_mirrors" "2025-11-09" "2.04%" 499 +"without_mirrors" "2025-08-14" "1.93%" 473 +"without_mirrors" "2025-08-27" "1.82%" 446 +"without_mirrors" "2025-08-29" "1.82%" 446 +"without_mirrors" "2025-09-04" "1.64%" 401 +"without_mirrors" "2025-09-16" "1.54%" 378 +"without_mirrors" "2025-10-11" "1.39%" 340 +"without_mirrors" "2025-08-21" "1.30%" 318 +"without_mirrors" "2025-10-27" "1.23%" 301 +"without_mirrors" "2025-09-08" "1.15%" 281 +"without_mirrors" "2025-08-13" "1.12%" 275 +"without_mirrors" "2025-09-25" "1.09%" 268 +"without_mirrors" "2025-10-01" "1.05%" 257 +"without_mirrors" "2025-09-29" "1.04%" 255 +"without_mirrors" "2025-08-23" "1.01%" 248 +"without_mirrors" 
"2025-09-15" "1.00%" 244 +"without_mirrors" "2025-09-03" "0.96%" 235 +"without_mirrors" "2025-09-24" "0.95%" 232 +"without_mirrors" "2025-08-12" "0.93%" 228 +"without_mirrors" "2025-07-05" "0.87%" 212 +"without_mirrors" "2025-08-04" "0.84%" 207 +"without_mirrors" "2025-11-03" "0.82%" 202 +"without_mirrors" "2025-06-24" "0.82%" 201 +"without_mirrors" "2025-09-22" "0.80%" 195 +"without_mirrors" "2025-07-16" "0.78%" 192 +"without_mirrors" "2025-08-11" "0.78%" 190 +"without_mirrors" "2025-11-04" "0.78%" 190 +"without_mirrors" "2025-08-26" "0.77%" 188 +"without_mirrors" "2025-11-05" "0.77%" 188 +"without_mirrors" "2025-09-27" "0.72%" 176 +"without_mirrors" "2025-10-13" "0.72%" 176 +"without_mirrors" "2025-07-03" "0.71%" 173 +"without_mirrors" "2025-09-05" "0.69%" 170 +"without_mirrors" "2025-06-26" "0.68%" 167 +"without_mirrors" "2025-08-05" "0.68%" 166 +"without_mirrors" "2025-09-10" "0.67%" 165 +"without_mirrors" "2025-09-30" "0.66%" 162 +"without_mirrors" "2025-10-28" "0.65%" 160 +"without_mirrors" "2025-08-19" "0.64%" 157 +"without_mirrors" "2025-07-22" "0.64%" 156 +"without_mirrors" "2025-07-08" "0.63%" 155 +"without_mirrors" "2025-10-31" "0.58%" 141 +"without_mirrors" "2025-08-08" "0.57%" 140 +"without_mirrors" "2025-09-12" "0.57%" 140 +"without_mirrors" "2025-09-06" "0.56%" 137 +"without_mirrors" "2025-10-05" "0.56%" 136 +"without_mirrors" "2025-10-06" "0.51%" 124 +"without_mirrors" "2025-09-11" "0.50%" 123 +"without_mirrors" "2025-07-30" "0.50%" 122 +"without_mirrors" "2025-09-26" "0.49%" 121 +"without_mirrors" "2025-10-23" "0.47%" 114 +"without_mirrors" "2025-07-15" "0.44%" 107 +"without_mirrors" "2025-08-01" "0.43%" 106 +"without_mirrors" "2025-08-28" "0.43%" 106 +"without_mirrors" "2025-08-15" "0.42%" 102 +"without_mirrors" "2025-10-21" "0.41%" 100 +"without_mirrors" "2025-09-07" "0.40%" 99 +"without_mirrors" "2025-10-29" "0.40%" 99 +"without_mirrors" "2025-10-20" "0.40%" 97 +"without_mirrors" "2025-10-09" "0.39%" 96 +"without_mirrors" "2025-10-14" "0.39%" 96 
+"without_mirrors" "2025-07-23" "0.39%" 95 +"without_mirrors" "2025-10-03" "0.39%" 95 +"without_mirrors" "2025-05-16" "0.38%" 94 +"without_mirrors" "2025-09-17" "0.38%" 92 +"without_mirrors" "2025-07-14" "0.37%" 91 +"without_mirrors" "2025-07-31" "0.37%" 91 +"without_mirrors" "2025-08-06" "0.37%" 90 +"without_mirrors" "2025-07-28" "0.36%" 89 +"without_mirrors" "2025-05-20" "0.35%" 86 +"without_mirrors" "2025-07-21" "0.35%" 86 +"without_mirrors" "2025-07-24" "0.34%" 84 +"without_mirrors" "2025-09-09" "0.34%" 84 +"without_mirrors" "2025-05-27" "0.33%" 81 +"without_mirrors" "2025-06-02" "0.33%" 81 +"without_mirrors" "2025-09-18" "0.33%" 81 +"without_mirrors" "2025-10-04" "0.33%" 81 +"without_mirrors" "2025-06-25" "0.33%" 80 +"without_mirrors" "2025-10-15" "0.32%" 79 +"without_mirrors" "2025-08-07" "0.29%" 72 +"without_mirrors" "2025-10-07" "0.29%" 72 +"without_mirrors" "2025-10-24" "0.29%" 72 +"without_mirrors" "2025-10-22" "0.29%" 71 +"without_mirrors" "2025-05-17" "0.29%" 70 +"without_mirrors" "2025-10-02" "0.29%" 70 +"without_mirrors" "2025-05-23" "0.28%" 69 +"without_mirrors" "2025-05-28" "0.28%" 68 +"without_mirrors" "2025-08-20" "0.28%" 68 +"without_mirrors" "2025-06-11" "0.27%" 66 +"without_mirrors" "2025-10-30" "0.27%" 66 +"without_mirrors" "2025-05-30" "0.27%" 65 +"without_mirrors" "2025-08-24" "0.27%" 65 +"without_mirrors" "2025-05-21" "0.26%" 64 +"without_mirrors" "2025-08-18" "0.26%" 63 +"without_mirrors" "2025-06-30" "0.24%" 59 +"without_mirrors" "2025-10-16" "0.24%" 58 +"without_mirrors" "2025-07-10" "0.23%" 57 +"without_mirrors" "2025-09-01" "0.23%" 57 +"without_mirrors" "2025-10-12" "0.23%" 57 +"without_mirrors" "2025-05-29" "0.22%" 55 +"without_mirrors" "2025-06-10" "0.22%" 55 +"without_mirrors" "2025-09-19" "0.22%" 55 +"without_mirrors" "2025-11-01" "0.22%" 55 +"without_mirrors" "2025-07-02" "0.22%" 54 +"without_mirrors" "2025-09-28" "0.22%" 54 +"without_mirrors" "2025-10-17" "0.22%" 54 +"without_mirrors" "2025-07-07" "0.21%" 52 +"without_mirrors" 
"2025-07-18" "0.21%" 51 +"without_mirrors" "2025-06-23" "0.18%" 45 +"without_mirrors" "2025-07-29" "0.18%" 43 +"without_mirrors" "2025-05-15" "0.17%" 42 +"without_mirrors" "2025-09-20" "0.17%" 42 +"without_mirrors" "2025-06-12" "0.16%" 39 +"without_mirrors" "2025-06-14" "0.16%" 38 +"without_mirrors" "2025-07-11" "0.14%" 35 +"without_mirrors" "2025-08-09" "0.14%" 35 +"without_mirrors" "2025-08-16" "0.14%" 35 +"without_mirrors" "2025-09-14" "0.14%" 35 +"without_mirrors" "2025-09-21" "0.14%" 35 +"without_mirrors" "2025-06-16" "0.14%" 34 +"without_mirrors" "2025-07-25" "0.13%" 33 +"without_mirrors" "2025-10-08" "0.12%" 30 +"without_mirrors" "2025-10-26" "0.12%" 29 +"without_mirrors" "2025-07-17" "0.11%" 26 +"without_mirrors" "2025-06-17" "0.10%" 25 +"without_mirrors" "2025-05-22" "0.10%" 24 +"without_mirrors" "2025-05-24" "0.10%" 24 +"without_mirrors" "2025-06-20" "0.10%" 24 +"without_mirrors" "2025-07-13" "0.10%" 24 +"without_mirrors" "2025-07-09" "0.09%" 22 +"without_mirrors" "2025-10-25" "0.09%" 22 +"without_mirrors" "2025-05-19" "0.09%" 21 +"without_mirrors" "2025-06-05" "0.09%" 21 +"without_mirrors" "2025-10-19" "0.09%" 21 +"without_mirrors" "2025-08-17" "0.08%" 20 +"without_mirrors" "2025-08-31" "0.08%" 20 +"without_mirrors" "2025-05-25" "0.08%" 19 +"without_mirrors" "2025-06-06" "0.07%" 18 +"without_mirrors" "2025-09-13" "0.07%" 18 +"without_mirrors" "2025-10-18" "0.07%" 18 +"without_mirrors" "2025-06-01" "0.07%" 16 +"without_mirrors" "2025-06-28" "0.07%" 16 +"without_mirrors" "2025-08-10" "0.07%" 16 +"without_mirrors" "2025-11-02" "0.07%" 16 +"without_mirrors" "2025-06-18" "0.06%" 15 +"without_mirrors" "2025-08-03" "0.06%" 15 +"without_mirrors" "2025-08-30" "0.06%" 15 +"without_mirrors" "2025-06-04" "0.06%" 14 +"without_mirrors" "2025-06-13" "0.05%" 13 +"without_mirrors" "2025-06-09" "0.05%" 12 +"without_mirrors" "2025-06-21" "0.05%" 12 +"without_mirrors" "2025-05-26" "0.04%" 11 +"without_mirrors" "2025-06-03" "0.04%" 11 +"without_mirrors" "2025-07-19" "0.04%" 
11 +"without_mirrors" "2025-07-12" "0.04%" 10 +"without_mirrors" "2025-07-27" "0.04%" 9 +"without_mirrors" "2025-06-27" "0.03%" 8 +"without_mirrors" "2025-07-04" "0.03%" 7 +"without_mirrors" "2025-07-20" "0.03%" 7 +"without_mirrors" "2025-07-26" "0.03%" 7 +"without_mirrors" "2025-08-02" "0.03%" 7 +"without_mirrors" "2025-06-07" "0.02%" 6 +"without_mirrors" "2025-06-29" "0.02%" 4 +"without_mirrors" "2025-06-08" "0.01%" 3 +"without_mirrors" "2025-06-22" "0.01%" 3 +"without_mirrors" "2025-06-15" "0.01%" 2 +"Total" 24,504 + +Date range: 2025-05-15 - 2025-11-11 + From 63fc2fc15578bdb9231d5e512cd8b9a93059d5c9 Mon Sep 17 00:00:00 2001 From: Aimee Barciauskas Date: Wed, 12 Nov 2025 09:48:14 -0800 Subject: [PATCH 3/3] Fix readme --- reports/README.md | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) diff --git a/reports/README.md b/reports/README.md index 5712614..80f61fb 100644 --- a/reports/README.md +++ b/reports/README.md @@ -14,7 +14,7 @@ 4. Run `uv run main.py` 5. Run `uv run plot.py` -## Running a report to get download stats from pypi +## Running a report on pypi package download stats Pypi stats only go back 180 days. 
After installing the pypistats package you can run a command to get daily downloads: @@ -24,19 +24,19 @@ pypistats overall virtualizarr -sd 2025-05-15 -ed 2025-11-11 --daily -f tsv --mi And then generate more informative stats using the analyze_downloads.py script: ```bash - # With default cutoff date (2025-07-21) - python analyze_downloads.py virtualizarr-report.tsv +# With default cutoff date (2025-07-21) +python analyze_downloads.py virtualizarr-report.tsv - # With custom cutoff date - python analyze_downloads.py virtualizarr-report.tsv --cutoff-date 2025-08-15 +# With custom cutoff date +python analyze_downloads.py virtualizarr-report.tsv --cutoff-date 2025-08-15 - # Show help - python analyze_downloads.py --help - ``` +# Show help +python analyze_downloads.py --help +``` - Using the previous pypi command as an example, the following is output from the script: +Using the report produced by the pypistats command above as an example, the script prints the following output: - ```bash +```bash $ python analyze_downloads.py virtualizarr-report.tsv Download Analysis ============================================================