diff --git a/.config b/.config
new file mode 100644
index 0000000..1e28817
--- /dev/null
+++ b/.config
@@ -0,0 +1,3 @@
+[DEFAULT]
+REPO_FILE_PATH=repository_list.tsv
+DEBUG=False
diff --git a/.editorconfig b/.editorconfig
new file mode 100644
index 0000000..814d7b2
--- /dev/null
+++ b/.editorconfig
@@ -0,0 +1,24 @@
+# http://editorconfig.org
+
+root = true
+
+[*]
+indent_style = space
+indent_size = 4
+trim_trailing_whitespace = true
+insert_final_newline = true
+charset = utf-8
+end_of_line = lf
+
+[*.bat]
+indent_style = tab
+end_of_line = crlf
+
+[LICENSE]
+insert_final_newline = false
+
+[Makefile]
+indent_style = tab
+
+[*.{yml,yaml}]
+indent_size = 2
diff --git a/.github/workflows/actions.yml b/.github/workflows/actions.yml
new file mode 100644
index 0000000..95db7c5
--- /dev/null
+++ b/.github/workflows/actions.yml
@@ -0,0 +1,34 @@
+name: specdatri reporting
+on:
+ schedule:
+ - cron: 0 0 * * 1 # At 00:00 on Monday
+jobs:
+ build:
+ runs-on: ubuntu-22.04
+ steps:
+ - name: checkout repo content
+ uses: actions/checkout@v4
+ - name: setup python
+ uses: actions/setup-python@v5
+ with:
+ python-version: '3.12'
+ - name: install python packages
+ run: |
+ python -m pip install --upgrade pip
+ pip install -r requirements.txt
+ - name: execute py script
+ env:
+ github_token: '${{ secrets.github_token }}'
+ pepy_x_api_key: '${{ secrets.pepy_x_api_key }}'
+ run: python main.py
+ - name: commit files
+ run: |
+ git config --local user.email "action@github.com"
+ git config --local user.name "GitHub Action"
+ git add -A
+ git diff-index --quiet HEAD || (git commit -a -m "updated files" --allow-empty)
+ - name: push changes
+ uses: ad-m/github-push-action@v0.8.0
+ with:
+ github_token: '${{ secrets.github_token }}'
+ branch: main
diff --git a/.github/workflows/codeql-analysis.yml b/.github/workflows/codeql-analysis.yml
new file mode 100644
index 0000000..576efcd
--- /dev/null
+++ b/.github/workflows/codeql-analysis.yml
@@ -0,0 +1,25 @@
+name: "Code Scanning with CodeQL"
+
+on:
+ push:
+ branches: [ main ]
+ pull_request:
+ branches: [ main ]
+ schedule:
+ - cron: '40 17 * * 3'
+
+jobs:
+ analyze:
+ name: Analyze
+ runs-on: ubuntu-22.04
+ permissions:
+ security-events: write
+ steps:
+ - name: Checkout
+ uses: actions/checkout@v4
+ - name: Initialize
+ uses: github/codeql-action/init@v3
+ with:
+ languages: python
+ - name: Perform CodeQL Analysis
+ uses: github/codeql-action/analyze@v3
diff --git a/.pre-commit-config.yaml b/.pre-commit-config.yaml
new file mode 100644
index 0000000..c5b3d53
--- /dev/null
+++ b/.pre-commit-config.yaml
@@ -0,0 +1,35 @@
+exclude: '.*\.tsv$'
+default_stages: [pre-commit]
+
+default_language_version:
+ python: python3.12
+
+repos:
+ - repo: https://github.com/pre-commit/pre-commit-hooks
+ rev: v5.0.0
+ hooks:
+ - id: trailing-whitespace
+ - id: end-of-file-fixer
+ - id: check-json
+ - id: check-toml
+ - id: check-xml
+ - id: check-yaml
+ - id: debug-statements
+ - id: check-builtin-literals
+ - id: check-case-conflict
+ - id: check-docstring-first
+ - id: detect-private-key
+
+ # Run the Ruff linter.
+ - repo: https://github.com/astral-sh/ruff-pre-commit
+ rev: v0.8.3
+ hooks:
+ # Linter
+ - id: ruff
+ args: [--fix, --exit-non-zero-on-fix]
+
+# pre-commit.ci configuration: keeps the hook revisions above up to date automatically
+ci:
+ autoupdate_schedule: weekly
+ skip: []
+ submodules: false
diff --git a/README.md b/README.md
index 3255161..751293b 100644
--- a/README.md
+++ b/README.md
@@ -1,2 +1,44 @@
# specdatri_reporting
The code base includes a collection of scripts and GitHub Actions designed to gather various metrics on RECETOX's impact.
+
+## Local development
+
+### Project setup
+It is assumed you have cloned the repository and changed into its directory.
+Create a virtualenv or conda environment (whichever you prefer).
+
+Once in the repository directory, activate your environment, then run the following command to install the required Python libraries:
+
+> pip install -r requirements/local.txt
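+
+The repositories to report on are listed in `repository_list.tsv` (the path is set via `REPO_FILE_PATH` in `.config`). Based on the columns read in `main.py`, each row provides `source`, `repository`, `action`, `project`, and `package`. The table below is an illustrative sketch only (the real file is tab-separated and its rows will differ):
+
+| source | repository           | action    | project      | package      |
+|--------|----------------------|-----------|--------------|--------------|
+| github | some-owner/some-repo | clones    | Some Project | some-package |
+| pypi   | -                    | downloads | Some Project | some-package |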
+
+### Simulating GitHub Actions
+
+You need [act](https://nektosact.com/) to test the workflows in development mode.
+Install act for your OS, then run the following command to simulate the scheduled GitHub Action on your local machine:
+
+> act --secret-file .env schedule
+
+### Things to note
+
+1. Do not commit or push local development changes from the `tmp` and `reports` folders; in fact, do not edit them at all.
+
+2. When testing with `act`, do not use a token that has permission to push, or your test data will pollute the "production" data.
+
+3. When testing with `act`, be aware that the push step may fail because you cannot push directly to `main`.
+
+4. Always develop on a separate branch, never on `main`, and never push directly to `main`.
+
+5. You need API tokens to test the code locally; put them in `example.env` and rename the file to `.env`.
+
+### Running tests
+
+#### Running with unittest
+> python -m unittest discover -s tests
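+
+To run a single test file, pass a filename pattern to discover, e.g.:
+
+> python -m unittest discover -s tests -p test_github.py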
+
+#### Running with coverage
+> coverage run -m unittest discover -s tests
+
+> coverage report -m
+
+> coverage html
diff --git a/example.env b/example.env
new file mode 100644
index 0000000..63adf08
--- /dev/null
+++ b/example.env
@@ -0,0 +1,2 @@
+github_token="your_github_token"
+pepy_x_api_key="your_pepy_x_api_key"
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..f384f58
--- /dev/null
+++ b/main.py
@@ -0,0 +1,72 @@
+import pandas as pd
+from pandas import DataFrame
+
+from src.github import process_github_repositories
+from src.pypi import process_pypi_repositories
+from src.utils import get_config_var, get_env_var, log_function, setup_logger
+
+logger = setup_logger()
+
+
+@log_function(logger)
+def load_repositories(
+ file_path: str,
+) -> DataFrame:
+ """
+ Reads a list of repositories from a TSV file and returns it as a DataFrame.
+
+ :param file_path: Path to the TSV file containing the list of repositories.
+ :return: DataFrame containing the list of repositories.
+ """
+ return pd.read_csv(file_path, sep="\t")
+
+
+@log_function(logger)
+def process_repositories(
+ repositories_df: DataFrame,
+ github_token: str,
+ pepy_x_api_key: str,
+):
+    """
+    Iterates over the repositories and dispatches each row to the matching source handler.
+
+    Args:
+        repositories_df (DataFrame): DataFrame containing the list of repositories.
+        github_token (str): GitHub token to access the GitHub API.
+        pepy_x_api_key (str): pepy.tech API key used to fetch PyPI download statistics.
+
+    Returns:
+        None
+    """
+ for _, row in repositories_df.iterrows():
+ source = row["source"].lower()
+ repository = row["repository"]
+ action = row["action"]
+ project = row["project"]
+ package = row["package"]
+ if source == "github":
+ owner, repo = repository.split("/")
+ process_github_repositories(
+ owner, repo, github_token, action, project, package
+ )
+ elif source == "pypi":
+ process_pypi_repositories(
+ package, pepy_x_api_key, action, project
+ )
+ else:
+ logger.error(f"Unknown source: {source}")
+
+
+@log_function(logger)
+def main():
+ repo_file_path = get_config_var("DEFAULT", "REPO_FILE_PATH")
+ if repo_file_path:
+ logger.info("REPO_FILE_PATH found in .config file")
+ repositories_df = load_repositories(repo_file_path)
+ github_token = get_env_var("github_token")
+ pepy_x_api_key = get_env_var("pepy_x_api_key")
+ process_repositories(repositories_df, github_token, pepy_x_api_key)
+ logger.debug(f"Repositories DataFrame: \n{repositories_df}")
+ else:
+ logger.error("REPO_FILE_PATH not found in .config file")
+ print("REPO_FILE_PATH not found in .config file")
+
+
+if __name__ == "__main__":
+ main()
diff --git a/reports/.gitkeep b/reports/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/table.tsv b/repository_list.tsv
similarity index 100%
rename from table.tsv
rename to repository_list.tsv
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..5603c37
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1 @@
+-r requirements/base.txt
diff --git a/requirements/base.txt b/requirements/base.txt
new file mode 100644
index 0000000..6f62de3
--- /dev/null
+++ b/requirements/base.txt
@@ -0,0 +1,4 @@
+pandas==2.2
+orjson==3.10.15
+requests==2.32.3
+python_dotenv==1.0.1
diff --git a/requirements/local.txt b/requirements/local.txt
new file mode 100644
index 0000000..d2e9e2a
--- /dev/null
+++ b/requirements/local.txt
@@ -0,0 +1,3 @@
+-r base.txt
+# Used in development
+pre-commit==4.1.0
diff --git a/src/github.py b/src/github.py
new file mode 100644
index 0000000..fcd407c
--- /dev/null
+++ b/src/github.py
@@ -0,0 +1,102 @@
+import requests
+from .reports import write_make_request_response
+
+from .utils import (
+ log_function,
+ make_api_request,
+ setup_logger,
+)
+
+logger = setup_logger()
+
+
+def _get_headers(github_token: str) -> dict:
+ return {
+ "Accept": "application/vnd.github.v3+json",
+ "X-GitHub-Api-Version": "2022-11-28",
+ "Authorization": f"Bearer {github_token}",
+ }
+
+
+@log_function(logger)
+def get_clone_stats(
+ owner: str,
+ repo: str,
+ github_token: str,
+) -> requests.Response:
+ """
+ Fetches the clone statistics for a given GitHub repository.
+
+ Args:
+ owner (str): The owner of the repository.
+ repo (str): The name of the repository.
+ github_token (str): The GitHub token.
+
+ Returns:
+        requests.Response: The API response containing the clone statistics.
+ """
+
+ url = f"https://api.github.com/repos/{owner}/{repo}/traffic/clones"
+ headers = _get_headers(github_token)
+ response = make_api_request(http_method="GET", url=url, headers=headers)
+ return response
+
+
+@log_function(logger)
+def get_repo_views(
+ owner: str,
+ repo: str,
+ github_token: str,
+) -> requests.Response:
+ """
+ Fetches the view statistics for a given GitHub repository.
+
+ Args:
+ owner (str): The owner of the repository.
+ repo (str): The name of the repository.
+ github_token (str): The GitHub token.
+
+ Returns:
+        requests.Response: The API response containing the view statistics.
+ """
+
+ url = f"https://api.github.com/repos/{owner}/{repo}/traffic/views"
+ headers = _get_headers(github_token)
+ response = make_api_request(http_method="GET", url=url, headers=headers)
+ return response
+
+
+@log_function(logger)
+def process_github_repositories(
+ owner: str,
+ repo: str,
+ github_token: str,
+ action: str,
+ project: str,
+ package: str,
+):
+ """
+ Processes the specified GitHub repository to fetch clone and view statistics.
+
+ Args:
+ owner (str): The owner of the GitHub repository.
+ repo (str): The name of the GitHub repository.
+ github_token (str): The GitHub token to access the GitHub API.
+ action (str): The action to be performed on the repository.
+ project (str): The project name.
+ package (str): The specific package name.
+
+ Returns:
+ None
+
+ Logs:
+ Logs the clone and view statistics for the specified repository.
+ """
+ if action == "clones":
+ clone_stats = get_clone_stats(owner, repo, github_token)
+ write_make_request_response(clone_stats, project, package, "github", "clones")
+ elif action == "views":
+ view_stats = get_repo_views(owner, repo, github_token)
+ write_make_request_response(view_stats, project, package, "github", "views")
+ else:
+ logger.error(f"Invalid action: {action}")
diff --git a/src/pypi.py b/src/pypi.py
new file mode 100644
index 0000000..1d2e0b7
--- /dev/null
+++ b/src/pypi.py
@@ -0,0 +1,53 @@
+import requests
+from .utils import (
+ log_function,
+ make_api_request,
+ setup_logger,
+)
+from .reports import write_make_request_response
+
+logger = setup_logger()
+
+
+@log_function(logger)
+def get_pypi_downloads(
+ package_name: str,
+ pepy_x_api_key: str,
+) -> requests.Response:
+    """
+    Fetches the download statistics for a given PyPI package from pepy.tech.
+
+    Args:
+        package_name (str): The name of the PyPI package.
+        pepy_x_api_key (str): The pepy.tech API key.
+
+    Returns:
+        requests.Response: The API response containing the download statistics.
+    """
+
+ url = f"https://api.pepy.tech/api/v2/projects/{package_name}"
+ headers = {"X-API-Key": pepy_x_api_key}
+ response = make_api_request(http_method="GET", url=url, headers=headers)
+ return response
+
+
+@log_function(logger)
+def process_pypi_repositories(
+ package: str,
+ pepy_x_api_key: str,
+ action: str,
+ project: str,
+):
+ """
+ Fetches the download statistics for a given PyPI package.
+
+ Args:
+ package (str): The name of the PyPI package.
+        pepy_x_api_key (str): The pepy.tech API key.
+ action (str): The action to perform.
+ project (str): The name of the project.
+
+ Returns:
+ None
+ """
+ if action == "downloads":
+ downloads = get_pypi_downloads(package, pepy_x_api_key)
+ write_make_request_response(downloads, project, package, "pypi", action)
+ else:
+ logger.error(f"Unknown action: {action}")
diff --git a/src/reports.py b/src/reports.py
new file mode 100644
index 0000000..e50f2ea
--- /dev/null
+++ b/src/reports.py
@@ -0,0 +1,55 @@
+import orjson
+import requests
+
+from .utils import (
+    get_failed_response_json,
+    log_function,
+    prep_filename,
+    setup_logger,
+    write_prep_filename_metadata,
+)
+
+logger = setup_logger()
+
+
+@log_function(logger)
+def write_json(data, filename):
+ """
+ Serializes the given data to JSON and writes it to the specified file.
+
+ Args:
+ data (Any): The data to serialize.
+ filename (str): The name of the file to write the JSON data to.
+ """
+ with open(filename, "wb") as f: # Open the file in binary write mode
+ f.write(
+ orjson.dumps(data, option=orjson.OPT_INDENT_2)
+ ) # Serialize the data and write it to the file
+
+
+@log_function(logger)
+def write_make_request_response(
+ response: requests.Response,
+ project: str,
+ package: str,
+ source: str,
+ action: str,
+):
+ """
+ Processes the response from a make_api_request call and writes the data to a file.
+
+ Args:
+ response (requests.Response): The response from the API request.
+ project (str): The project name.
+ package (str): The package name.
+ source (str): The source of the data (e.g., "github").
+ action (str): The action performed (e.g., "clones" or "views").
+ """
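+    # Any failure here (e.g. a body that is not valid JSON) falls through to writing a failure report.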
+ try:
+ data = response.json()
+ filename = prep_filename("tmp", project, package, source, action)
+ write_json(data, filename)
+ write_prep_filename_metadata(project, package, source, action, filename)
+ except Exception:
+ logger.error(
+ f"Failed to fetch {action} status_code: {response.status_code} {response.text}"
+ )
+ failed_response = get_failed_response_json(response)
+ filename = prep_filename("tmp", project, package, source, action)
+ write_json(failed_response, filename)
+ write_prep_filename_metadata(project, package, source, action, filename)
diff --git a/src/utils.py b/src/utils.py
new file mode 100644
index 0000000..01475c4
--- /dev/null
+++ b/src/utils.py
@@ -0,0 +1,263 @@
+import configparser
+import logging
+import os
+import re
+from datetime import datetime
+from functools import wraps
+
+import orjson
+import requests
+from dotenv import load_dotenv
+from requests.adapters import HTTPAdapter
+from urllib3.util.retry import Retry
+
+# Load .config file from the parent directory
+config = configparser.ConfigParser()
+config.read(os.path.join(os.path.dirname(__file__), "..", ".config"))
+
+# Load .env file from the parent directory
+load_dotenv(os.path.join(os.path.dirname(__file__), "..", ".env"))
+
+
+def get_env_var(var_name: str, default: str = None) -> str:
+    """
+    Returns the value of an environment variable, or a default if it is not set.
+
+    Args:
+        var_name (str): The name of the environment variable.
+        default (str): The default value to return if the environment variable does not exist.
+
+    Returns:
+        str: The value of the environment variable or the default value.
+    """
+ return os.getenv(var_name, default)
+
+
+def get_config_var(section: str, var_name: str, default: str = None) -> str:
+    """
+    Returns the value of a configuration option, or a default if it is not set.
+
+    Args:
+        section (str): The section of the configuration file.
+        var_name (str): The name of the configuration option.
+        default (str): The default value to return if the option does not exist.
+
+    Returns:
+        str: The value of the configuration option or the default value.
+    """
+ return config.get(section, var_name, fallback=default)
+
+
+def get_logger(name: str = "spec-logger", level: int = logging.INFO):
+ """
+ Returns a logger instance with the given name and log level.
+
+ Args:
+ name (str): The name of the logger.
+ level (int): The logging level (e.g., logging.INFO, logging.DEBUG).
+
+ Returns:
+ logging.Logger: Configured logger instance.
+
+ """
+    logger = logging.getLogger(name)
+    logger.setLevel(level)
+
+    # only attach a console handler once so repeated calls do not produce duplicate log lines
+    if not logger.handlers:
+        ch = logging.StreamHandler()
+        ch.setLevel(level)
+
+        # create formatter and add it to the handler
+        formatter = logging.Formatter(
+            "%(asctime)s - %(name)s - %(levelname)s - %(message)s"
+        )
+        ch.setFormatter(formatter)
+        logger.addHandler(ch)
+    return logger
+
+
+def log_function(logger: logging.Logger, obfuscate_keywords=None):
+ """
+ A decorator that logs the function name, arguments, return value, and exceptions.
+ Arguments and keyword arguments containing specified keywords are obfuscated to avoid logging sensitive information.
+
+ Args:
+ logger (logging.Logger): The logger instance to use for logging.
+ obfuscate_keywords (list): List of keywords to check for obfuscation. Defaults to ["token", "key"].
+ """
+ if obfuscate_keywords is None:
+ obfuscate_keywords = ["token", "key"]
+
+ def decorator(func):
+ @wraps(func)
+ def wrapper(*args, **kwargs):
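+            # Map positional arguments to their parameter names so sensitive values can be masked by name.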
+ arg_names = func.__code__.co_varnames[:func.__code__.co_argcount]
+ obfuscated_args = [
+ "***" if any(keyword in name for keyword in obfuscate_keywords) else value
+ for name, value in zip(arg_names, args)
+ ]
+ obfuscated_kwargs = {
+ k: "***" if any(keyword in k for keyword in obfuscate_keywords) else v
+ for k, v in kwargs.items()
+ }
+ logger.info(
+ f"Calling function '{func.__name__}' with arguments {obfuscated_args} and keyword arguments {obfuscated_kwargs}"
+ )
+ try:
+ result = func(*args, **kwargs)
+ logger.info(f"Function '{func.__name__}' returned {result}")
+ return result
+ except Exception as e:
+ logger.error(f"Function '{func.__name__}' raised an exception: {e}")
+ raise
+
+ return wrapper
+
+ return decorator
+
+
+def get_failed_response(
+    error_message="Some kind of API error occurred while interacting with the given URL.",
+) -> requests.Response:
+ failed_response = requests.Response()
+ failed_response.status_code = 500
+ failed_response.reason = error_message
+    failed_response._content = orjson.dumps({"message": error_message})
+ return failed_response
+
+
+def make_api_request(
+ url: str,
+ http_method: str = "GET",
+ headers: dict = {},
+ data: dict = {},
+ auth: tuple = (),
+ cookies: dict = {},
+ params: dict = {},
+) -> requests.Response:
+ """Makes an API request to the given url with the given parameters."""
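+    # Bail out early if any supplied header value is empty (e.g. a missing API token).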
+ if not all(headers.values()):
+ return get_failed_response()
+ s = requests.Session()
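+    # Retry transient failures (HTTP 408/429/5xx) up to 10 times with a small backoff.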
+ retries = Retry(
+ total=10,
+ backoff_factor=0.1,
+ status_forcelist=[408, 429, 500, 502, 503, 504],
+ )
+ s.mount("https://", HTTPAdapter(max_retries=retries))
+ s.mount("http://", HTTPAdapter(max_retries=retries))
+
+ try:
+ req = requests.Request(
+ http_method,
+ url,
+ data=data,
+ headers=headers,
+ auth=auth,
+ cookies=cookies,
+ params=params,
+ )
+ prepped = req.prepare()
+ resp = s.send(prepped)
+ return resp
+ except Exception as e:
+        get_logger().error(f"Connection error while fetching data {e}")
+ return get_failed_response()
+
+
+def setup_logger() -> logging.Logger:
+ """
+ Sets up and returns a logger instance based on the configuration.
+
+ Returns:
+ logging.Logger: Configured logger instance.
+ """
+ debug_mode = get_config_var("DEFAULT", "DEBUG", "False").lower() == "true"
+ log_level = logging.DEBUG if debug_mode else logging.INFO
+ return get_logger(level=log_level)
+
+
+def sanitize_filename_component(component: str) -> str:
+ """
+ Sanitizes a filename component by replacing spaces and special characters with underscores.
+
+ Args:
+ component (str): The filename component to sanitize.
+
+ Returns:
+ str: The sanitized filename component.
+ """
+ # Replace spaces and special characters with underscores
+ return re.sub(r"[^\w\-]", "_", component)
+
+
+def write_prep_filename_metadata(
+ project: str, package: str, source: str, action: str, filename: str
+):
+ """
+ Writes metadata about the prepared filename to a metadata file.
+
+ Args:
+ project (str): The project name.
+ package (str): The package name.
+ source (str): The source of the data (e.g., "github").
+ action (str): The action performed (e.g., "clones" or "views").
+ filename (str): The prepared filename.
+ """
+ metadata = {
+ "project": project,
+ "package": package,
+ "source": source,
+ "action": action,
+ "filename": filename,
+ }
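+    # Store the metadata next to the report as a "<report name>.metadata.json" sidecar file.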
+ base_filename = os.path.splitext(filename)[0]
+ metadata_filename = f"{base_filename}.metadata.json"
+ with open(metadata_filename, "wb") as f:
+ f.write(
+ orjson.dumps(metadata, option=orjson.OPT_INDENT_2)
+ ) # Serialize the data and write it to the file
+
+
+def prep_filename(
+ folder: str,
+ project: str,
+ package: str,
+ source: str,
+ action: str,
+ extension: str = "json",
+) -> str:
+ """
+ Prepares a filename based on the given parameters.
+
+ Args:
+ folder (str): The folder where the file will be saved.
+ project (str): The project name.
+ package (str): The package name.
+ source (str): The source of the data (e.g., "github").
+ action (str): The action performed (e.g., "clones" or "views").
+
+ Returns:
+ str: The prepared filename.
+ """
+ now = datetime.now()
+ date_part = now.strftime("%Y-%m-%d_%H-%M-%S")
+ project = sanitize_filename_component(project)
+ package = sanitize_filename_component(package)
+ source = sanitize_filename_component(source)
+ action = sanitize_filename_component(action)
+    separator = "__"
+    return f"{folder}/{project}{separator}{package}{separator}{source}{separator}{action}{separator}{date_part}.{extension}"
+
+
+def get_failed_response_json(response: requests.Response) -> dict:
+ """
+ Extracts and formats the failure details from a given HTTP response.
+
+ Args:
+ response (requests.Response): The HTTP response object.
+
+ Returns:
+ dict: A dictionary containing the status code, failure message, and the full response text.
+ """
+    try:
+        message = response.json().get("message", "Request failed")
+    except Exception:
+        # The body may not be valid JSON (e.g. an HTML error page); fall back to a generic message.
+        message = "Request failed"
+    return {
+        "status": response.status_code,
+        "message": message,
+        "response": response.text,
+    }
diff --git a/tests/test_github.py b/tests/test_github.py
new file mode 100644
index 0000000..029b3a2
--- /dev/null
+++ b/tests/test_github.py
@@ -0,0 +1,63 @@
+import unittest
+from unittest.mock import MagicMock, patch
+from src.github import get_clone_stats, get_repo_views
+
+class TestGitHubAPI(unittest.TestCase):
+
+ @patch("src.github.make_api_request")
+ def test_get_clone_stats_success(self, mock_make_api_request):
+ success_response = {
+ "count": 3,
+ "uniques": 3,
+ "clones": [
+ {"timestamp": "2025-01-31T00:00:00Z", "count": 2, "uniques": 2},
+ {"timestamp": "2025-02-03T00:00:00Z", "count": 1, "uniques": 1},
+ ],
+ }
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = success_response
+ mock_make_api_request.return_value = mock_response
+
+ result = get_clone_stats("owner", "repo", "fake_token")
+ self.assertEqual(result.status_code, 200)
+ self.assertEqual(result.json(), success_response)
+
+ @patch("src.github.make_api_request")
+ def test_get_clone_stats_failure(self, mock_make_api_request):
+ failure_response = {"message": "Not Found", "documentation_url": "https://docs.github.com/rest/metrics/traffic#get-repository-clones", "status": "404"}
+ mock_response = MagicMock()
+ mock_response.status_code = 404
+ mock_response.json.return_value = failure_response
+ mock_make_api_request.return_value = mock_response
+
+ result = get_clone_stats("owner", "repo", "fake_token")
+ self.assertEqual(result.status_code, 404)
+ self.assertEqual(result.json(), failure_response)
+
+ @patch("src.github.make_api_request")
+ def test_get_repo_views_success(self, mock_make_api_request):
+ success_response = {"count": 20, "uniques": 10}
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ mock_response.json.return_value = success_response
+ mock_make_api_request.return_value = mock_response
+
+ result = get_repo_views("owner", "repo", "fake_token")
+ self.assertEqual(result.status_code, 200)
+ self.assertEqual(result.json(), success_response)
+
+ @patch("src.github.make_api_request")
+ def test_get_repo_views_failure(self, mock_make_api_request):
+ failure_response = {"message": "Not Found", "documentation_url": "https://docs.github.com/rest/metrics/traffic#get-repository-views", "status": "404"}
+ mock_response = MagicMock()
+ mock_response.status_code = 404
+ mock_response.json.return_value = failure_response
+ mock_make_api_request.return_value = mock_response
+
+ result = get_repo_views("owner", "repo", "fake_token")
+ self.assertEqual(result.status_code, 404)
+ self.assertEqual(result.json(), failure_response)
+
+if __name__ == '__main__':
+ unittest.main()
diff --git a/tests/test_report.py b/tests/test_report.py
new file mode 100644
index 0000000..5899d9f
--- /dev/null
+++ b/tests/test_report.py
@@ -0,0 +1,35 @@
+import unittest
+from unittest.mock import mock_open, patch
+
+import orjson
+
+from src.reports import write_json
+
+
+class TestReports(unittest.TestCase):
+
+ @patch("builtins.open", new_callable=mock_open)
+ @patch("src.reports.orjson.dumps")
+ def test_write_json(self, mock_dumps, mock_open):
+ # Mock the return value of orjson.dumps
+ mock_dumps.return_value = b'{"key": "value"}'
+
+ # Data to be serialized
+ data = {"key": "value"}
+ filename = "test.json"
+
+ # Call the function
+ write_json(data, filename)
+
+ # Assert that orjson.dumps was called with the correct data and options
+ mock_dumps.assert_called_once_with(data, option=orjson.OPT_INDENT_2)
+
+ # Assert that the file was opened in binary write mode
+ mock_open.assert_called_once_with(filename, "wb")
+
+ # Assert that the data was written to the file
+ mock_open().write.assert_called_once_with(b'{"key": "value"}')
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tests/test_utils.py b/tests/test_utils.py
new file mode 100644
index 0000000..ed2e0db
--- /dev/null
+++ b/tests/test_utils.py
@@ -0,0 +1,262 @@
+import logging
+import unittest
+from unittest.mock import MagicMock, mock_open, patch
+
+import orjson
+
+from src.utils import (
+ get_config_var,
+ get_env_var,
+ get_failed_response,
+ get_failed_response_json,
+ get_logger,
+ log_function,
+ make_api_request,
+ prep_filename,
+ sanitize_filename_component,
+ write_prep_filename_metadata,
+)
+
+
+class TestUtils(unittest.TestCase):
+
+ @patch("src.utils.config")
+ def test_get_config_var(self, mock_config):
+ mock_config.get.return_value = "test_value"
+ result = get_config_var("DEFAULT", "TEST_VAR", "default_value")
+ self.assertEqual(result, "test_value")
+ mock_config.get.assert_called_once_with(
+ "DEFAULT", "TEST_VAR", fallback="default_value"
+ )
+
+ def test_get_logger(self):
+ logger = get_logger("test-logger", logging.DEBUG)
+ self.assertEqual(logger.name, "test-logger")
+ self.assertEqual(logger.level, logging.DEBUG)
+ self.assertTrue(
+ any(
+ isinstance(handler, logging.StreamHandler)
+ for handler in logger.handlers
+ )
+ )
+
+ @patch("src.utils.logging.Logger")
+ def test_log_function(self, MockLogger):
+ mock_logger = MockLogger.return_value
+
+ @log_function(mock_logger)
+ def sample_function(x, y):
+ return x + y
+
+ result = sample_function(2, 3)
+ self.assertEqual(result, 5)
+
+ mock_logger.info.assert_any_call(
+ "Calling function 'sample_function' with arguments [2, 3] and keyword arguments {}"
+ )
+ mock_logger.info.assert_any_call("Function 'sample_function' returned 5")
+
+ @log_function(mock_logger)
+ def sample_function_exception(x, y):
+ raise ValueError("An error occurred")
+
+ with self.assertRaises(ValueError):
+ sample_function_exception(2, 3)
+ mock_logger.error.assert_any_call(
+ "Function 'sample_function_exception' raised an exception: An error occurred"
+ )
+
+ def test_get_failed_response(self):
+ error_message = "Test error message"
+ response = get_failed_response(error_message)
+ self.assertEqual(response.status_code, 500)
+ self.assertEqual(response.reason, error_message)
+ self.assertEqual(response.json(), {"message": error_message})
+
+ @patch("src.utils.requests.Session")
+ def test_make_api_request_success(self, MockSession):
+ mock_response = MagicMock()
+ mock_response.status_code = 200
+ MockSession.return_value.send.return_value = mock_response
+
+ url = "http://example.com"
+ headers = {"Authorization": "Bearer token"}
+ response = make_api_request(url, headers=headers)
+
+ self.assertEqual(response.status_code, 200)
+ MockSession.return_value.send.assert_called_once()
+
+ @patch("src.utils.requests.Session")
+ def test_make_api_request_failure(self, MockSession):
+ mock_response = MagicMock()
+ mock_response.status_code = 500
+ MockSession.return_value.send.return_value = mock_response
+
+ url = "http://example.com"
+ headers = {"Authorization": ""}
+ response = make_api_request(url, headers=headers)
+
+ self.assertEqual(response.status_code, 500)
+ self.assertEqual(
+ response.json(),
+ {
+                "message": "Some kind of API error occurred while interacting with the given URL."
+ },
+ )
+ MockSession.return_value.send.assert_not_called()
+
+ @patch("src.utils.requests.Session")
+ @patch("src.utils.get_logger")
+ def test_make_api_request_exception(self, MockLogger, MockSession):
+ MockSession.return_value.send.side_effect = Exception("Connection error")
+
+ url = "http://example.com"
+ headers = {"Authorization": "Bearer token"}
+ response = make_api_request(url, headers=headers)
+
+ self.assertEqual(response.status_code, 500)
+ self.assertEqual(
+ response.json(),
+ {
+                "message": "Some kind of API error occurred while interacting with the given URL."
+ },
+ )
+ MockLogger.return_value.error.assert_called_once_with(
+ "Connection error while fetching data Connection error"
+ )
+
+ @patch("src.utils.os.getenv")
+ def test_get_env_var(self, mock_getenv):
+ # Test when the environment variable is set
+ mock_getenv.return_value = "test_value"
+ result = get_env_var("TEST_VAR", "default_value")
+ self.assertEqual(result, "test_value")
+ mock_getenv.assert_called_once_with("TEST_VAR", "default_value")
+
+ # Reset mock for the next test
+ mock_getenv.reset_mock()
+
+ # Test when the environment variable is not set
+ mock_getenv.side_effect = lambda _, default=None: default
+ result = get_env_var("NON_EXISTENT_VAR")
+ self.assertEqual(result, None)
+ mock_getenv.assert_called_with("NON_EXISTENT_VAR", None)
+
+ # Test that default value works
+ result = get_env_var("NON_EXISTENT_VAR", "default_value")
+ self.assertEqual(result, "default_value")
+ mock_getenv.assert_called_with("NON_EXISTENT_VAR", "default_value")
+
+ @patch("src.utils.datetime")
+ def test_prep_filename(self, mock_datetime):
+ # Mock the current date
+ mock_datetime.now.return_value.strftime.return_value = "20230101"
+
+ folder = "tmp"
+ project = "project_name"
+ package = "package_name"
+ source = "github"
+ action = "clones"
+
+ expected_filename = (
+ "tmp/project_name__package_name__github__clones__20230101.json"
+ )
+ result = prep_filename(folder, project, package, source, action)
+
+ self.assertEqual(result, expected_filename)
+
+ def test_sanitize_filename_component(self):
+ # Test with spaces and special characters
+ self.assertEqual(sanitize_filename_component("project name"), "project_name")
+ self.assertEqual(sanitize_filename_component("package@name!"), "package_name_")
+ self.assertEqual(sanitize_filename_component("source#name"), "source_name")
+ self.assertEqual(sanitize_filename_component("action$name"), "action_name")
+
+ # Test with underscores and hyphens
+ self.assertEqual(sanitize_filename_component("project_name"), "project_name")
+ self.assertEqual(sanitize_filename_component("package-name"), "package-name")
+
+ @patch("builtins.open", new_callable=mock_open)
+ @patch("src.utils.orjson.dumps")
+ def test_write_prep_filename_metadata(self, mock_orjson_dumps, mock_open):
+ project = "project_name"
+ package = "package_name"
+ source = "github"
+ action = "clones"
+ filename = "tmp/project_name__package_name__github__clones__20230101.json"
+
+ # Mock the return value of orjson.dumps
+ mock_orjson_dumps.return_value = b'{"project":"project_name","package":"package_name","source":"github","action":"clones","filename":"tmp/project_name__package_name__github__clones__20230101.json"}'
+
+ # Call the function
+ write_prep_filename_metadata(project, package, source, action, filename)
+
+ # Expected metadata
+ expected_metadata = {
+ "project": project,
+ "package": package,
+ "source": source,
+ "action": action,
+ "filename": filename,
+ }
+
+ # Assert that the file was opened in binary write mode
+ metadata_filename = (
+ "tmp/project_name__package_name__github__clones__20230101.metadata.json"
+ )
+ mock_open.assert_called_once_with(metadata_filename, "wb")
+
+ # Assert that orjson.dumps was called with the correct metadata and options
+ mock_orjson_dumps.assert_called_once_with(
+ expected_metadata, option=orjson.OPT_INDENT_2
+ )
+
+ # Assert that the data was written to the file
+ mock_open().write.assert_called_once_with(mock_orjson_dumps.return_value)
+
+ @patch("src.utils.requests.Response")
+ def test_get_failed_response_json(self, MockResponse):
+ # Mock the response object
+ mock_response = MockResponse()
+ mock_response.status_code = 500
+ mock_response.json.return_value = {"message": "Test error message"}
+ mock_response.text = '{"message": "Test error message"}'
+
+ # Call the function
+ result = get_failed_response_json(mock_response)
+
+ # Expected result
+ expected_result = {
+ "status": 500,
+ "message": "Test error message",
+ "response": '{"message": "Test error message"}',
+ }
+
+ # Assert the result
+ self.assertEqual(result, expected_result)
+ mock_response.json.assert_called_once()
+
+ # Test with no message in the response
+ mock_response.json.return_value = {}
+ mock_response.text = "{}"
+
+ # Call the function
+ result = get_failed_response_json(mock_response)
+
+ # Expected result
+ expected_result = {
+ "status": 500,
+ "message": "Request failed",
+ "response": "{}",
+ }
+
+ # Assert the result
+ self.assertEqual(result, expected_result)
+ mock_response.json.assert_called()
+
+
+if __name__ == "__main__":
+ unittest.main()
diff --git a/tmp/.gitkeep b/tmp/.gitkeep
new file mode 100644
index 0000000..e69de29