diff --git a/CITATION.cff b/CITATION.cff index 1271cb3..d9b9c9d 100644 --- a/CITATION.cff +++ b/CITATION.cff @@ -26,4 +26,4 @@ keywords: - indicators - fairness license: MIT -version: 0.1.2 +version: 0.1.3 diff --git a/codemeta.json b/codemeta.json index c638e9e..f05b920 100644 --- a/codemeta.json +++ b/codemeta.json @@ -34,6 +34,6 @@ "operatingSystem": "Linux", "programmingLanguage": "Python", "relatedLink": "https://github.com/EVERSE-ResearchSoftware/indicators", - "version": "0.1.2", + "version": "0.1.3", "developmentStatus": "wip" } \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index 397607c..d49e701 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,6 +1,6 @@ [tool.poetry] name = "rsfc" -version = "0.1.2" +version = "0.1.3" description = "EVERSE Research Software Fairness Checks" authors = ["Andres Montero "] license = "MIT" diff --git a/requirements.txt b/requirements.txt index 66fadd4..5403f9a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -14,6 +14,7 @@ click-option-group==0.5.7 contourpy==1.3.2 contractions==0.1.73 cycler==0.12.1 +docker==7.1.0 docutils==0.21.2 duckdb==1.3.1 elementpath==4.8.0 @@ -32,7 +33,7 @@ jsonschema==4.24.0 jsonschema-specifications==2025.4.1 jupyter_core==5.8.1 kiwisolver==1.4.8 -lxml==5.4.0 +lxml==5.1.0 Markdown==3.8.1 MarkupSafe==3.0.2 matplotlib==3.10.3 @@ -72,6 +73,7 @@ six==1.17.0 snowballstemmer==3.0.1 somef==0.9.11 soupsieve==2.7 +tabulate==0.9.0 textblob==0.17.1 textsearch==0.0.24 threadpoolctl==3.6.0 @@ -84,4 +86,4 @@ typing_extensions==4.14.0 tzdata==2025.2 urllib3==2.5.0 validators==0.22.0 -xgboost==2.1.4 +xgboost==2.1.4 \ No newline at end of file diff --git a/src/rsfc/__init__.py b/src/rsfc/__init__.py index 0cfe0d8..52f9dca 100644 --- a/src/rsfc/__init__.py +++ b/src/rsfc/__init__.py @@ -1,5 +1,5 @@ # -*- coding: utf-8 -*- -__version__ = "0.1.2" +__version__ = "0.1.3" diff --git a/src/rsfc/harvesters/github_harvester.py b/src/rsfc/harvesters/github_harvester.py index 
def get_repo_default_branch(self):
    """Return the default branch name reported by the repository API.

    Issues a GET against ``self.api_url`` through ``self.safe_request`` and
    reads the ``"default_branch"`` entry of the decoded JSON payload.

    Returns:
        The value stored under ``"default_branch"``, or ``"main"`` when the
        payload has no such entry.
    """
    metadata = self.safe_request("GET", self.api_url).json()
    branch = metadata.get("default_branch", "main")
    return branch
f"https://gitlab.com/api/v4/projects/{project_path_encoded}/repository/files/CITATION.cff/raw" params = {'ref': branch} - response = self.session.get(req_url, params=params) - response.raise_for_status() + + response = self.safe_request("GET", req_url, params=params) return yaml.safe_load(response.text) + else: return None @@ -120,8 +122,7 @@ def get_soft_version(self): try: releases_url = f"{self.api_url}/releases" - response = self.session.get(releases_url) - response.raise_for_status() + response = self.safe_request("GET", releases_url) releases = response.json() latest_release = None @@ -158,11 +159,11 @@ def get_commits(self): if self.repo_type == "GITHUB": commits_url = f"{self.api_url}/commits?per_page=100" headers = {'Accept': 'application/vnd.github.v3.raw'} - response = self.session.get(commits_url, headers=headers) + response = self.safe_request("GET", commits_url, headers=headers) elif self.repo_type == "GITLAB": commits_url = f"{self.api_url}/repository/commits?ref_name={self.repo_branch}&per_page=100" - response = self.session.get(commits_url) + response = self.safe_request("GET", commits_url) else: raise ValueError(f"Not supported repository: {self.repo_type}") @@ -181,11 +182,11 @@ def get_issues(self): if self.repo_type == "GITHUB": issues_url = f"{self.api_url}/issues?state=all&per_page=100" headers = {'Accept': 'application/vnd.github.v3.raw'} - response = self.session.get(issues_url, headers=headers) + response = self.safe_request("GET", issues_url, headers=headers) elif self.repo_type == "GITLAB": issues_url = f"{self.api_url}/issues?state=all&per_page=100" - response = self.session.get(issues_url) + response = self.safe_request("GET", issues_url) else: raise ValueError(f"Not supported repository: {self.repo_type}") @@ -193,30 +194,69 @@ def get_issues(self): issues = [] if response.status_code == 200: data = response.json() - issues = [issue for issue in data if "pull_request" not in issue] #Filter pull requests + issues = [issue for issue in 
def safe_request(self, method, url, **kwargs):
    """Send a request through ``self.session`` and report rate-limit failures.

    Wrapper around ``self.session.request`` that turns a rate-limit response
    from the hosting API into a ``GithubRateLimitExceeded`` error with a
    readable message, instead of a generic HTTP error.

    Args:
        method: HTTP verb forwarded to ``session.request`` (e.g. ``"GET"``).
        url: Target URL.
        **kwargs: Forwarded unchanged to ``session.request``
            (headers, params, ...).

    Returns:
        The response object, once ``raise_for_status()`` has passed.

    Raises:
        GithubRateLimitExceeded: For a 403/429 response whose
            ``X-RateLimit-Remaining`` header is ``"0"`` when ``repo_type``
            equals ``constants.REPO_TYPES[0]``, or for any 429 response when
            ``repo_type`` equals ``constants.REPO_TYPES[1]``.
        Exception: Whatever ``response.raise_for_status()`` raises for other
            error statuses.
    """
    # Imported locally: the module's visible import block does not bring
    # ``datetime`` into scope, and without it the rate-limit path would die
    # with a NameError instead of the intended exception.
    from datetime import datetime

    def _parse_reset(raw_reset):
        """Convert an epoch-seconds header value to a datetime, or None."""
        if not raw_reset:
            return None
        try:
            return datetime.fromtimestamp(int(raw_reset))
        except (ValueError, OverflowError, OSError):
            # A malformed header must not mask the rate-limit error itself.
            return None

    response = self.session.request(method, url, **kwargs)
    status = response.status_code
    headers = response.headers

    # NOTE(review): REPO_TYPES[0] / REPO_TYPES[1] are presumably the GitHub
    # and GitLab entries (the messages below say so) - confirm in constants.
    if self.repo_type == constants.REPO_TYPES[0] and status in (403, 429):
        # Only an exhausted quota counts as a rate limit; other 403/429
        # responses fall through to raise_for_status().
        if headers.get("X-RateLimit-Remaining") == "0":
            reset_time = _parse_reset(headers.get("X-RateLimit-Reset"))
            if reset_time is not None:
                raise GithubRateLimitExceeded(
                    f"GitHub rate limit exceeded. Resets at {reset_time}."
                )
            raise GithubRateLimitExceeded("GitHub rate limit exceeded.")

    if self.repo_type == constants.REPO_TYPES[1] and status == 429:
        retry_after = headers.get("Retry-After")
        if retry_after:
            raise GithubRateLimitExceeded(
                f"GitLab rate limit exceeded. Retry after {retry_after} seconds."
            )
        reset_time = _parse_reset(headers.get("RateLimit-Reset"))
        if reset_time is not None:
            raise GithubRateLimitExceeded(
                f"GitLab rate limit exceeded. Resets at {reset_time}."
            )
        raise GithubRateLimitExceeded("GitLab rate limit exceeded.")

    response.raise_for_status()
    return response
file:, {badge_url}\n" - return table \ No newline at end of file + return table + info + badge \ No newline at end of file diff --git a/src/rsfc/rsfc_core.py b/src/rsfc/rsfc_core.py index cb9d55c..87219d4 100644 --- a/src/rsfc/rsfc_core.py +++ b/src/rsfc/rsfc_core.py @@ -5,6 +5,7 @@ from rsfc.harvesters import codemeta_harvester as cm from rsfc.harvesters import cff_harvester as cf from rsfc.harvesters import github_harvester as gt +from rsfc.utils import rsfc_helpers def start_assessment(repo_url, ftr, test_id, token): @@ -23,6 +24,7 @@ def start_assessment(repo_url, ftr, test_id, token): assess = asmt.Assessment(checks) rsfc_asmt = assess.render_template(sw, ftr, test_id) - table = assess.to_terminal_table(test_id) + badge_url = rsfc_helpers.generate_badge(checks) + table = assess.to_terminal_table(test_id, badge_url) return rsfc_asmt, table diff --git a/src/rsfc/rsfc_tests/rsfc_tests.py b/src/rsfc/rsfc_tests/rsfc_tests.py index 7097a8e..565cc43 100644 --- a/src/rsfc/rsfc_tests/rsfc_tests.py +++ b/src/rsfc/rsfc_tests/rsfc_tests.py @@ -843,14 +843,12 @@ def test_dependencies_have_version(somef_data): evidence = constants.EVIDENCE_DEPENDENCIES_VERSION suggest = "No suggestions" for item in somef_data['requirements']: - if 'README' not in item['source'] and "version" in item["result"]: - if item["result"]["version"]: - continue - else: - output = "false" - evidence = constants.EVIDENCE_NO_DEPENDENCIES_VERSION - suggest = constants.SUGGEST_NO_DEPENDENCIES_VERSION - break + if 'README' not in item['source']: + if not item["result"].get("version"): + output = "false" + evidence = constants.EVIDENCE_NO_DEPENDENCIES_VERSION + suggest = constants.SUGGEST_NO_DEPENDENCIES_VERSION + break check = ch.Check(constants.INDICATORS_DICT['requirements_specified'], 'RSFC-13-3', "Dependencies have version numbers", constants.PROCESS_DEPENDENCIES_VERSION, output, evidence, suggest) diff --git a/src/rsfc/utils/constants.py b/src/rsfc/utils/constants.py index 22a3aa3..b2888ae 100644 --- 
def generate_badge(checks):
    """Build the Markdown snippet for a shields.io badge showing the RSFC score.

    The score is the percentage of checks whose ``"output"`` entry equals the
    string ``"true"``, rounded to an integer with ``round``.

    Args:
        checks: Sized iterable of subscriptable check results, each read via
            ``check["output"]``.

    Returns:
        A Markdown image string pointing at ``img.shields.io`` whose color
        depends on the score: ``brightgreen`` (>= 90), ``green`` (>= 70),
        ``yellow`` (>= 50), otherwise ``red``.
    """
    total_checks = len(checks)
    passed_tests = sum(check["output"] == "true" for check in checks)

    # With no checks there is nothing to score; report 0 instead of raising
    # ZeroDivisionError.
    score = round((passed_tests / total_checks) * 100) if total_checks else 0

    # Thresholds are ordered from highest to lowest; the first match wins.
    color = "red"
    for threshold, name in ((90, "brightgreen"), (70, "green"), (50, "yellow")):
        if score >= threshold:
            color = name
            break

    return f"![RSFC_Score](https://img.shields.io/badge/rsfc-score_{score}/100-{color})"