Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion CITATION.cff
Original file line number Diff line number Diff line change
Expand Up @@ -34,4 +34,4 @@ keywords:
- indicators
- fairness
license: MIT
version: 0.1.5
version: 0.1.6
2 changes: 2 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@ Given a repository URL, RSFC will perform a series of checks based on a list of
- versioning_standards_use
- support_issue_tracking
- has_contribution_guidelines
- project_is_active
- software_is_containerized

For more information about these RSQIs, you can check https://github.com/EVERSE-ResearchSoftware/indicators. We have plans to implement all of the RSQIs available in that repository.

Expand Down
651 changes: 333 additions & 318 deletions RSFC_REPORT.md

Large diffs are not rendered by default.

2 changes: 1 addition & 1 deletion codemeta.json
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,6 @@
"operatingSystem": "Linux",
"programmingLanguage": "Python",
"relatedLink": "https://github.com/EVERSE-ResearchSoftware/indicators",
"version": "0.1.5",
"version": "0.1.6",
"developmentStatus": "wip"
}
287 changes: 184 additions & 103 deletions poetry.lock

Large diffs are not rendered by default.

8 changes: 4 additions & 4 deletions pyproject.toml
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
[tool.poetry]
name = "rsfc"
version = "0.1.5"
version = "0.1.6"
description = "EVERSE Research Software Fairness Checks"
authors = [
"Andres Montero <andres.montero.martin@upm.es>",
Expand All @@ -15,7 +15,7 @@ homepage = "https://github.com/oeg-upm/rsfc"

[tool.poetry.dependencies]
python = ">=3.11,<3.13"
somef = "0.10.1"
somef = "0.10.3"
regex = "2024.11.6"
requests = "2.32.4"
anyascii = "0.3.2"
Expand All @@ -38,7 +38,7 @@ falcon = "3.1.3"
fastjsonschema = "2.21.1"
fonttools = "4.58.4"
idna = "3.10"
imbalanced-learn = "0.12.4"
imbalanced-learn = ">=0.14.1,<0.15.0"
inflect = "7.5.0"
iniconfig = "2.1.0"
jinja2 = "3.1.6"
Expand All @@ -48,7 +48,7 @@ jsonschema = "4.24.0"
jsonschema-specifications = "2025.4.1"
jupyter-core = "5.8.1"
kiwisolver = "1.4.8"
lxml = "5.1.0"
lxml = "^6.1.0"
markdown = "3.8.1"
markupsafe = "3.0.2"
matplotlib = "3.10.3"
Expand Down
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -71,7 +71,7 @@ scikit-learn==1.5.0
scipy==1.15.3
six==1.17.0
snowballstemmer==3.0.1
somef==0.10.1
somef==0.10.3
soupsieve==2.7
tabulate==0.9.0
textblob==0.17.1
Expand Down
35 changes: 0 additions & 35 deletions src/rsfc/harvesters/cff_harvester.py

This file was deleted.

39 changes: 0 additions & 39 deletions src/rsfc/harvesters/codemeta_harvester.py

This file was deleted.

51 changes: 44 additions & 7 deletions src/rsfc/harvesters/github_harvester.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
import requests
from datetime import datetime
from datetime import datetime, timedelta, timezone
import urllib
import yaml
from rsfc.utils import constants
Expand All @@ -19,6 +19,7 @@ def __init__(self, repo_url, branch, tag, token):
self.codemeta = self.get_codemeta_file()
self.commits = self.get_commits()
self.issues = self.get_issues()
self.bug_issues = self.get_bugs()
self.tests = self.get_tests()


Expand Down Expand Up @@ -153,13 +154,16 @@ def get_soft_version(self):


def get_commits(self):
since = (datetime.now(timezone.utc) - timedelta(days=90)).isoformat()
commits_url = ""

if self.repo_type == "GITHUB":
commits_url = f"{self.api_url}/commits?sha={self.repo_branch}&per_page=100"
commits_url = f"{self.api_url}/commits?sha={self.repo_branch}&since={since}&per_page=100"
headers = {'Accept': 'application/vnd.github.v3.raw'}
response = self.safe_request("GET", commits_url, headers=headers)

elif self.repo_type == "GITLAB":
commits_url = f"{self.api_url}/repository/commits?ref_name={self.repo_branch}&per_page=100"
commits_url = f"{self.api_url}/repository/commits?ref_name={self.repo_branch}&since={since}&per_page=100"
response = self.safe_request("GET", commits_url)

else:
Expand All @@ -171,31 +175,64 @@ def get_commits(self):
print(f"Error getting commits: {response.status_code}")
commits = []

return commits
return commits_url, commits



def get_issues(self):
    """Return the repository issues updated during the last 90 days.

    Only the first page (up to 100 entries) of the listing is requested.
    For GitHub, entries carrying a "pull_request" key are dropped so that
    only real issues remain; for GitLab the payload is returned as-is.

    Returns:
        list: decoded JSON issue objects, or an empty list when the API
        does not answer with HTTP 200.

    Raises:
        ValueError: if ``self.repo_type`` is neither "GITHUB" nor "GITLAB".
    """
    # Use the compact "...Z" UTC form instead of isoformat(): the latter
    # emits a "+00:00" offset, and a literal "+" inside a query string is
    # decoded as a space, which corrupts the timestamp filter.
    since = (datetime.now(timezone.utc) - timedelta(days=90)).strftime("%Y-%m-%dT%H:%M:%SZ")

    if self.repo_type == "GITHUB":
        issues_url = f"{self.api_url}/issues?state=all&since={since}&per_page=100"
        headers = {'Accept': 'application/vnd.github.v3.raw'}
        response = self.safe_request("GET", issues_url, headers=headers)

    elif self.repo_type == "GITLAB":
        issues_url = f"{self.api_url}/issues?state=all&updated_after={since}&per_page=100"
        response = self.safe_request("GET", issues_url)

    else:
        raise ValueError(f"Not supported repository: {self.repo_type}")

    if response.status_code != 200:
        print(f"Error getting issues: {response.status_code}")
        return []

    data = response.json()

    if self.repo_type == "GITHUB":
        # The GitHub listing mixes pull requests with issues.
        return [issue for issue in data if "pull_request" not in issue]

    return data

def get_bugs(self, label="bug"):
    """Return the repository issues tagged with ``label``.

    Only the first page (up to 100 entries) of the listing is requested.
    For GitHub, entries carrying a "pull_request" key are dropped so that
    only real issues remain.

    Args:
        label: issue label to filter by. Defaults to "bug", which yields
            the same request as the previous hard-coded behaviour.

    Returns:
        list: decoded JSON issue objects, or an empty list when the API
        does not answer with HTTP 200.

    Raises:
        ValueError: if ``self.repo_type`` is neither "GITHUB" nor "GITLAB".
    """
    # Imported locally so the block does not rely on the module-level
    # ``import urllib`` exposing the ``parse`` submodule.
    from urllib.parse import quote

    # Labels may contain spaces or reserved characters (e.g. "type: bug").
    encoded_label = quote(label, safe="")

    if self.repo_type == "GITHUB":
        issues_url = f"{self.api_url}/issues?state=all&labels={encoded_label}&per_page=100"
        headers = {'Accept': 'application/vnd.github.v3+json'}
        response = self.safe_request("GET", issues_url, headers=headers)

    elif self.repo_type == "GITLAB":
        issues_url = f"{self.api_url}/issues?labels={encoded_label}&per_page=100"
        response = self.safe_request("GET", issues_url)

    else:
        raise ValueError(f"Not supported repository: {self.repo_type}")

    if response.status_code != 200:
        print(f"Error getting bugs: {response.status_code}")
        return []

    bugs = response.json()

    if self.repo_type == "GITHUB":
        # The GitHub listing mixes pull requests with issues.
        bugs = [issue for issue in bugs if "pull_request" not in issue]

    return bugs



def get_tests(self):
Expand Down
85 changes: 49 additions & 36 deletions src/rsfc/harvesters/somef_harvester.py
Original file line number Diff line number Diff line change
@@ -1,70 +1,83 @@
import io
import contextlib
import json
from somef import somef_cli
import subprocess
import os
import tempfile
import contextlib
import subprocess

from somef.somef_cli import run_cli


class SomefHarvester:
    """Configure SOMEF and harvest a repository's metadata with it.

    On construction the ``somef configure`` command is run and the SOMEF
    extraction is executed; the parsed JSON result is stored in
    ``self.somef_data``.
    """

    def __init__(self, repo_url, branch=None, tag=None, token=None):
        """Configure SOMEF and immediately run the assessment.

        Args:
            repo_url: URL of the repository to analyse.
            branch: optional branch forwarded to SOMEF.
            tag: optional tag forwarded to SOMEF (only used when no branch
                is given).
            token: optional token fed to ``somef configure``.
        """
        self.somef_configure(token)
        self.somef_data = self.somef_assessment(repo_url=repo_url, branch=branch, tag=tag, threshold=0.8)

    def somef_configure(self, token):
        """Run ``somef configure``, feeding it ``token`` when one is given.

        Raises:
            RuntimeError: if the configure command exits with a non-zero
                status.
        """
        print("Configuring SOMEF...")

        if token:
            configure = ["somef", "configure"]
            # Answer the interactive prompts: the token first, then five
            # empty lines for the remaining questions.
            stdin_data = f"{token}\n" + "\n" * 5
        else:
            configure = ["somef", "configure", "-a"]
            stdin_data = None

        try:
            subprocess.run(
                configure,
                input=stdin_data,
                text=True,
                check=True,
                stdout=subprocess.DEVNULL,
                stderr=subprocess.DEVNULL,
            )
        except subprocess.CalledProcessError as e:
            raise RuntimeError("SOMEF configuration failed") from e

    def somef_assessment(self, repo_url, branch=None, tag=None, threshold=0.8):
        """Run SOMEF on ``repo_url`` and return the parsed JSON output.

        SOMEF writes its result to ``./rsfc_output/somef_assessment.json``,
        which is then read back.

        Args:
            repo_url: URL of the repository to analyse.
            branch: optional branch; takes precedence over ``tag``.
            tag: optional tag, used only when ``branch`` is None.
            threshold: value passed to SOMEF as ``threshold``.

        Returns:
            The object decoded from the JSON file produced by SOMEF.

        Raises:
            RuntimeError: if SOMEF did not produce the output file.
        """
        print("Extracting repository metadata with SOMEF...")

        os.makedirs("./rsfc_output/", exist_ok=True)

        output_json = "./rsfc_output/somef_assessment.json"

        # Drop any file left by a previous run; otherwise a failed
        # extraction would go unnoticed and stale data would be loaded.
        with contextlib.suppress(FileNotFoundError):
            os.remove(output_json)

        somef_kwargs = {
            "threshold": threshold,
            "ignore_classifiers": True,
            "repo_url": repo_url,
            "readme_only": False,
            "output": output_json,
            "pretty": True,
        }

        if branch is not None:
            somef_kwargs["branch"] = branch
        elif tag is not None:
            somef_kwargs["tag"] = tag

        # Silence SOMEF's console output while it runs.
        with contextlib.redirect_stdout(io.StringIO()), contextlib.redirect_stderr(io.StringIO()):
            run_cli(**somef_kwargs)

        if not os.path.exists(output_json):
            raise RuntimeError(
                "SOMEF did not generate the expected JSON output"
            )

        with open(output_json, "r", encoding="utf-8") as f:
            repo_data = json.load(f)

        return repo_data
Loading
Loading