Skip to content

Commit 5bd7d59

Browse files
committed
Merge branch '__rultor'
2 parents 3d34512 + f86da23 commit 5bd7d59

3 files changed

Lines changed: 53 additions & 7 deletions

File tree

sr-data/src/sr_data/steps/workflows.py

Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,7 @@
11
"""
22
Collect information about GitHub workflows in the repo.
33
"""
4+
import numpy as np
45
# The MIT License (MIT)
56
#
67
# Copyright (c) 2024 Aliaksei Bialiauski
@@ -30,7 +31,7 @@
3031

3132
def main(repos, out):
3233
frame = pd.read_csv(repos)
33-
frame["workflows"] = frame["workflows"].fillna("")
34+
frame["workflows"] = frame["workflows"].fillna(0)
3435
for idx, row in frame.iterrows():
3536
repo = row["repo"]
3637
branch = row["branch"]
@@ -65,13 +66,42 @@ def main(repos, out):
6566
if info["w_release"]:
6667
releases = True
6768
frame.at[idx, "workflows"] = len(ymls)
69+
frame["workflows"] = frame["workflows"]
6870
frame.at[idx, "w_jobs"] = tjobs
6971
frame.at[idx, "w_oss"] = len(set(oss))
7072
frame.at[idx, "w_steps"] = steps
71-
frame.at[idx, "has_release_workflow"] = releases
73+
frame.at[idx, "has_release_workflow"] = int(releases)
74+
frame.at[idx, "w_simplicity"] = w_score(frame.loc[idx])
7275
frame.to_csv(out, index=False)
7376
logger.info(f"Saved repositories to {out}")
7477

78+
# Names of the workflow metric columns (the same names used as keys of
# ``weights`` below).
wscope = [
    "workflows",
    "w_jobs",
    "w_oss",
    "w_steps",
    "has_release_workflow",
]
# Weight applied to each metric by ``w_score``. The key order is the order
# in which the weighted terms are summed, so keep it stable.
weights = dict(
    workflows=0.3,
    w_jobs=0.25,
    w_steps=0.25,
    w_oss=0.1,
    has_release_workflow=0.1,
)
86+
87+
def w_score(row) -> float:
    """
    Workflow simplicity score.

    Every metric named in ``weights`` is inverted as ``1 - value`` and the
    inverted values are combined into a weighted sum, so the result is a
    float rather than an integer.

    :param row: Record indexable by each key of ``weights`` (for example a
     single DataFrame row).
    :return: Calculated metric for workflow simplicity score.
    @todo #244:35min Enhance workflow simplicity score with min and max adjustment.
     Currently, we just subtract collected value from 1. We should adjust it with
     min and max values from the dataset. So formula should look like:
     1 - (row - min) / (max - min).
    """
    # Terms are accumulated in the key order of ``weights``.
    return sum((1 - row[key]) * weight for key, weight in weights.items())
104+
75105

76106
def fetch(path) -> str:
77107
return requests.get(f"https://raw.githubusercontent.com/{path}").text
Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,2 @@
1+
repo,workflows,w_jobs,w_steps,w_oss,has_release_workflow
2+
foo/bar,1,2,3,3,0

sr-data/src/tests/test_workflows.py

Lines changed: 19 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -29,8 +29,7 @@
2929
import pandas as pd
3030
import pytest
3131
import yaml
32-
from sr_data.steps.workflows import workflow_info, main, fetch, \
33-
used_for_releases
32+
from sr_data.steps.workflows import workflow_info, main, fetch, used_for_releases, w_score
3433

3534

3635
class TestWorkflows(unittest.TestCase):
@@ -90,7 +89,7 @@ def test_outputs_workflow_info_correctly(self):
9089
f"Steps count in workflow: '{info}' does not match with expected"
9190
)
9291

93-
@pytest.mark.fast
92+
@pytest.mark.nightly
9493
def test_collects_unique_oss_across_all_files(self):
9594
with TemporaryDirectory() as temp:
9695
path = os.path.join(temp, "workflows.csv")
@@ -109,7 +108,7 @@ def test_collects_unique_oss_across_all_files(self):
109108
f"OSS count: {oss} does not match with expected: {expected}"
110109
)
111110

112-
@pytest.mark.fast
111+
@pytest.mark.nightly
113112
def test_collects_workflows_for_all(self):
114113
with TemporaryDirectory() as temp:
115114
path = os.path.join(temp, "workflows.csv")
@@ -129,7 +128,7 @@ def test_collects_workflows_for_all(self):
129128
f"Frame {frame.columns} doesn't have expected columns"
130129
)
131130

132-
@pytest.mark.fast
131+
@pytest.mark.nightly
133132
def test_counts_workflows_correctly(self):
134133
with TemporaryDirectory() as temp:
135134
path = os.path.join(temp, "workflows.csv")
@@ -403,3 +402,18 @@ def test_parses_oss_as_list_in_matrix(self):
403402
0,
404403
f"Steps count in workflow: '{info}' does not match with expected"
405404
)
405+
406+
407+
@pytest.mark.fast
def test_calculates_simplicity_score(self):
    """
    Score the first row of the ``to-wscore.csv`` fixture and compare the
    result with the expected value.
    """
    fixture = os.path.join(
        os.path.dirname(os.path.realpath(__file__)),
        "resources/to-wscore.csv"
    )
    scores = pd.read_csv(fixture)
    # The score is a sum of float products, so compare with a tolerance
    # instead of exact equality to stay robust against rounding.
    self.assertAlmostEqual(
        w_score(scores.iloc[0]),
        -0.85,
        msg="Calculated score does not match with expected"
    )

0 commit comments

Comments
 (0)