|
1 | 1 | """ |
2 | 2 | Collect information about GitHub workflows in the repo. |
3 | 3 | """ |
| 4 | +import numpy as np |
4 | 5 | # The MIT License (MIT) |
5 | 6 | # |
6 | 7 | # Copyright (c) 2024 Aliaksei Bialiauski |
|
30 | 31 |
|
31 | 32 | def main(repos, out): |
32 | 33 | frame = pd.read_csv(repos) |
33 | | - frame["workflows"] = frame["workflows"].fillna("") |
| 34 | + frame["workflows"] = frame["workflows"].fillna(0) |
34 | 35 | for idx, row in frame.iterrows(): |
35 | 36 | repo = row["repo"] |
36 | 37 | branch = row["branch"] |
@@ -65,13 +66,42 @@ def main(repos, out): |
65 | 66 | if info["w_release"]: |
66 | 67 | releases = True |
67 | 68 | frame.at[idx, "workflows"] = len(ymls) |
| 69 | + frame["workflows"] = frame["workflows"] |
68 | 70 | frame.at[idx, "w_jobs"] = tjobs |
69 | 71 | frame.at[idx, "w_oss"] = len(set(oss)) |
70 | 72 | frame.at[idx, "w_steps"] = steps |
71 | | - frame.at[idx, "has_release_workflow"] = releases |
| 73 | + frame.at[idx, "has_release_workflow"] = int(releases) |
| 74 | + frame.at[idx, "w_simplicity"] = w_score(frame.loc[idx]) |
72 | 75 | frame.to_csv(out, index=False) |
73 | 76 | logger.info(f"Saved repositories to {out}") |
74 | 77 |
|
# Names of the per-repository workflow metric columns that feed the score.
wscope = ["workflows", "w_jobs", "w_oss", "w_steps", "has_release_workflow"]
# Relative contribution of each metric to the simplicity score; sums to 1.
weights = {
    "workflows": 0.3,
    "w_jobs": 0.25,
    "w_steps": 0.25,
    "w_oss": 0.1,
    "has_release_workflow": 0.1,
}


def w_score(row) -> float:
    """
    Workflow simplicity score.
    :param row: Mapping-like record (dict or pandas Series) holding a numeric
     value for every key of ``weights``.
    :return: Weighted sum of ``1 - value`` over all weighted metrics. Values
     are not normalized yet, so the result is negative whenever the metrics
     exceed 1 in total weight.
    :raises KeyError: If a weighted metric is missing from the row.
    @todo #244:35min Enhance workflow simplicity score with min and max adjustment.
     Currently, we just subtract collected value from 1. We should adjust it with
     min and max values from the dataset. So formula should look like:
     1 - (row - min) / (max - min).
    """
    # Iterate the weights themselves so the metric list lives in one place
    # and cannot drift from the set of keys being scored.
    return sum((1 - row[key]) * weight for key, weight in weights.items())
| 104 | + |
75 | 105 |
|
def fetch(path, timeout=30) -> str:
    """
    Download a raw file from GitHub.
    :param path: Location appended to ``https://raw.githubusercontent.com/``.
    :param timeout: Seconds to wait for the server before giving up.
    :return: Response body decoded as text. The HTTP status is not checked,
     so an error page body is returned as-is.
    """
    # Without an explicit timeout requests waits forever on a stalled
    # connection, which would hang the whole collection run.
    response = requests.get(
        f"https://raw.githubusercontent.com/{path}", timeout=timeout
    )
    return response.text
|
0 commit comments