Skip to content

Commit da2ce1a

Browse files
Updating the ground truth dataset and pages
1 parent 70c4dd9 commit da2ce1a

13 files changed

Lines changed: 14175 additions & 2962 deletions

BACKUP/ground_truth/comparison.html

Lines changed: 529 additions & 0 deletions
Large diffs are not rendered by default.

BACKUP/ground_truth/index.html

Lines changed: 305 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,305 @@
1+
<!DOCTYPE html>
2+
<html lang="en">
3+
<head>
4+
<meta charset="UTF-8">
5+
<meta name="viewport" content="width=device-width, initial-scale=1.0">
6+
<title>RsMetaCheck Pitfalls Report</title>
7+
<style>
8+
body {
9+
font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
10+
background-color: #ffffff;
11+
color: #333333;
12+
margin: 0;
13+
padding: 20px;
14+
}
15+
h1 {
16+
text-align: center;
17+
color: #2c3e50;
18+
margin-bottom: 30px;
19+
}
20+
.table-container {
21+
max-width: 95%;
22+
margin: 0 auto;
23+
overflow-x: auto;
24+
box-shadow: 0 4px 6px rgba(0,0,0,0.1);
25+
border-radius: 8px;
26+
}
27+
table {
28+
width: 100%;
29+
border-collapse: collapse;
30+
background-color: #fff;
31+
}
32+
th, td {
33+
padding: 6px 10px;
34+
text-align: left;
35+
border-bottom: 1px solid #e0e0e0;
36+
word-wrap: break-word;
37+
word-break: break-word;
38+
max-width: 350px;
39+
}
40+
th {
41+
background-color: #f8f9fa;
42+
font-weight: 600;
43+
color: #2c3e50;
44+
position: sticky;
45+
top: 0;
46+
z-index: 10;
47+
}
48+
tbody:hover {
49+
background-color: #f5f5f5;
50+
}
51+
.repo-link {
52+
color: #3498db;
53+
text-decoration: none;
54+
word-break: break-all;
55+
}
56+
.repo-link:hover {
57+
text-decoration: underline;
58+
}
59+
.code-badge {
60+
display: inline-block;
61+
padding: 4px 8px;
62+
border-radius: 4px;
63+
font-size: 0.85em;
64+
font-weight: bold;
65+
}
66+
.code-p {
67+
background-color: #ffebee;
68+
color: #c0392b;
69+
border: 1px solid #ffcdd2;
70+
}
71+
.code-w {
72+
background-color: #fff8e1;
73+
color: #e67e22;
74+
border: 1px solid #ffecb3;
75+
}
76+
.desc-text {
77+
font-size: 0.9em;
78+
color: #666;
79+
margin-top: 4px;
80+
}
81+
.cell-only-030 {
82+
background-color: #ffebee !important;
83+
}
84+
.cell-only-032 {
85+
background-color: #e8f5e9 !important;
86+
}
87+
.cell-missing {
88+
color: #bbb;
89+
font-style: italic;
90+
text-align: center;
91+
}
92+
.section-divider {
93+
border: none;
94+
border-top: 3px solid #e0e0e0;
95+
max-width: 95%;
96+
margin: 40px auto 20px auto;
97+
}
98+
.section-title {
99+
text-align: center;
100+
color: #2c3e50;
101+
margin: 0 auto 20px auto;
102+
}
103+
.nav {
104+
text-align: center;
105+
margin-bottom: 30px;
106+
}
107+
.nav a {
108+
color: #3498db;
109+
text-decoration: none;
110+
font-size: 0.95em;
111+
padding: 6px 14px;
112+
border: 1px solid #3498db;
113+
border-radius: 4px;
114+
}
115+
.nav a:hover {
116+
background-color: #3498db;
117+
color: #fff;
118+
}
119+
</style>
120+
</head>
121+
<body>
122+
123+
<div class="nav">
124+
<a href="comparison.html">0.3.0 vs 0.3.1 Comparison &rarr;</a>
125+
</div>
126+
127+
<h1>Ground Truth Report</h1>
128+
129+
<p style="text-align: center; color: #555; font-size: 0.95em; line-height: 1.5; max-width: 800px; margin: 0 auto 20px auto;">
130+
This ground truth study evaluated 84 software repositories from the OpenAIRE Graph.
131+
All repositories contain a <code>codemeta.json</code> file and were examined across
132+
multiple metadata sources including package manifests (e.g., <code>setup.py</code>,
133+
<code>package.json</code>, <code>DESCRIPTION</code>), citation files
134+
(<code>CITATION.cff</code>), licensing files, README documentation, and other
135+
structured or semi-structured metadata artifacts.
136+
</p>
137+
138+
<p id="summary-text" style="text-align: center; color: #444; font-size: 1.05em; line-height: 1.4;"></p>
139+
140+
<div class="table-container">
141+
<!-- Header-only table: one <tbody> per repository is appended at runtime by buildGroundTruthTable(). -->
<table id="pitfallsTable">
  <thead>
    <tr>
      <!-- scope="col" ties each header to its column so screen readers can announce it for the row-spanned cells below. -->
      <th scope="col">Repository</th>
      <th scope="col">Commit ID</th>
      <th scope="col">Pitfall / Warning Code</th>
      <th scope="col">Description</th>
      <th scope="col">Source File</th>
    </tr>
  </thead>
</table>
152+
</div>
153+
154+
<script>
155+
// Once the DOM is ready, load the ground truth and the two tool-result summaries,
// work out which repositories appear in all three, and render the table.
document.addEventListener("DOMContentLoaded", () => {
  const gtTable = document.querySelector('#pitfallsTable');

  /**
   * Fetch `path` and parse it as JSON.
   * fetch() only rejects on network failure, so HTTP error statuses are turned
   * into explicit errors here instead of surfacing later as a JSON parse error.
   */
  const loadJson = (path) =>
    fetch(path).then((response) => {
      if (!response.ok) {
        throw new Error(`Request for ${path} failed with HTTP ${response.status}`);
      }
      return response.json();
    });

  Promise.all([
    loadJson('summary_pitfalls_warnings.json'),
    loadJson('summary_0_3_0.json'),
    loadJson('summary_0_3_1.json')
  ])
    .then(([gtData, data030, data031]) => {
      // Derive "owner/repo" from each ground-truth URL (its last two path segments).
      const gtNames = new Set();
      for (const entry of Object.values(gtData)) {
        const url = (entry.url || '').replace(/\/$/, "").split("/");
        if (url.length >= 2) gtNames.add(url.slice(-2).join("/"));
      }

      // Keep only repositories that also have an entry in both result files.
      const allThree = new Set(
        [...gtNames].filter(r => data030[r] && data031[r])
      );

      buildGroundTruthTable(gtTable, gtData, data030, data031, allThree);
    })
    .catch(err => {
      console.error('Failed to load data:', err);

      // Build the error row with DOM calls rather than `innerHTML +=`, which
      // would re-serialise and re-parse everything already in the table.
      const errorCell = document.createElement('td');
      errorCell.colSpan = 5;
      errorCell.style.textAlign = 'center';
      errorCell.style.color = 'red';
      errorCell.textContent = 'Error loading data.';

      const errorRow = document.createElement('tr');
      errorRow.appendChild(errorCell);

      const errorBody = document.createElement('tbody');
      errorBody.appendChild(errorRow);
      gtTable.appendChild(errorBody);
    });
});
180+
181+
/**
 * Select the ground-truth findings that at least one tool version also reported.
 *
 * @param {Object<string, {source_file: string, description: string}>} [gtFindings]
 *   Ground-truth findings keyed by code (pitfalls or warnings of one repository).
 * @param {Object<string, Object>} [res031Findings] Findings of the same kind from the 0.3.1 results.
 * @param {Object<string, Object>} [res030Findings] Findings of the same kind from the 0.3.0 results.
 * @param {string[]} [skippedCodes] Codes that must never be included.
 * @returns {Object<string, {source_file: string, description: string}>}
 *   Matched findings keyed by code, in ground-truth order.
 */
function collectMatchedFindings(gtFindings, res031Findings, res030Findings, skippedCodes = []) {
  // Placeholder the ground truth uses when it does not name one concrete file.
  const GENERIC_SOURCE = 'Metadata files (codemeta.json, setup.py, pom.xml etc...)';

  const matched = {};
  for (const [code, info] of Object.entries(gtFindings || {})) {
    if (skippedCodes.includes(code)) continue;

    // Prefer the 0.3.1 result; fall back to 0.3.0.
    const detected =
      (res031Findings && res031Findings[code]) ||
      (res030Findings && res030Findings[code]);
    if (!detected) continue;

    const gtInfo = { source_file: info.source_file, description: info.description };
    // Replace the generic placeholder with the concrete file the tool reported, when it has one.
    if (gtInfo.source_file === GENERIC_SOURCE && detected.source_file) {
      gtInfo.source_file = detected.source_file;
    }
    matched[code] = gtInfo;
  }
  return matched;
}

/**
 * Render one <tbody> per repository into `table`, listing every ground-truth
 * pitfall and warning that was also detected by version 0.3.0 or 0.3.1.
 *
 * @param {HTMLTableElement} table Table that receives the generated <tbody> elements.
 * @param {Object<string, Object>} gtData Ground-truth entries; each may carry `url`, `pitfalls`, `warnings`.
 * @param {Object<string, Object>} data030 0.3.0 results keyed by "owner/repo".
 * @param {Object<string, Object>} data031 0.3.1 results keyed by "owner/repo".
 * @param {Set<string>} allThree "owner/repo" names present in all three data sets; others are skipped.
 */
function buildGroundTruthTable(table, gtData, data030, data031, allThree) {
  for (const repoData of Object.values(gtData)) {
    const url = repoData.url || 'Unknown URL';
    // "owner/repo" is the last two path segments of the URL, ignoring a trailing slash.
    const repoName = url.replace(/\/$/, "").split("/").slice(-2).join("/");

    if (!allThree.has(repoName)) continue;

    const r030 = data030[repoName] || {};
    const r031 = data031[repoName] || {};

    const pEntries = Object.entries(
      collectMatchedFindings(repoData.pitfalls, r031.pitfalls, r030.pitfalls)
    );
    // W003 is deliberately left out of the table (the reason is not visible in this file).
    const wEntries = Object.entries(
      collectMatchedFindings(repoData.warnings, r031.warnings, r030.warnings, ['W003'])
    );
    const totalRows = pEntries.length + wEntries.length;

    // Repositories with nothing matched get no section at all.
    if (totalRows === 0) continue;

    const tbody = document.createElement('tbody');
    // Only the first row of a repository carries the row-spanned repo/commit cells.
    let isFirstContext = true;

    for (const [code, info] of pEntries) {
      addGTRow(tbody, url, repoName, code, info, 'p', isFirstContext, totalRows, repoData, allThree);
      isFirstContext = false;
    }
    for (const [code, info] of wEntries) {
      addGTRow(tbody, url, repoName, code, info, 'w', isFirstContext, totalRows, repoData, allThree);
      isFirstContext = false;
    }

    table.appendChild(tbody);
  }
}
245+
246+
/**
 * Create an anchor styled as a repository link that opens in a new tab.
 *
 * @param {string} href Link target.
 * @param {string} label Visible link text.
 * @returns {HTMLAnchorElement} The configured anchor element.
 */
function createExternalLink(href, label) {
  const link = document.createElement('a');
  link.href = href;
  link.textContent = label;
  link.className = 'repo-link';
  link.target = '_blank';
  // Keep the opened page from reaching this one through window.opener and
  // from receiving the referrer.
  link.rel = 'noopener noreferrer';
  return link;
}

/**
 * Append one finding row to `tbody`.
 *
 * The first row of a repository additionally gets the repository and commit
 * cells, each spanning all `totalRows` rows of that repository.
 *
 * @param {HTMLTableSectionElement} tbody Section receiving the new row.
 * @param {string} url Repository URL.
 * @param {string} repoName Display name shown for the repository link.
 * @param {string} code Pitfall or warning code shown in the badge.
 * @param {{description?: string, source_file?: string}} info Finding details.
 * @param {string} type Badge variant suffix; callers pass 'p' or 'w' (CSS class `code-<type>`).
 * @param {boolean} isFirstContext Whether this is the repository's first row.
 * @param {number} totalRows Row span for the repository and commit cells.
 * @param {Object} repoData Ground-truth entry; its 'commit ID' field is read.
 * @param {Set<string>} allThree Unused here; kept so existing callers keep working.
 */
function addGTRow(tbody, url, repoName, code, info, type, isFirstContext, totalRows, repoData, allThree) {
  const tr = document.createElement('tr');

  if (isFirstContext) {
    const tdRepo = document.createElement('td');
    tdRepo.appendChild(createExternalLink(url, repoName));
    tdRepo.rowSpan = totalRows;
    tr.appendChild(tdRepo);

    const tdCommit = document.createElement('td');
    tdCommit.style.whiteSpace = 'nowrap';
    const commitId = repoData['commit ID'];
    if (commitId) {
      // Coerce so a non-string commit ID in the JSON cannot break substring().
      const commitText = String(commitId);
      const cleanUrl = url.endsWith('/') ? url.slice(0, -1) : url;
      // URLs containing "gitlab" use the /-/tree/ browse path; everything else uses /tree/.
      const treePath = cleanUrl.includes('gitlab') ? '/-/tree/' : '/tree/';
      const aCommit = createExternalLink(
        `${cleanUrl}${treePath}${commitText}`,
        commitText.substring(0, 7) // abbreviated hash
      );
      aCommit.style.wordBreak = 'normal';
      tdCommit.appendChild(aCommit);
    } else {
      tdCommit.textContent = 'Unknown';
    }
    tdCommit.rowSpan = totalRows;
    tr.appendChild(tdCommit);
  }

  const tdCode = document.createElement('td');
  const spanCode = document.createElement('span');
  spanCode.textContent = code;
  spanCode.className = `code-badge code-${type}`;
  tdCode.appendChild(spanCode);

  const tdDesc = document.createElement('td');
  // The placeholder description is treated the same as a missing one.
  if (info.description && info.description !== "No description available") {
    tdDesc.textContent = info.description;
    tdDesc.className = 'desc-text';
  } else {
    tdDesc.textContent = '';
  }

  const tdSource = document.createElement('td');
  tdSource.textContent = info.source_file || 'Unknown';

  tr.appendChild(tdCode);
  tr.appendChild(tdDesc);
  tr.appendChild(tdSource);

  tbody.appendChild(tr);
}
303+
</script>
304+
</body>
305+
</html>

0 commit comments

Comments
 (0)