<!DOCTYPE html>
<html lang="en">
<head>
<!-- Character encoding is declared first so it is seen before any text content. -->
<meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>RsMetaCheck Pitfalls Report</title>
<style>
  /* ---------- Page base ---------- */
  body {
    font-family: 'Segoe UI', Tahoma, Geneva, Verdana, sans-serif;
    background-color: #ffffff;
    color: #333333;
    margin: 0;
    padding: 20px;
  }

  h1 {
    text-align: center;
    color: #2c3e50;
    margin-bottom: 30px;
  }

  /* ---------- Table shell ---------- */
  /* Horizontally scrollable wrapper so a wide table never overflows the page. */
  .table-container {
    max-width: 95%;
    margin: 0 auto;
    overflow-x: auto;
    box-shadow: 0 4px 6px rgba(0, 0, 0, 0.1);
    border-radius: 8px;
  }

  table {
    width: 100%;
    border-collapse: collapse;
    background-color: #fff;
  }

  /* Long URLs and file paths are allowed to break inside a cell. */
  th,
  td {
    padding: 6px 10px;
    text-align: left;
    border-bottom: 1px solid #e0e0e0;
    word-wrap: break-word;
    word-break: break-word;
    max-width: 350px;
  }

  /* NOTE(review): the sticky header sticks relative to .table-container (a scroll
     container because of overflow-x), not the viewport - confirm this is intended. */
  th {
    background-color: #f8f9fa;
    font-weight: 600;
    color: #2c3e50;
    position: sticky;
    top: 0;
    z-index: 10;
  }

  /* Highlights a whole <tbody> group on hover. */
  tbody:hover {
    background-color: #f5f5f5;
  }

  /* ---------- Links ---------- */
  .repo-link {
    color: #3498db;
    text-decoration: none;
    word-break: break-all;
  }

  /* Keyboard focus gets the same cue as mouse hover. */
  .repo-link:hover,
  .repo-link:focus-visible {
    text-decoration: underline;
  }

  /* ---------- Code badges ---------- */
  .code-badge {
    display: inline-block;
    padding: 4px 8px;
    border-radius: 4px;
    font-size: 0.85em;
    font-weight: bold;
  }

  /* Red badge variant (class suffix "p"). */
  .code-p {
    background-color: #ffebee;
    color: #c0392b;
    border: 1px solid #ffcdd2;
  }

  /* Orange badge variant (class suffix "w"). */
  .code-w {
    background-color: #fff8e1;
    color: #e67e22;
    border: 1px solid #ffecb3;
  }

  .desc-text {
    font-size: 0.9em;
    color: #666;
    margin-top: 4px;
  }

  /* ---------- Helpers not referenced by this page's markup ---------- */
  /* NOTE(review): presumably shared with the comparison page - confirm before removing. */
  .cell-only-030 {
    background-color: #ffebee !important;
  }

  .cell-only-032 {
    background-color: #e8f5e9 !important;
  }

  .cell-missing {
    color: #bbb;
    font-style: italic;
    text-align: center;
  }

  .section-divider {
    border: none;
    border-top: 3px solid #e0e0e0;
    max-width: 95%;
    margin: 40px auto 20px auto;
  }

  .section-title {
    text-align: center;
    color: #2c3e50;
    margin: 0 auto 20px auto;
  }

  /* ---------- Top navigation ---------- */
  .nav {
    text-align: center;
    margin-bottom: 30px;
  }

  .nav a {
    color: #3498db;
    text-decoration: none;
    font-size: 0.95em;
    padding: 6px 14px;
    border: 1px solid #3498db;
    border-radius: 4px;
  }

  /* Keyboard focus gets the same inverted look as mouse hover. */
  .nav a:hover,
  .nav a:focus-visible {
    background-color: #3498db;
    color: #fff;
  }
</style>
</head>
<body>

<!-- Link to the companion version-comparison page. -->
<nav class="nav" aria-label="Report pages">
  <a href="comparison.html">0.3.0 vs 0.3.1 Comparison →</a>
</nav>

<h1>Ground Truth Report</h1>

<p style="text-align: center; color: #555; font-size: 0.95em; line-height: 1.5; max-width: 800px; margin: 0 auto 20px auto;">
  This ground truth study evaluated 84 software repositories from the OpenAIRE graph.
  All repositories contain a <code>codemeta.json</code> file and were examined across
  multiple metadata sources including package manifests (e.g., <code>setup.py</code>,
  <code>package.json</code>, <code>DESCRIPTION</code>), citation files
  (<code>CITATION.cff</code>), licensing files, README documentation, and other
  structured or semi-structured metadata artifacts.
</p>

<!-- Empty placeholder paragraph, addressable by its id. -->
<p id="summary-text" style="text-align: center; color: #444; font-size: 1.05em; line-height: 1.4;"></p>

<div class="table-container">
  <!-- Only the header row is static; the inline script below looks the table up by id. -->
  <table id="pitfallsTable">
    <thead>
      <tr>
        <th scope="col">Repository</th>
        <th scope="col">Commit ID</th>
        <th scope="col">Pitfall / Warning Code</th>
        <th scope="col">Description</th>
        <th scope="col">Source File</th>
      </tr>
    </thead>
  </table>
</div>

<script>
/**
 * Page bootstrap.
 *
 * Once the DOM is parsed, loads the ground-truth summary and the two tool-run
 * summaries in parallel, works out which ground-truth repositories also appear
 * in both tool runs, and hands everything to buildGroundTruthTable().
 * Any failure (network, HTTP status, JSON parsing, rendering) is logged and
 * reported in a single error row appended to the table.
 */
document.addEventListener("DOMContentLoaded", () => {
  const gtTable = document.querySelector('#pitfallsTable');
  if (!gtTable) {
    // Without the table there is nowhere to render rows or the error message.
    console.error('Failed to load data:', new Error('#pitfallsTable not found'));
    return;
  }

  // fetch() resolves even for 404/500 responses, so check the status explicitly;
  // otherwise an HTTP error only surfaces later as an opaque JSON parse failure.
  const fetchJson = (path) =>
    fetch(path).then((response) => {
      if (!response.ok) {
        throw new Error(`${path}: HTTP ${response.status}`);
      }
      return response.json();
    });

  Promise.all([
    fetchJson('summary_pitfalls_warnings.json'),
    fetchJson('summary_0_3_0.json'),
    fetchJson('summary_0_3_1.json')
  ])
    .then(([gtData, data030, data031]) => {
      // Reduce each ground-truth URL to its last two path segments, which is
      // the key format used to index data030 and data031 below.
      const gtNames = new Set();
      for (const entry of Object.values(gtData)) {
        const segments = (entry.url || '').replace(/\/$/, "").split("/");
        if (segments.length >= 2) gtNames.add(segments.slice(-2).join("/"));
      }

      // Keep only repositories present in the ground truth AND in both runs.
      const allThree = new Set(
        [...gtNames].filter((name) => data030[name] && data031[name])
      );

      buildGroundTruthTable(gtTable, gtData, data030, data031, allThree);
    })
    .catch((err) => {
      console.error('Failed to load data:', err);

      // Build the error row with DOM nodes rather than `innerHTML +=`, which
      // would serialise and re-parse the entire table.
      const errorCell = document.createElement('td');
      errorCell.colSpan = 5;
      errorCell.style.textAlign = 'center';
      errorCell.style.color = 'red';
      errorCell.textContent = 'Error loading data.';

      const errorRow = document.createElement('tr');
      errorRow.appendChild(errorCell);

      const errorBody = document.createElement('tbody');
      errorBody.appendChild(errorRow);
      gtTable.appendChild(errorBody);
    });
});

/**
 * Renders the ground-truth table: one <tbody> per repository, one row per
 * ground-truth pitfall/warning that at least one tool run also reported.
 *
 * @param {HTMLTableElement} table  Table that receives the generated <tbody> groups.
 * @param {Object} gtData    Ground-truth entries; each value is read for `url`,
 *                           `pitfalls`, `warnings` (and `commit ID` by addGTRow).
 * @param {Object} data030   First tool-run results, keyed by "owner/repo".
 * @param {Object} data031   Second tool-run results, keyed by "owner/repo".
 * @param {Set<string>} allThree  "owner/repo" names to render; all others are skipped.
 */
function buildGroundTruthTable(table, gtData, data030, data031, allThree) {
  for (const repoData of Object.values(gtData)) {
    const url = repoData.url || 'Unknown URL';
    // Last two path segments of the URL, ignoring one trailing slash.
    const repoName = url.replace(/\/$/, "").split("/").slice(-2).join("/");

    if (!allThree.has(repoName)) continue;

    const r030 = data030[repoName] || {};
    const r031 = data031[repoName] || {};

    const pEntries = Object.entries(
      collectMatchedFindings(repoData.pitfalls, r030.pitfalls, r031.pitfalls)
    );
    // W003 is never listed. NOTE(review): the reason is not visible in this file.
    const wEntries = Object.entries(
      collectMatchedFindings(repoData.warnings, r030.warnings, r031.warnings, ['W003'])
    );
    const totalRows = pEntries.length + wEntries.length;

    // Repositories with nothing to show get no <tbody> at all.
    if (totalRows === 0) continue;

    const tbody = document.createElement('tbody');
    // Only the first row of a group carries the row-spanning repo/commit cells.
    let isFirstContext = true;

    for (const [code, info] of pEntries) {
      addGTRow(tbody, url, repoName, code, info, 'p', isFirstContext, totalRows, repoData, allThree);
      isFirstContext = false;
    }
    for (const [code, info] of wEntries) {
      addGTRow(tbody, url, repoName, code, info, 'w', isFirstContext, totalRows, repoData, allThree);
      isFirstContext = false;
    }

    table.appendChild(tbody);
  }
}

/**
 * Selects the ground-truth findings that a tool run also reported.
 *
 * A finding is kept when its code exists in `found031` or `found030`. If the
 * ground truth only names the generic metadata-files placeholder as its source,
 * the tool's concrete `source_file` is used (second run preferred, then first).
 *
 * @param {Object|undefined} gtFindings  Ground-truth findings keyed by code.
 * @param {Object|undefined} found030    Findings of the first tool run, keyed by code.
 * @param {Object|undefined} found031    Findings of the second tool run, keyed by code.
 * @param {string[]} [skipCodes=[]]      Codes to leave out unconditionally.
 * @returns {Object} code -> { source_file, description }, in ground-truth order.
 */
function collectMatchedFindings(gtFindings, found030, found031, skipCodes = []) {
  const GENERIC_SOURCE = 'Metadata files (codemeta.json, setup.py, pom.xml etc...)';
  const matched = {};

  for (const [code, info] of Object.entries(gtFindings || {})) {
    if (skipCodes.includes(code)) continue;

    const toolHit = (found031 && found031[code]) || (found030 && found030[code]);
    if (!toolHit) continue;

    const gtInfo = { source_file: info.source_file, description: info.description };
    if (gtInfo.source_file === GENERIC_SOURCE && toolHit.source_file) {
      gtInfo.source_file = toolHit.source_file;
    }
    matched[code] = gtInfo;
  }

  return matched;
}

/**
 * Appends one finding row to a repository's <tbody>.
 *
 * The first row of a group also gets the repository cell and the commit cell,
 * each spanning `totalRows` rows. Every row gets the code badge, description
 * and source-file cells.
 *
 * @param {HTMLElement} tbody      Group the row is appended to.
 * @param {string} url             Repository URL from the ground-truth data.
 * @param {string} repoName        Display name for the repository link.
 * @param {string} code            Pitfall/warning code shown in the badge.
 * @param {Object} info            Read for `description` and `source_file`.
 * @param {string} type            Badge variant; becomes the `code-<type>` class.
 * @param {boolean} isFirstContext True for the first row of the group.
 * @param {number} totalRows       Row span for the repository and commit cells.
 * @param {Object} repoData        Ground-truth entry; read for `commit ID`.
 * @param {Set<string>} allThree   Unused here; kept so existing callers keep working.
 */
function addGTRow(tbody, url, repoName, code, info, type, isFirstContext, totalRows, repoData, allThree) {
  const tr = document.createElement('tr');

  if (isFirstContext) {
    // The URL comes from a data file, so only http(s) URLs are turned into
    // links; anything else (placeholder text, `javascript:` etc.) is shown as text.
    let parsedUrl = null;
    try {
      const candidate = new URL(url);
      if (candidate.protocol === 'http:' || candidate.protocol === 'https:') {
        parsedUrl = candidate;
      }
    } catch (err) {
      parsedUrl = null;
    }

    // Links open in a new tab; `rel` stops the opened page reaching window.opener.
    const makeLink = (href, text) => {
      const anchor = document.createElement('a');
      anchor.href = href;
      anchor.textContent = text;
      anchor.className = 'repo-link';
      anchor.target = '_blank';
      anchor.rel = 'noopener noreferrer';
      return anchor;
    };

    const tdRepo = document.createElement('td');
    if (parsedUrl) {
      tdRepo.appendChild(makeLink(url, repoName));
    } else {
      tdRepo.textContent = repoName;
    }
    tdRepo.rowSpan = totalRows;
    tr.appendChild(tdRepo);

    const tdCommit = document.createElement('td');
    tdCommit.style.whiteSpace = 'nowrap';
    const commitId = repoData['commit ID'];
    if (commitId) {
      // String() guards against a commit ID stored as a non-string value.
      const shortId = String(commitId).substring(0, 7);
      if (parsedUrl) {
        const cleanUrl = url.endsWith('/') ? url.slice(0, -1) : url;
        // Decide on the host name only, so a repository merely *named* "gitlab..."
        // on another forge does not get the GitLab path.
        const treePath = parsedUrl.hostname.includes('gitlab') ? '/-/tree/' : '/tree/';
        const aCommit = makeLink(`${cleanUrl}${treePath}${commitId}`, shortId);
        aCommit.style.wordBreak = 'normal';
        tdCommit.appendChild(aCommit);
      } else {
        tdCommit.textContent = shortId;
      }
    } else {
      tdCommit.textContent = 'Unknown';
    }
    tdCommit.rowSpan = totalRows;
    tr.appendChild(tdCommit);
  }

  const tdCode = document.createElement('td');
  const spanCode = document.createElement('span');
  spanCode.textContent = code;
  spanCode.className = `code-badge code-${type}`;
  tdCode.appendChild(spanCode);

  // The "No description available" sentinel is rendered as an empty cell.
  const tdDesc = document.createElement('td');
  if (info.description && info.description !== "No description available") {
    tdDesc.textContent = info.description;
    tdDesc.className = 'desc-text';
  } else {
    tdDesc.textContent = '';
  }

  const tdSource = document.createElement('td');
  tdSource.textContent = info.source_file || 'Unknown';

  tr.appendChild(tdCode);
  tr.appendChild(tdDesc);
  tr.appendChild(tdSource);

  tbody.appendChild(tr);
}
</script>
</body>
</html>