1
+ use super :: github:: github_async_new;
1
2
use crate :: { config:: get_env_var_or_default, error:: LDNError } ;
2
3
use fplus_database:: {
3
4
database:: {
4
- applications:: get_applications_by_clients_addresses ,
5
+ applications:: get_distinct_applications_by_clients_addresses ,
5
6
comparable_applications:: get_comparable_applications,
6
7
} ,
7
8
models:: comparable_applications:: ApplicationComparableData ,
@@ -16,9 +17,19 @@ pub struct Document {
16
17
pub text : String ,
17
18
}
18
19
20
// Aliases describing the shape of the similarity report built by
// `detect_similar_applications` and rendered by `format_comment`.

// Owner part of an `(owner, repo)` key; filled from `application.owner`.
+ type Owner = String ;
21
// Repository part of an `(owner, repo)` key; filled from `application.repo`.
+ type Repo = String ;
22
// Identifier of one matched application inside a repository group.
// NOTE(review): `detect_similar_applications` pushes the issue URL
// (`https://github.com/{owner}/{repo}/issues/{n}`) into this slot, not a raw
// client address — confirm whether the alias name should change.
+ type ClientAddress = String ;
23
// Human-readable labels explaining why an application matched
// (e.g. "Similar project description").
+ type Similarities = Vec < String > ;
24
// Matches grouped by `(owner, repo)`; each group lists the matched
// applications together with their similarity labels.
+ type RepoSimilarities = HashMap < ( Owner , Repo ) , Vec < ( ClientAddress , Similarities ) > > ;
25
// The entries of a `RepoSimilarities` map collected into a `Vec` so they can
// be ordered before rendering.
+ type SortedRepoSimilarities = Vec < ( ( Owner , Repo ) , Vec < ( ClientAddress , Similarities ) > ) > ;
26
+
19
27
pub async fn detect_similar_applications (
20
28
client_address : & str ,
21
29
comparable_data : & ApplicationComparableData ,
30
+ owner : & str ,
31
+ repo : & str ,
32
+ issue_number : & u64 ,
22
33
) -> Result < ( ) , LDNError > {
23
34
let comparable_applications = get_comparable_applications ( ) . await . map_err ( |e| {
24
35
LDNError :: New ( format ! (
@@ -80,16 +91,81 @@ pub async fn detect_similar_applications(
80
91
let similar_data_set_sample = get_similar_texts_levenshtein ( & data_set_samples) ?;
81
92
82
93
let unique_addresses: HashSet < String > = similar_project_desciptions
94
+ . clone ( )
83
95
. into_iter ( )
84
- . chain ( similar_stored_data_desciptions. into_iter ( ) )
85
- . chain ( similar_project_and_stored_data_desciptions. into_iter ( ) )
86
- . chain ( similar_data_set_sample. into_iter ( ) )
96
+ . chain ( similar_stored_data_desciptions. clone ( ) . into_iter ( ) )
97
+ . chain (
98
+ similar_project_and_stored_data_desciptions
99
+ . clone ( )
100
+ . into_iter ( ) ,
101
+ )
102
+ . chain ( similar_data_set_sample. clone ( ) . into_iter ( ) )
103
+ . chain ( existing_data_owner_name. clone ( ) . into_iter ( ) )
87
104
. collect ( ) ;
105
+
88
106
let unique_addresses: Vec < String > = unique_addresses. into_iter ( ) . collect ( ) ;
107
+ let gh = github_async_new ( owner. to_string ( ) , repo. to_string ( ) ) . await ?;
108
+
109
+ if unique_addresses. is_empty ( ) {
110
+ let comment = "## Similarity Report\n \n No similar applications found for the issue" ;
111
+ gh. add_comment_to_issue ( * issue_number, comment)
112
+ . await
113
+ . map_err ( |e| LDNError :: New ( format ! ( "Failed to get add comment to the issue: {}" , e) ) ) ?;
114
+ return Ok ( ( ) ) ;
115
+ }
89
116
90
- let _applications = get_applications_by_clients_addresses ( unique_addresses)
117
+ let applications = get_distinct_applications_by_clients_addresses ( unique_addresses)
91
118
. await
92
119
. map_err ( |e| LDNError :: New ( format ! ( "Failed to get applications from database: {}" , e) ) ) ?;
120
+
121
+ let mut repo_similarities: RepoSimilarities = HashMap :: new ( ) ;
122
+
123
+ for application in applications {
124
+ let repo_key = ( application. owner . clone ( ) , application. repo . clone ( ) ) ;
125
+ let issue_link = format ! (
126
+ "https://github.com/{}/{}/issues/{}" ,
127
+ application. owner, application. repo, application. issue_number
128
+ ) ;
129
+
130
+ let entry = repo_similarities. entry ( repo_key) . or_default ( ) ;
131
+ let mut similarities = Vec :: new ( ) ;
132
+
133
+ if similar_project_and_stored_data_desciptions. contains ( & application. id ) {
134
+ similarities. push ( "Similar project and stored data description" . to_string ( ) ) ;
135
+ } else if similar_project_desciptions. contains ( & application. id ) {
136
+ similarities. push ( "Similar project description" . to_string ( ) ) ;
137
+ } else if similar_stored_data_desciptions. contains ( & application. id ) {
138
+ similarities. push ( "Similar stored data description" . to_string ( ) ) ;
139
+ }
140
+ if similar_data_set_sample. contains ( & application. id ) {
141
+ similarities. push ( "Similar data set sample" . to_string ( ) ) ;
142
+ }
143
+ if existing_data_owner_name. contains ( & application. id ) {
144
+ similarities. push ( "The same data owner name" . to_string ( ) ) ;
145
+ }
146
+
147
+ if !similarities. is_empty ( ) {
148
+ entry. push ( ( issue_link, similarities) ) ;
149
+ }
150
+ }
151
+
152
+ let mut sorted_results: SortedRepoSimilarities = repo_similarities. into_iter ( ) . collect ( ) ;
153
+ sorted_results. sort_by ( |owner_repo, similarities| {
154
+ similarities
155
+ . 1
156
+ . iter ( )
157
+ . map ( |( _, sim) | sim. len ( ) )
158
+ . sum :: < usize > ( )
159
+ . cmp ( & owner_repo. 1 . iter ( ) . map ( |( _, sim) | sim. len ( ) ) . sum :: < usize > ( ) )
160
+ } ) ;
161
+
162
+ let comment = format ! (
163
+ "## Similarity Report\n \n This application is similar to the following applications:\n \n {}" ,
164
+ format_comment( & sorted_results)
165
+ ) ;
166
+ gh. add_comment_to_issue ( * issue_number, & comment)
167
+ . await
168
+ . map_err ( |e| LDNError :: New ( format ! ( "Failed to get add comment to the issue: {}" , e) ) ) ?;
93
169
Ok ( ( ) )
94
170
}
95
171
@@ -172,3 +248,24 @@ fn cosine_similarity(v1: &Array1<f64>, v2: &Array1<f64>) -> f64 {
172
248
dot_product / ( norm_v1 * norm_v2)
173
249
}
174
250
}
251
+
252
/// Renders grouped similarity results as the Markdown body of the report comment.
///
/// Every `((owner, repo), issues)` entry becomes a `### owner/repo` heading followed by
/// one bullet per issue, and each issue bullet is followed by one bullet per similarity
/// label. Issues inside a repository, and the repository sections themselves, are joined
/// with the same separator. Entries are emitted in the order they are given.
///
/// # Arguments
///
/// * `repos` - The groups to render. Taken as a slice so that any contiguous collection
///   can be passed; a `&SortedRepoSimilarities` (a `&Vec<_>`) coerces to it unchanged.
///
/// # Returns
///
/// The formatted Markdown, or an empty string when `repos` is empty.
fn format_comment(repos: &[((String, String), Vec<(String, Vec<String>)>)]) -> String {
    let mut sections = Vec::with_capacity(repos.len());

    for ((owner, repo), issues) in repos {
        // One bullet per issue, with its similarity labels listed underneath.
        let bullets: Vec<String> = issues
            .iter()
            .map(|(issue, similarities)| {
                format!("* {}:\n * {}", issue, similarities.join("\n * "))
            })
            .collect();

        sections.push(format!(
            "### {}/{}\n \n {}",
            owner,
            repo,
            bullets.join("\n \n ")
        ));
    }

    sections.join("\n \n ")
}
0 commit comments