Skip to content

Commit 350f0ea

Browse files
authored
Generate comment with results (#253)
1 parent 5701e68 commit 350f0ea

File tree

3 files changed

+126
-9
lines changed

3 files changed

+126
-9
lines changed

fplus-database/src/database/applications.rs

+7-3
Original file line numberDiff line numberDiff line change
@@ -424,13 +424,17 @@ pub async fn get_applications_by_client_id(
424424
Ok(result)
425425
}
426426

427-
pub async fn get_applications_by_clients_addresses(
427+
pub async fn get_distinct_applications_by_clients_addresses(
428428
clients_addresses: Vec<String>,
429429
) -> Result<Vec<ApplicationModel>, sea_orm::DbErr> {
430430
let conn = get_database_connection().await?;
431-
432431
let result = Application::find()
433-
.filter(Column::Id.is_in(clients_addresses))
432+
.from_raw_sql(Statement::from_sql_and_values(
433+
DbBackend::Postgres,
434+
"SELECT DISTINCT ON (id) * FROM applications
435+
WHERE id = ANY($1)",
436+
[clients_addresses.into()],
437+
))
434438
.all(&conn)
435439
.await?;
436440

fplus-lib/src/core/mod.rs

+17-1
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@ use serde::{Deserialize, Serialize};
1818
use serde_json::from_str;
1919

2020
use crate::external_services::dmob::get_client_allocation;
21+
use crate::external_services::similarity_detection::detect_similar_applications;
2122
use crate::{
2223
base64,
2324
config::get_env_var_or_default,
@@ -702,7 +703,7 @@ impl LDNApplication {
702703
parsed_ldn.version,
703704
parsed_ldn.id.clone(),
704705
parsed_ldn.client.clone(),
705-
parsed_ldn.project,
706+
parsed_ldn.project.clone(),
706707
parsed_ldn.datacap,
707708
)
708709
.await;
@@ -872,6 +873,21 @@ impl LDNApplication {
872873
application_id, e
873874
))
874875
})?;
876+
let comparable_data = ApplicationComparableData {
877+
project_desc: parsed_ldn.project.history.clone(),
878+
stored_data_desc: parsed_ldn.project.stored_data_desc.clone(),
879+
data_owner_name: parsed_ldn.client.name.clone(),
880+
data_set_sample: parsed_ldn.project.data_sample_link.clone(),
881+
};
882+
883+
detect_similar_applications(
884+
&parsed_ldn.id,
885+
&comparable_data,
886+
&info.owner,
887+
&info.repo,
888+
&(issue_number as u64),
889+
)
890+
.await?;
875891
create_comparable_application(
876892
&application_id,
877893
&ApplicationComparableData {

fplus-lib/src/external_services/similarity_detection.rs

+102-5
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,8 @@
1+
use super::github::github_async_new;
12
use crate::{config::get_env_var_or_default, error::LDNError};
23
use fplus_database::{
34
database::{
4-
applications::get_applications_by_clients_addresses,
5+
applications::get_distinct_applications_by_clients_addresses,
56
comparable_applications::get_comparable_applications,
67
},
78
models::comparable_applications::ApplicationComparableData,
@@ -16,9 +17,19 @@ pub struct Document {
1617
pub text: String,
1718
}
1819

20+
// Aliases describing how similarity findings are grouped for the report comment:
// findings are keyed by `(Owner, Repo)`, and each repository maps to a list of
// `(entry, reasons)` pairs. `RepoSimilarities` is the map used while collecting;
// `SortedRepoSimilarities` is the same data flattened into a `Vec` so it can be sorted.
// NOTE(review): `detect_similar_applications` stores the GitHub issue URL
// (`issue_link`) in the `ClientAddress` slot, not a client address; consider
// renaming the alias to reflect that.
type Owner = String;
21+
type Repo = String;
22+
type ClientAddress = String;
23+
type Similarities = Vec<String>;
24+
type RepoSimilarities = HashMap<(Owner, Repo), Vec<(ClientAddress, Similarities)>>;
25+
type SortedRepoSimilarities = Vec<((Owner, Repo), Vec<(ClientAddress, Similarities)>)>;
26+
1927
pub async fn detect_similar_applications(
2028
client_address: &str,
2129
comparable_data: &ApplicationComparableData,
30+
owner: &str,
31+
repo: &str,
32+
issue_number: &u64,
2233
) -> Result<(), LDNError> {
2334
let comparable_applications = get_comparable_applications().await.map_err(|e| {
2435
LDNError::New(format!(
@@ -80,16 +91,81 @@ pub async fn detect_similar_applications(
8091
let similar_data_set_sample = get_similar_texts_levenshtein(&data_set_samples)?;
8192

8293
let unique_addresses: HashSet<String> = similar_project_desciptions
94+
.clone()
8395
.into_iter()
84-
.chain(similar_stored_data_desciptions.into_iter())
85-
.chain(similar_project_and_stored_data_desciptions.into_iter())
86-
.chain(similar_data_set_sample.into_iter())
96+
.chain(similar_stored_data_desciptions.clone().into_iter())
97+
.chain(
98+
similar_project_and_stored_data_desciptions
99+
.clone()
100+
.into_iter(),
101+
)
102+
.chain(similar_data_set_sample.clone().into_iter())
103+
.chain(existing_data_owner_name.clone().into_iter())
87104
.collect();
105+
88106
let unique_addresses: Vec<String> = unique_addresses.into_iter().collect();
107+
let gh = github_async_new(owner.to_string(), repo.to_string()).await?;
108+
109+
if unique_addresses.is_empty() {
110+
let comment = "## Similarity Report\n\nNo similar applications found for the issue";
111+
gh.add_comment_to_issue(*issue_number, comment)
112+
.await
113+
.map_err(|e| LDNError::New(format!("Failed to get add comment to the issue: {}", e)))?;
114+
return Ok(());
115+
}
89116

90-
let _applications = get_applications_by_clients_addresses(unique_addresses)
117+
let applications = get_distinct_applications_by_clients_addresses(unique_addresses)
91118
.await
92119
.map_err(|e| LDNError::New(format!("Failed to get applications from database: {}", e)))?;
120+
121+
let mut repo_similarities: RepoSimilarities = HashMap::new();
122+
123+
for application in applications {
124+
let repo_key = (application.owner.clone(), application.repo.clone());
125+
let issue_link = format!(
126+
"https://github.com/{}/{}/issues/{}",
127+
application.owner, application.repo, application.issue_number
128+
);
129+
130+
let entry = repo_similarities.entry(repo_key).or_default();
131+
let mut similarities = Vec::new();
132+
133+
if similar_project_and_stored_data_desciptions.contains(&application.id) {
134+
similarities.push("Similar project and stored data description".to_string());
135+
} else if similar_project_desciptions.contains(&application.id) {
136+
similarities.push("Similar project description".to_string());
137+
} else if similar_stored_data_desciptions.contains(&application.id) {
138+
similarities.push("Similar stored data description".to_string());
139+
}
140+
if similar_data_set_sample.contains(&application.id) {
141+
similarities.push("Similar data set sample".to_string());
142+
}
143+
if existing_data_owner_name.contains(&application.id) {
144+
similarities.push("The same data owner name".to_string());
145+
}
146+
147+
if !similarities.is_empty() {
148+
entry.push((issue_link, similarities));
149+
}
150+
}
151+
152+
let mut sorted_results: SortedRepoSimilarities = repo_similarities.into_iter().collect();
153+
sorted_results.sort_by(|owner_repo, similarities| {
154+
similarities
155+
.1
156+
.iter()
157+
.map(|(_, sim)| sim.len())
158+
.sum::<usize>()
159+
.cmp(&owner_repo.1.iter().map(|(_, sim)| sim.len()).sum::<usize>())
160+
});
161+
162+
let comment = format!(
163+
"## Similarity Report\n\nThis application is similar to the following applications:\n\n{}",
164+
format_comment(&sorted_results)
165+
);
166+
gh.add_comment_to_issue(*issue_number, &comment)
167+
.await
168+
.map_err(|e| LDNError::New(format!("Failed to get add comment to the issue: {}", e)))?;
93169
Ok(())
94170
}
95171

@@ -172,3 +248,24 @@ fn cosine_similarity(v1: &Array1<f64>, v2: &Array1<f64>) -> f64 {
172248
dot_product / (norm_v1 * norm_v2)
173249
}
174250
}
251+
252+
/// Renders grouped similarity results as the Markdown body of the report comment.
///
/// For every `((owner, repo), issues)` entry, in the order given, this emits a
/// `### owner/repo` heading followed by a blank line and one `* <issue>:` bullet
/// per issue, with each similarity reason on its own ` * ` line beneath it.
/// Issues within a repository, and repositories themselves, are separated by a
/// blank line. An empty `repos` yields an empty string.
///
/// NOTE(review): as shown here the reason bullets are indented by a single space;
/// CommonMark needs at least two spaces for them to nest under `* <issue>:`.
/// Confirm the indentation inside these string literals in the real file.
fn format_comment(repos: &SortedRepoSimilarities) -> String {
253+
repos
254+
.iter()
255+
.map(|((owner, repo), issues)| {
256+
format!(
257+
"### {}/{}\n\n{}",
258+
owner,
259+
repo,
260+
issues
261+
.iter()
262+
.map(|(issue, similarities)| {
263+
format!("* {}:\n * {}", issue, similarities.join("\n * "))
264+
})
265+
.collect::<Vec<String>>()
266+
.join("\n\n")
267+
)
268+
})
269+
.collect::<Vec<String>>()
270+
.join("\n\n")
271+
}

0 commit comments

Comments
 (0)