Skip to content
This repository was archived by the owner on Jul 15, 2025. It is now read-only.

Commit f671b12

Browse files
Merge pull request #199 from pixlie/update-get-matches-api
get matches for urls and web pages
2 parents 518aed9 + 28c63e2 commit f671b12

File tree

1 file changed

+102
-96
lines changed

1 file changed

+102
-96
lines changed

pixlie_ai/src/engine/api.rs

Lines changed: 102 additions & 96 deletions
Original file line numberDiff line numberDiff line change
@@ -8,12 +8,12 @@
88
use super::node::{ArcedNodeItem, NodeLabel};
99
use super::{EdgeLabel, Engine, NodeFlags};
1010
use crate::engine::node::{NodeId, NodeItem, Payload};
11+
use crate::entity::classifier::ClassifierSettings;
1112
use crate::entity::content::TableRow;
1213
use crate::entity::crawler::CrawlerSettings;
13-
use crate::entity::classifier::ClassifierSettings;
14-
use crate::entity::web::domain::{Domain, FindDomainOf};
1514
use crate::entity::project_settings::ProjectSettings;
1615
use crate::entity::search::saved_search::SavedSearch;
16+
use crate::entity::web::domain::{Domain, FindDomainOf};
1717
use crate::entity::web::link::Link;
1818
use crate::entity::web::web_metadata::WebMetadata;
1919
use crate::error::PiError;
@@ -116,14 +116,19 @@ pub struct Explore {
116116

117117
#[derive(Clone, Serialize, TS, ToSchema)]
118118
#[ts(export)]
119-
pub struct APIMatch {
120-
pub node_id: NodeId,
121-
pub full_url: String,
122-
pub metadata: WebMetadata, // TODO: support other types of matches
119+
pub struct WebPageMatch {
120+
pub metadata: WebMetadata,
123121
pub insight: String,
124122
pub reason: String,
125123
}
126124

125+
#[derive(Clone, Serialize, TS, ToSchema)]
126+
#[ts(export)]
127+
pub struct Matches {
128+
pub urls: Vec<String>,
129+
pub web_pages: Vec<WebPageMatch>,
130+
}
131+
127132
/// Engine's response for an API request.
128133
///
129134
/// API requests for a project are sent to its engine.
@@ -145,8 +150,8 @@ pub enum EngineResponsePayload {
145150
// Change this and handle chain-effects, if any
146151
/// Response for label retrieval. Returns a list of labels.
147152
Labels(Vec<String>),
148-
/// Response for matches retrieval. Returns a list of matches.
149-
Matches(Vec<APIMatch>),
153+
/// Response for matches retrieval. Returns a list of urls and web pages.
154+
Matches(Matches),
150155
Explore(Explore),
151156
/// Error response.
152157
Error(String),
@@ -871,15 +876,18 @@ pub async fn get_matches(
871876
let request_id = api_state.req_id.fetch_add(1);
872877
let project_id = project_id.into_inner();
873878

874-
debug!("API request {} for project {} to get matches", request_id, project_id);
879+
debug!(
880+
"API request {} for project {} to get matches",
881+
request_id, project_id
882+
);
875883

876884
// Subscribe to receive engine response
877885
let mut rx = api_state.api_channel_tx.subscribe();
878886

879887
api_state.main_tx.send(PiEvent::APIRequest(
880888
project_id.clone(),
881889
EngineRequest {
882-
request_id,
890+
request_id: request_id.clone(),
883891
project_id: project_id.clone(),
884892
payload: EngineRequestPayload::GetMatches,
885893
},
@@ -1212,119 +1220,117 @@ pub fn handle_engine_api_request(
12121220
None => EngineResponsePayload::Error(format!("Node {} not found", node_id)),
12131221
},
12141222
EngineRequestPayload::GetMatches => {
1215-
let mut results = vec![];
1216-
1217-
for node in engine.get_all_nodes() {
1218-
if !node.labels.contains(&NodeLabel::WebPage) {
1223+
let mut urls = vec![];
1224+
let mut web_pages = vec![];
1225+
let mut web_page_node_ids = engine.get_node_ids_with_label(&NodeLabel::WebPage);
1226+
web_page_node_ids.sort();
1227+
for web_page_node_id in web_page_node_ids {
1228+
let Some(web_page_node) = engine.get_node_by_id(&web_page_node_id) else {
12191229
continue;
1220-
}
1221-
1222-
let connected = match engine.get_connected_nodes(&node.id)? {
1223-
Some(edges) => edges,
1224-
None => continue,
12251230
};
1226-
1227-
let full_url = connected.edges.iter().find_map(|(id, label)| {
1228-
if *label == EdgeLabel::ParentOf {
1229-
engine.get_node_by_id(id).and_then(|n| {
1230-
if n.labels.contains(&NodeLabel::Link) {
1231-
match &n.payload {
1231+
let Some(full_url) =
1232+
engine
1233+
.get_connected_nodes(&web_page_node.id)?
1234+
.and_then(|edges| {
1235+
edges.edges.iter().find_map(|(id, label)| {
1236+
if *label != EdgeLabel::ParentOf {
1237+
return None;
1238+
}
1239+
let link_node = engine.get_node_by_id(id)?;
1240+
if !link_node.labels.contains(&NodeLabel::Link) {
1241+
return None;
1242+
}
1243+
match &link_node.payload {
12321244
Payload::Link(link) => {
12331245
let domain_node = Domain::find_existing(
12341246
engine.clone(),
12351247
FindDomainOf::Node(*id),
1236-
).ok().flatten()?;
1237-
let domain_name = Domain::get_domain_name(&domain_node).ok()?;
1238-
Some(format!("https://{}{}", domain_name, link.get_full_link()))
1239-
},
1248+
)
1249+
.ok()
1250+
.flatten()?;
1251+
let domain_name =
1252+
Domain::get_domain_name(&domain_node).ok()?;
1253+
Some(format!(
1254+
"https://{}{}",
1255+
domain_name,
1256+
link.get_full_link()
1257+
))
1258+
}
12401259
_ => None,
12411260
}
1242-
} else {
1243-
None
1244-
}
1261+
})
12451262
})
1246-
} else {
1247-
None
1248-
}
1249-
});
1250-
if full_url.is_none() {
1263+
else {
12511264
continue;
1252-
}
1253-
let full_url = full_url.unwrap();
1254-
1255-
let metadata = connected.edges.iter().find_map(|(id, label)| {
1256-
if *label == EdgeLabel::ParentOf {
1257-
engine.get_node_by_id(id).and_then(|n| {
1258-
if n.labels.contains(&NodeLabel::WebMetadata) {
1259-
match &n.payload {
1260-
Payload::WebMetadata(meta) => Some(meta.clone()),
1261-
_ => None,
1265+
};
1266+
let Some(insight) =
1267+
engine
1268+
.get_connected_nodes(&web_page_node.id)?
1269+
.and_then(|edges| {
1270+
edges.edges.iter().find_map(|(id, label)| {
1271+
if *label != EdgeLabel::Matches {
1272+
return None;
1273+
}
1274+
let node = engine.get_node_by_id(id)?;
1275+
if node.labels.contains(&NodeLabel::Insight) {
1276+
if let Payload::Text(text) = &node.payload {
1277+
return Some(text.clone());
1278+
}
12621279
}
1263-
} else {
12641280
None
1265-
}
1281+
})
12661282
})
1267-
} else {
1268-
None
1269-
}
1270-
});
1271-
if metadata.is_none() {
1283+
else {
12721284
continue;
1273-
}
1274-
let metadata = metadata.unwrap();
1275-
1276-
let insight = connected.edges.iter().find_map(|(id, label)| {
1277-
if *label == EdgeLabel::Matches {
1278-
engine.get_node_by_id(id).and_then(|n| {
1279-
if n.labels.contains(&NodeLabel::Insight) {
1280-
match &n.payload {
1281-
Payload::Text(text) => Some(text.clone()),
1282-
_ => None,
1285+
};
1286+
let Some(reason) =
1287+
engine
1288+
.get_connected_nodes(&web_page_node.id)?
1289+
.and_then(|edges| {
1290+
edges.edges.iter().find_map(|(id, label)| {
1291+
if *label != EdgeLabel::Matches {
1292+
return None;
1293+
}
1294+
let node = engine.get_node_by_id(id)?;
1295+
if node.labels.contains(&NodeLabel::Reason) {
1296+
if let Payload::Text(text) = &node.payload {
1297+
return Some(text.clone());
1298+
}
12831299
}
1284-
} else {
12851300
None
1286-
}
1301+
})
12871302
})
1288-
} else {
1289-
None
1290-
}
1291-
});
1292-
if insight.is_none() {
1303+
else {
12931304
continue;
1294-
}
1295-
let insight = insight.unwrap();
1296-
1297-
let reason = connected.edges.iter().find_map(|(id, label)| {
1298-
if *label == EdgeLabel::Matches {
1299-
engine.get_node_by_id(id).and_then(|n| {
1300-
if n.labels.contains(&NodeLabel::Reason) {
1301-
match &n.payload {
1302-
Payload::Text(text) => Some(text.clone()),
1303-
_ => None,
1305+
};
1306+
let Some(metadata) =
1307+
engine
1308+
.get_connected_nodes(&web_page_node.id)?
1309+
.and_then(|edges| {
1310+
edges.edges.iter().find_map(|(id, label)| {
1311+
if *label != EdgeLabel::ParentOf {
1312+
return None;
1313+
}
1314+
let node = engine.get_node_by_id(id)?;
1315+
if node.labels.contains(&NodeLabel::WebMetadata) {
1316+
if let Payload::WebMetadata(meta) = &node.payload {
1317+
return Some(meta.clone());
1318+
}
13041319
}
1305-
} else {
13061320
None
1307-
}
1321+
})
13081322
})
1309-
} else {
1310-
None
1311-
}
1312-
});
1313-
if reason.is_none() {
1323+
else {
13141324
continue;
1315-
}
1316-
let reason = reason.unwrap();
1317-
1318-
results.push(APIMatch {
1319-
node_id: node.id,
1320-
full_url,
1325+
};
1326+
urls.push(full_url.clone());
1327+
web_pages.push(WebPageMatch {
13211328
metadata,
13221329
insight,
13231330
reason,
13241331
});
13251332
}
1326-
1327-
EngineResponsePayload::Matches(results)
1333+
EngineResponsePayload::Matches(Matches { urls, web_pages })
13281334
}
13291335
};
13301336

0 commit comments

Comments
 (0)