88use super :: node:: { ArcedNodeItem , NodeLabel } ;
99use super :: { EdgeLabel , Engine , NodeFlags } ;
1010use crate :: engine:: node:: { NodeId , NodeItem , Payload } ;
11+ use crate :: entity:: classifier:: ClassifierSettings ;
1112use crate :: entity:: content:: TableRow ;
1213use crate :: entity:: crawler:: CrawlerSettings ;
13- use crate :: entity:: classifier:: ClassifierSettings ;
14- use crate :: entity:: web:: domain:: { Domain , FindDomainOf } ;
1514use crate :: entity:: project_settings:: ProjectSettings ;
1615use crate :: entity:: search:: saved_search:: SavedSearch ;
16+ use crate :: entity:: web:: domain:: { Domain , FindDomainOf } ;
1717use crate :: entity:: web:: link:: Link ;
1818use crate :: entity:: web:: web_metadata:: WebMetadata ;
1919use crate :: error:: PiError ;
@@ -116,14 +116,19 @@ pub struct Explore {
116116
117117#[ derive( Clone , Serialize , TS , ToSchema ) ]
118118#[ ts( export) ]
119- pub struct APIMatch {
120- pub node_id : NodeId ,
121- pub full_url : String ,
122- pub metadata : WebMetadata , // TODO: support other types of matches
119+ pub struct WebPageMatch {
120+ pub metadata : WebMetadata ,
123121 pub insight : String ,
124122 pub reason : String ,
125123}
126124
125+ #[ derive( Clone , Serialize , TS , ToSchema ) ]
126+ #[ ts( export) ]
127+ pub struct Matches {
128+ pub urls : Vec < String > ,
129+ pub web_pages : Vec < WebPageMatch > ,
130+ }
131+
127132/// Engine's response for an API request.
128133///
129134/// API requests for a project are sent to its engine.
@@ -145,8 +150,8 @@ pub enum EngineResponsePayload {
145150 // Change this and handle chain-effects, if any
146151 /// Response for label retrieval. Returns a list of labels.
147152 Labels ( Vec < String > ) ,
148- /// Response for matches retrieval. Returns a list of matches .
149- Matches ( Vec < APIMatch > ) ,
153+ /// Response for matches retrieval. Returns a list of urls and web pages .
154+ Matches ( Matches ) ,
150155 Explore ( Explore ) ,
151156 /// Error response.
152157 Error ( String ) ,
@@ -871,15 +876,18 @@ pub async fn get_matches(
871876 let request_id = api_state. req_id . fetch_add ( 1 ) ;
872877 let project_id = project_id. into_inner ( ) ;
873878
874- debug ! ( "API request {} for project {} to get matches" , request_id, project_id) ;
879+ debug ! (
880+ "API request {} for project {} to get matches" ,
881+ request_id, project_id
882+ ) ;
875883
876884 // Subscribe to receive engine response
877885 let mut rx = api_state. api_channel_tx . subscribe ( ) ;
878886
879887 api_state. main_tx . send ( PiEvent :: APIRequest (
880888 project_id. clone ( ) ,
881889 EngineRequest {
882- request_id,
890+ request_id : request_id . clone ( ) ,
883891 project_id : project_id. clone ( ) ,
884892 payload : EngineRequestPayload :: GetMatches ,
885893 } ,
@@ -1212,119 +1220,117 @@ pub fn handle_engine_api_request(
12121220 None => EngineResponsePayload :: Error ( format ! ( "Node {} not found" , node_id) ) ,
12131221 } ,
12141222 EngineRequestPayload :: GetMatches => {
1215- let mut results = vec ! [ ] ;
1216-
1217- for node in engine. get_all_nodes ( ) {
1218- if !node. labels . contains ( & NodeLabel :: WebPage ) {
1223+ let mut urls = vec ! [ ] ;
1224+ let mut web_pages = vec ! [ ] ;
1225+ let mut web_page_node_ids = engine. get_node_ids_with_label ( & NodeLabel :: WebPage ) ;
1226+ web_page_node_ids. sort ( ) ;
1227+ for web_page_node_id in web_page_node_ids {
1228+ let Some ( web_page_node) = engine. get_node_by_id ( & web_page_node_id) else {
12191229 continue ;
1220- }
1221-
1222- let connected = match engine. get_connected_nodes ( & node. id ) ? {
1223- Some ( edges) => edges,
1224- None => continue ,
12251230 } ;
1226-
1227- let full_url = connected. edges . iter ( ) . find_map ( |( id, label) | {
1228- if * label == EdgeLabel :: ParentOf {
1229- engine. get_node_by_id ( id) . and_then ( |n| {
1230- if n. labels . contains ( & NodeLabel :: Link ) {
1231- match & n. payload {
1231+ let Some ( full_url) =
1232+ engine
1233+ . get_connected_nodes ( & web_page_node. id ) ?
1234+ . and_then ( |edges| {
1235+ edges. edges . iter ( ) . find_map ( |( id, label) | {
1236+ if * label != EdgeLabel :: ParentOf {
1237+ return None ;
1238+ }
1239+ let link_node = engine. get_node_by_id ( id) ?;
1240+ if !link_node. labels . contains ( & NodeLabel :: Link ) {
1241+ return None ;
1242+ }
1243+ match & link_node. payload {
12321244 Payload :: Link ( link) => {
12331245 let domain_node = Domain :: find_existing (
12341246 engine. clone ( ) ,
12351247 FindDomainOf :: Node ( * id) ,
1236- ) . ok ( ) . flatten ( ) ?;
1237- let domain_name = Domain :: get_domain_name ( & domain_node) . ok ( ) ?;
1238- Some ( format ! ( "https://{}{}" , domain_name, link. get_full_link( ) ) )
1239- } ,
1248+ )
1249+ . ok ( )
1250+ . flatten ( ) ?;
1251+ let domain_name =
1252+ Domain :: get_domain_name ( & domain_node) . ok ( ) ?;
1253+ Some ( format ! (
1254+ "https://{}{}" ,
1255+ domain_name,
1256+ link. get_full_link( )
1257+ ) )
1258+ }
12401259 _ => None ,
12411260 }
1242- } else {
1243- None
1244- }
1261+ } )
12451262 } )
1246- } else {
1247- None
1248- }
1249- } ) ;
1250- if full_url. is_none ( ) {
1263+ else {
12511264 continue ;
1252- }
1253- let full_url = full_url. unwrap ( ) ;
1254-
1255- let metadata = connected. edges . iter ( ) . find_map ( |( id, label) | {
1256- if * label == EdgeLabel :: ParentOf {
1257- engine. get_node_by_id ( id) . and_then ( |n| {
1258- if n. labels . contains ( & NodeLabel :: WebMetadata ) {
1259- match & n. payload {
1260- Payload :: WebMetadata ( meta) => Some ( meta. clone ( ) ) ,
1261- _ => None ,
1265+ } ;
1266+ let Some ( insight) =
1267+ engine
1268+ . get_connected_nodes ( & web_page_node. id ) ?
1269+ . and_then ( |edges| {
1270+ edges. edges . iter ( ) . find_map ( |( id, label) | {
1271+ if * label != EdgeLabel :: Matches {
1272+ return None ;
1273+ }
1274+ let node = engine. get_node_by_id ( id) ?;
1275+ if node. labels . contains ( & NodeLabel :: Insight ) {
1276+ if let Payload :: Text ( text) = & node. payload {
1277+ return Some ( text. clone ( ) ) ;
1278+ }
12621279 }
1263- } else {
12641280 None
1265- }
1281+ } )
12661282 } )
1267- } else {
1268- None
1269- }
1270- } ) ;
1271- if metadata. is_none ( ) {
1283+ else {
12721284 continue ;
1273- }
1274- let metadata = metadata. unwrap ( ) ;
1275-
1276- let insight = connected. edges . iter ( ) . find_map ( |( id, label) | {
1277- if * label == EdgeLabel :: Matches {
1278- engine. get_node_by_id ( id) . and_then ( |n| {
1279- if n. labels . contains ( & NodeLabel :: Insight ) {
1280- match & n. payload {
1281- Payload :: Text ( text) => Some ( text. clone ( ) ) ,
1282- _ => None ,
1285+ } ;
1286+ let Some ( reason) =
1287+ engine
1288+ . get_connected_nodes ( & web_page_node. id ) ?
1289+ . and_then ( |edges| {
1290+ edges. edges . iter ( ) . find_map ( |( id, label) | {
1291+ if * label != EdgeLabel :: Matches {
1292+ return None ;
1293+ }
1294+ let node = engine. get_node_by_id ( id) ?;
1295+ if node. labels . contains ( & NodeLabel :: Reason ) {
1296+ if let Payload :: Text ( text) = & node. payload {
1297+ return Some ( text. clone ( ) ) ;
1298+ }
12831299 }
1284- } else {
12851300 None
1286- }
1301+ } )
12871302 } )
1288- } else {
1289- None
1290- }
1291- } ) ;
1292- if insight. is_none ( ) {
1303+ else {
12931304 continue ;
1294- }
1295- let insight = insight. unwrap ( ) ;
1296-
1297- let reason = connected. edges . iter ( ) . find_map ( |( id, label) | {
1298- if * label == EdgeLabel :: Matches {
1299- engine. get_node_by_id ( id) . and_then ( |n| {
1300- if n. labels . contains ( & NodeLabel :: Reason ) {
1301- match & n. payload {
1302- Payload :: Text ( text) => Some ( text. clone ( ) ) ,
1303- _ => None ,
1305+ } ;
1306+ let Some ( metadata) =
1307+ engine
1308+ . get_connected_nodes ( & web_page_node. id ) ?
1309+ . and_then ( |edges| {
1310+ edges. edges . iter ( ) . find_map ( |( id, label) | {
1311+ if * label != EdgeLabel :: ParentOf {
1312+ return None ;
1313+ }
1314+ let node = engine. get_node_by_id ( id) ?;
1315+ if node. labels . contains ( & NodeLabel :: WebMetadata ) {
1316+ if let Payload :: WebMetadata ( meta) = & node. payload {
1317+ return Some ( meta. clone ( ) ) ;
1318+ }
13041319 }
1305- } else {
13061320 None
1307- }
1321+ } )
13081322 } )
1309- } else {
1310- None
1311- }
1312- } ) ;
1313- if reason. is_none ( ) {
1323+ else {
13141324 continue ;
1315- }
1316- let reason = reason. unwrap ( ) ;
1317-
1318- results. push ( APIMatch {
1319- node_id : node. id ,
1320- full_url,
1325+ } ;
1326+ urls. push ( full_url. clone ( ) ) ;
1327+ web_pages. push ( WebPageMatch {
13211328 metadata,
13221329 insight,
13231330 reason,
13241331 } ) ;
13251332 }
1326-
1327- EngineResponsePayload :: Matches ( results)
1333+ EngineResponsePayload :: Matches ( Matches { urls, web_pages } )
13281334 }
13291335 } ;
13301336
0 commit comments